diff --git a/src/broadcom/cle/meson.build b/src/broadcom/cle/meson.build index 8ac32b313e4..a3ed2cff759 100644 --- a/src/broadcom/cle/meson.build +++ b/src/broadcom/cle/meson.build @@ -21,8 +21,6 @@ # [version, cle XML version] v3d_versions = [ [21, 21], - [33, 33], - [41, 33], [42, 33], [71, 33] ] diff --git a/src/broadcom/cle/v3dx_pack.h b/src/broadcom/cle/v3dx_pack.h index e5a1eb26698..0062ddbd516 100644 --- a/src/broadcom/cle/v3dx_pack.h +++ b/src/broadcom/cle/v3dx_pack.h @@ -31,10 +31,6 @@ #if (V3D_VERSION == 21) # include "cle/v3d_packet_v21_pack.h" -#elif (V3D_VERSION == 33) -# include "cle/v3d_packet_v33_pack.h" -#elif (V3D_VERSION == 41) -# include "cle/v3d_packet_v41_pack.h" #elif (V3D_VERSION == 42) # include "cle/v3d_packet_v42_pack.h" #elif (V3D_VERSION == 71) diff --git a/src/broadcom/clif/clif_dump.c b/src/broadcom/clif/clif_dump.c index ede6f42eedf..db94edba113 100644 --- a/src/broadcom/clif/clif_dump.c +++ b/src/broadcom/clif/clif_dump.c @@ -106,12 +106,16 @@ static bool clif_dump_packet(struct clif_dump *clif, uint32_t offset, const uint8_t *cl, uint32_t *size, bool reloc_mode) { - if (clif->devinfo->ver >= 42) + + switch (clif->devinfo->ver) { + case 42: return v3d42_clif_dump_packet(clif, offset, cl, size, reloc_mode); - else if (clif->devinfo->ver >= 41) - return v3d41_clif_dump_packet(clif, offset, cl, size, reloc_mode); - else - return v3d33_clif_dump_packet(clif, offset, cl, size, reloc_mode); + case 71: + return v3d71_clif_dump_packet(clif, offset, cl, size, reloc_mode); + default: + break; + }; + unreachable("Unknown HW version"); } static uint32_t diff --git a/src/broadcom/clif/clif_private.h b/src/broadcom/clif/clif_private.h index cda407a00bf..d4e55e03730 100644 --- a/src/broadcom/clif/clif_private.h +++ b/src/broadcom/clif/clif_private.h @@ -95,10 +95,6 @@ clif_dump_add_address_to_worklist(struct clif_dump *clif, enum reloc_worklist_type type, uint32_t addr); -bool v3d33_clif_dump_packet(struct clif_dump *clif, uint32_t offset, - const uint8_t *cl, uint32_t *size, bool reloc_mode); -bool v3d41_clif_dump_packet(struct clif_dump *clif, uint32_t offset, - const uint8_t *cl, uint32_t *size, bool reloc_mode); bool v3d42_clif_dump_packet(struct clif_dump *clif, uint32_t offset, const uint8_t *cl, uint32_t *size, bool reloc_mode); bool v3d71_clif_dump_packet(struct clif_dump *clif, uint32_t offset, diff --git a/src/broadcom/common/v3d_device_info.c b/src/broadcom/common/v3d_device_info.c index 7bc2b662cfc..fa85a7d5077 100644 --- a/src/broadcom/common/v3d_device_info.c +++ b/src/broadcom/common/v3d_device_info.c @@ -68,8 +68,6 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i devinfo->has_accumulators = devinfo->ver < 71; switch (devinfo->ver) { - case 33: - case 41: case 42: case 71: break; diff --git a/src/broadcom/common/v3d_macros.h b/src/broadcom/common/v3d_macros.h index b4291fb5350..4ab66f647ab 100644 --- a/src/broadcom/common/v3d_macros.h +++ b/src/broadcom/common/v3d_macros.h @@ -32,12 +32,6 @@ #if (V3D_VERSION == 21) # define V3DX(x) V3D21_##x # define v3dX(x) v3d21_##x -#elif (V3D_VERSION == 33) -# define V3DX(x) V3D33_##x -# define v3dX(x) v3d33_##x -#elif (V3D_VERSION == 41) -# define V3DX(x) V3D41_##x -# define v3dX(x) v3d41_##x #elif (V3D_VERSION == 42) # define V3DX(x) V3D42_##x # define v3dX(x) v3d42_##x diff --git a/src/broadcom/common/v3d_performance_counters.h b/src/broadcom/common/v3d_performance_counters.h index a8f0cff8784..33e3e0e78db 100644 --- a/src/broadcom/common/v3d_performance_counters.h +++ 
b/src/broadcom/common/v3d_performance_counters.h @@ -130,7 +130,7 @@ static const char *v3d_performance_counters[][3] = { {"QPU", "QPU-stalls-other", "[QPU] Stalled qcycles waiting for any other reason (vary/W/Z)"}, }; -#elif (V3D_VERSION >= 41) +#elif (V3D_VERSION >= 42) static const char *v3d_performance_counters[][3] = { {"FEP", "FEP-valid-primitives-no-rendered-pixels", "[FEP] Valid primitives that result in no rendered pixels, for all rendered tiles"}, diff --git a/src/broadcom/compiler/meson.build b/src/broadcom/compiler/meson.build index 453af22aecc..4f696fd5aff 100644 --- a/src/broadcom/compiler/meson.build +++ b/src/broadcom/compiler/meson.build @@ -32,9 +32,7 @@ libbroadcom_compiler_files = files( 'vir_to_qpu.c', 'qpu_schedule.c', 'qpu_validate.c', - 'v3d33_tex.c', - 'v3d40_tex.c', - 'v3d33_vpm_setup.c', + 'v3d_tex.c', 'v3d_compiler.h', 'v3d_nir_lower_io.c', 'v3d_nir_lower_image_load_store.c', diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 220c864a056..ad677e3e0fb 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -38,7 +38,7 @@ #define __gen_address_type uint32_t #define __gen_address_offset(reloc) (*reloc) #define __gen_emit_reloc(cl, reloc) -#include "cle/v3d_packet_v41_pack.h" +#include "cle/v3d_packet_v42_pack.h" #define GENERAL_TMU_LOOKUP_PER_QUAD (0 << 7) #define GENERAL_TMU_LOOKUP_PER_PIXEL (1 << 7) @@ -963,10 +963,7 @@ ntq_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) break; } - if (c->devinfo->ver >= 40) - v3d40_vir_emit_tex(c, instr); - else - v3d33_vir_emit_tex(c, instr); + v3d_vir_emit_tex(c, instr); } static struct qreg @@ -1040,15 +1037,10 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var, struct qinst *ldvary = NULL; struct qreg vary; - if (c->devinfo->ver >= 41) { - ldvary = vir_add_inst(V3D_QPU_A_NOP, c->undef, - c->undef, c->undef); - ldvary->qpu.sig.ldvary = true; - vary = vir_emit_def(c, ldvary); - } else { - vir_NOP(c)->qpu.sig.ldvary = true; - vary = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R3); - } + ldvary = vir_add_inst(V3D_QPU_A_NOP, c->undef, + c->undef, c->undef); + ldvary->qpu.sig.ldvary = true; + vary = vir_emit_def(c, ldvary); /* Store the input value before interpolation so we can implement * GLSL's interpolateAt functions if the shader uses them. @@ -1904,12 +1896,8 @@ emit_frag_end(struct v3d_compile *c) inst = vir_MOV_dest(c, tlbu_reg, c->outputs[c->output_position_index]); - if (c->devinfo->ver >= 42) { - tlb_specifier |= (TLB_V42_DEPTH_TYPE_PER_PIXEL | - TLB_SAMPLE_MODE_PER_PIXEL); - } else { - tlb_specifier |= TLB_DEPTH_TYPE_PER_PIXEL; - } + tlb_specifier |= (TLB_V42_DEPTH_TYPE_PER_PIXEL | + TLB_SAMPLE_MODE_PER_PIXEL); } else { /* Shader doesn't write to gl_FragDepth, take Z from * FEP. @@ -1917,16 +1905,11 @@ emit_frag_end(struct v3d_compile *c) c->writes_z_from_fep = true; inst = vir_MOV_dest(c, tlbu_reg, vir_nop_reg()); - if (c->devinfo->ver >= 42) { - /* The spec says the PER_PIXEL flag is ignored - * for invariant writes, but the simulator - * demands it. - */ - tlb_specifier |= (TLB_V42_DEPTH_TYPE_INVARIANT | - TLB_SAMPLE_MODE_PER_PIXEL); - } else { - tlb_specifier |= TLB_DEPTH_TYPE_INVARIANT; - } + /* The spec says the PER_PIXEL flag is ignored for + * invariant writes, but the simulator demands it. 
+ */ + tlb_specifier |= (TLB_V42_DEPTH_TYPE_INVARIANT | + TLB_SAMPLE_MODE_PER_PIXEL); /* Since (single-threaded) fragment shaders always need * a TLB write, if we dond't have any we emit a @@ -1956,7 +1939,6 @@ vir_VPM_WRITE_indirect(struct v3d_compile *c, struct qreg vpm_index, bool uniform_vpm_index) { - assert(c->devinfo->ver >= 40); if (uniform_vpm_index) vir_STVPMV(c, vpm_index, val); else @@ -1966,13 +1948,8 @@ vir_VPM_WRITE_indirect(struct v3d_compile *c, static void vir_VPM_WRITE(struct v3d_compile *c, struct qreg val, uint32_t vpm_index) { - if (c->devinfo->ver >= 40) { - vir_VPM_WRITE_indirect(c, val, - vir_uniform_ui(c, vpm_index), true); - } else { - /* XXX: v3d33_vir_vpm_write_setup(c); */ - vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val); - } + vir_VPM_WRITE_indirect(c, val, + vir_uniform_ui(c, vpm_index), true); } static void @@ -1980,7 +1957,7 @@ emit_vert_end(struct v3d_compile *c) { /* GFXH-1684: VPM writes need to be complete by the end of the shader. */ - if (c->devinfo->ver >= 40 && c->devinfo->ver <= 42) + if (c->devinfo->ver == 42) vir_VPMWT(c); } @@ -1989,7 +1966,7 @@ emit_geom_end(struct v3d_compile *c) { /* GFXH-1684: VPM writes need to be complete by the end of the shader. */ - if (c->devinfo->ver >= 40 && c->devinfo->ver <= 42) + if (c->devinfo->ver == 42) vir_VPMWT(c); } @@ -2174,26 +2151,9 @@ ntq_emit_vpm_read(struct v3d_compile *c, uint32_t *remaining, uint32_t vpm_index) { - if (c->devinfo->ver >= 40 ) { - return vir_LDVPMV_IN(c, - vir_uniform_ui(c, - (*num_components_queued)++)); - } - - struct qreg vpm = vir_reg(QFILE_VPM, vpm_index); - if (*num_components_queued != 0) { - (*num_components_queued)--; - return vir_MOV(c, vpm); - } - - uint32_t num_components = MIN2(*remaining, 32); - - v3d33_vir_vpm_read_setup(c, num_components); - - *num_components_queued = num_components - 1; - *remaining -= num_components; - - return vir_MOV(c, vpm); + return vir_LDVPMV_IN(c, + vir_uniform_ui(c, + (*num_components_queued)++)); } static void @@ -2263,31 +2223,8 @@ ntq_setup_vs_inputs(struct v3d_compile *c) } /* The actual loads will happen directly in nir_intrinsic_load_input - * on newer versions. */ - if (c->devinfo->ver >= 40) - return; - - for (int loc = 0; loc < ARRAY_SIZE(c->vattr_sizes); loc++) { - resize_qreg_array(c, &c->inputs, &c->inputs_array_size, - (loc + 1) * 4); - - for (int i = 0; i < c->vattr_sizes[loc]; i++) { - c->inputs[loc * 4 + i] = - ntq_emit_vpm_read(c, - &vpm_components_queued, - &num_components, - loc * 4 + i); - - } - } - - if (c->devinfo->ver >= 40) { - assert(vpm_components_queued == num_components); - } else { - assert(vpm_components_queued == 0); - assert(num_components == 0); - } + return; } static bool @@ -2533,10 +2470,8 @@ vir_emit_tlb_color_read(struct v3d_compile *c, nir_intrinsic_instr *instr) * switch instead -- see vir_emit_thrsw(). */ if (!c->emitted_tlb_load) { - if (!c->last_thrsw_at_top_level) { - assert(c->devinfo->ver >= 41); + if (!c->last_thrsw_at_top_level) vir_emit_thrsw(c); - } c->emitted_tlb_load = true; } @@ -2744,7 +2679,7 @@ ntq_emit_load_input(struct v3d_compile *c, nir_intrinsic_instr *instr) unsigned offset = nir_intrinsic_base(instr) + nir_src_as_uint(instr->src[0]); - if (c->s->info.stage != MESA_SHADER_FRAGMENT && c->devinfo->ver >= 40) { + if (c->s->info.stage != MESA_SHADER_FRAGMENT) { /* Emit the LDVPM directly now, rather than at the top * of the shader like we did for V3D 3.x (which needs * vpmsetup when not just taking the next offset). 
@@ -3328,11 +3263,11 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) case nir_intrinsic_image_store: case nir_intrinsic_image_atomic: case nir_intrinsic_image_atomic_swap: - v3d40_vir_emit_image_load_store(c, instr); + v3d_vir_emit_image_load_store(c, instr); break; case nir_intrinsic_image_load: - v3d40_vir_emit_image_load_store(c, instr); + v3d_vir_emit_image_load_store(c, instr); /* Not really a general TMU load, but we only use this flag * for NIR scheduling and we do schedule these under the same * policy as general TMU. @@ -3502,21 +3437,8 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) * (actually supergroup) to block until the last * invocation reaches the TSY op. */ - if (c->devinfo->ver >= 42) { - vir_BARRIERID_dest(c, vir_reg(QFILE_MAGIC, - V3D_QPU_WADDR_SYNCB)); - } else { - struct qinst *sync = - vir_BARRIERID_dest(c, - vir_reg(QFILE_MAGIC, - V3D_QPU_WADDR_SYNCU)); - sync->uniform = - vir_get_uniform_index(c, QUNIFORM_CONSTANT, - 0xffffff00 | - V3D_TSY_WAIT_INC_CHECK); - - } - + vir_BARRIERID_dest(c, vir_reg(QFILE_MAGIC, + V3D_QPU_WADDR_SYNCB)); /* The blocking of a TSY op only happens at the next * thread switch. No texturing may be outstanding at the * time of a TSY blocking operation. @@ -4330,14 +4252,12 @@ nir_to_vir(struct v3d_compile *c) emit_fragment_varying(c, NULL, -1, 0, 0); } - if (c->fs_key->is_points && - (c->devinfo->ver < 40 || program_reads_point_coord(c))) { + if (c->fs_key->is_points && program_reads_point_coord(c)) { c->point_x = emit_fragment_varying(c, NULL, -1, 0, 0); c->point_y = emit_fragment_varying(c, NULL, -1, 0, 0); c->uses_implicit_point_line_varyings = true; } else if (c->fs_key->is_lines && - (c->devinfo->ver < 40 || - BITSET_TEST(c->s->info.system_values_read, + (BITSET_TEST(c->s->info.system_values_read, SYSTEM_VALUE_LINE_COORD))) { c->line_x = emit_fragment_varying(c, NULL, -1, 0, 0); c->uses_implicit_point_line_varyings = true; @@ -4350,7 +4270,7 @@ nir_to_vir(struct v3d_compile *c) V3D_QPU_WADDR_SYNC)); } - if (c->devinfo->ver <= 42) { + if (c->devinfo->ver == 42) { c->cs_payload[0] = vir_MOV(c, vir_reg(QFILE_REG, 0)); c->cs_payload[1] = vir_MOV(c, vir_reg(QFILE_REG, 2)); } else if (c->devinfo->ver >= 71) { @@ -4461,25 +4381,12 @@ vir_emit_last_thrsw(struct v3d_compile *c, { *restore_last_thrsw = c->last_thrsw; - /* On V3D before 4.1, we need a TMU op to be outstanding when thread - * switching, so disable threads if we didn't do any TMU ops (each of - * which would have emitted a THRSW). - */ - if (!c->last_thrsw_at_top_level && c->devinfo->ver < 41) { - c->threads = 1; - if (c->last_thrsw) - vir_remove_thrsw(c); - *restore_last_thrsw = NULL; - } - /* If we're threaded and the last THRSW was in conditional code, then * we need to emit another one so that we can flag it as the last * thrsw. */ - if (c->last_thrsw && !c->last_thrsw_at_top_level) { - assert(c->devinfo->ver >= 41); + if (c->last_thrsw && !c->last_thrsw_at_top_level) vir_emit_thrsw(c); - } /* If we're threaded, then we need to mark the last THRSW instruction * so we can emit a pair of them at QPU emit time. @@ -4487,10 +4394,8 @@ vir_emit_last_thrsw(struct v3d_compile *c, * For V3D 4.x, we can spawn the non-fragment shaders already in the * post-last-THRSW state, so we can skip this. 
*/ - if (!c->last_thrsw && c->s->info.stage == MESA_SHADER_FRAGMENT) { - assert(c->devinfo->ver >= 41); + if (!c->last_thrsw && c->s->info.stage == MESA_SHADER_FRAGMENT) vir_emit_thrsw(c); - } /* If we have not inserted a last thread switch yet, do it now to ensure * any potential spilling we do happens before this. If we don't spill @@ -4616,7 +4521,7 @@ v3d_nir_to_vir(struct v3d_compile *c) /* Attempt to allocate registers for the temporaries. If we fail, * reduce thread count and try again. */ - int min_threads = (c->devinfo->ver >= 41) ? 2 : 1; + int min_threads = 2; struct qpu_reg *temp_registers; while (true) { temp_registers = v3d_register_allocate(c); diff --git a/src/broadcom/compiler/qpu_schedule.c b/src/broadcom/compiler/qpu_schedule.c index 86494706386..8c0e65e3fe8 100644 --- a/src/broadcom/compiler/qpu_schedule.c +++ b/src/broadcom/compiler/qpu_schedule.c @@ -202,9 +202,6 @@ tmu_write_is_sequence_terminator(uint32_t waddr) static bool can_reorder_tmu_write(const struct v3d_device_info *devinfo, uint32_t waddr) { - if (devinfo->ver < 40) - return false; - if (tmu_write_is_sequence_terminator(waddr)) return false; @@ -267,8 +264,7 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n, break; case V3D_QPU_WADDR_UNIFA: - if (state->devinfo->ver >= 40) - add_write_dep(state, &state->last_unifa, n); + add_write_dep(state, &state->last_unifa, n); break; case V3D_QPU_WADDR_NOP: @@ -660,7 +656,7 @@ writes_too_soon_after_write(const struct v3d_device_info *devinfo, v3d_qpu_writes_r4(devinfo, inst)) return true; - if (devinfo->ver <= 42) + if (devinfo->ver == 42) return false; /* Don't schedule anything that writes rf0 right after ldvary, since @@ -854,13 +850,10 @@ qpu_compatible_peripheral_access(const struct v3d_device_info *devinfo, if (util_bitcount(a_peripherals) + util_bitcount(b_peripherals) <= 1) return true; - if (devinfo->ver < 41) - return false; - /* V3D 4.x can't do more than one peripheral access except in a * few cases: */ - if (devinfo->ver <= 42) { + if (devinfo->ver == 42) { /* WRTMUC signal with TMU register write (other than tmuc). */ if (a_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG && b_peripherals == V3D_PERIPHERAL_TMU_WRITE) { @@ -984,7 +977,7 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result, result->sig.small_imm_d) <= 1; } - assert(devinfo->ver <= 42); + assert(devinfo->ver == 42); uint64_t raddrs_used = qpu_raddrs_used(add_instr, mul_instr); int naddrs = util_bitcount64(raddrs_used); @@ -1499,7 +1492,7 @@ retry: * as long as it is not the last delay slot. */ if (inst->sig.ldvary) { - if (c->devinfo->ver <= 42 && + if (c->devinfo->ver == 42 && scoreboard->last_thrsw_tick + 2 >= scoreboard->tick - 1) { continue; @@ -1607,7 +1600,7 @@ update_scoreboard_for_magic_waddr(struct choose_scoreboard *scoreboard, { if (v3d_qpu_magic_waddr_is_sfu(waddr)) scoreboard->last_magic_sfu_write_tick = scoreboard->tick; - else if (devinfo->ver >= 40 && waddr == V3D_QPU_WADDR_UNIFA) + else if (waddr == V3D_QPU_WADDR_UNIFA) scoreboard->last_unifa_write_tick = scoreboard->tick; } @@ -1938,7 +1931,7 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c, if (slot > 0 && qinst->uniform != ~0) return false; - if (c->devinfo->ver <= 42 && v3d_qpu_waits_vpm(inst)) + if (c->devinfo->ver == 42 && v3d_qpu_waits_vpm(inst)) return false; if (inst->sig.ldvary) @@ -1946,12 +1939,12 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c, if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { /* GFXH-1625: TMUWT not allowed in the final instruction. 
*/ - if (c->devinfo->ver <= 42 && slot == 2 && + if (c->devinfo->ver == 42 && slot == 2 && inst->alu.add.op == V3D_QPU_A_TMUWT) { return false; } - if (c->devinfo->ver <= 42) { + if (c->devinfo->ver == 42) { /* No writing physical registers at the end. */ bool add_is_nop = inst->alu.add.op == V3D_QPU_A_NOP; bool mul_is_nop = inst->alu.mul.op == V3D_QPU_M_NOP; @@ -1977,10 +1970,7 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c, } } - if (c->devinfo->ver < 40 && inst->alu.add.op == V3D_QPU_A_SETMSF) - return false; - - if (c->devinfo->ver <= 42) { + if (c->devinfo->ver == 42) { /* RF0-2 might be overwritten during the delay slots by * fragment shader setup. */ @@ -2034,7 +2024,7 @@ qpu_inst_before_thrsw_valid_in_delay_slot(struct v3d_compile *c, return false; if (qinst->qpu.sig.ldvary) { - if (c->devinfo->ver <= 42 && slot > 0) + if (c->devinfo->ver == 42 && slot > 0) return false; if (c->devinfo->ver >= 71 && slot == 2) return false; @@ -2475,7 +2465,7 @@ alu_reads_register(const struct v3d_device_info *devinfo, else num_src = v3d_qpu_mul_op_num_src(inst->alu.mul.op); - if (devinfo->ver <= 42) { + if (devinfo->ver == 42) { enum v3d_qpu_mux mux_a, mux_b; if (add) { mux_a = inst->alu.add.a.mux; @@ -2639,7 +2629,7 @@ fixup_pipelined_ldvary(struct v3d_compile *c, * and flagging it for a fixup. In V3D 7.x this is limited only to the * second delay slot. */ - assert((devinfo->ver <= 42 && + assert((devinfo->ver == 42 && scoreboard->last_thrsw_tick + 2 < scoreboard->tick - 1) || (devinfo->ver >= 71 && scoreboard->last_thrsw_tick + 2 != scoreboard->tick - 1)); @@ -2672,7 +2662,7 @@ fixup_pipelined_ldvary(struct v3d_compile *c, * ldvary write to r5/rf0 happens in the next instruction). */ assert(!v3d_qpu_writes_r5(devinfo, inst)); - assert(devinfo->ver <= 42 || + assert(devinfo->ver == 42 || (!v3d_qpu_writes_rf0_implicitly(devinfo, inst) && !v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0))); diff --git a/src/broadcom/compiler/qpu_validate.c b/src/broadcom/compiler/qpu_validate.c index 0466ee5d0b6..6c15153b9cb 100644 --- a/src/broadcom/compiler/qpu_validate.c +++ b/src/broadcom/compiler/qpu_validate.c @@ -243,7 +243,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) } if (inst->sig.ldvary) { - if (devinfo->ver <= 42) + if (devinfo->ver == 42) fail_instr(state, "LDVARY during THRSW delay slots"); if (devinfo->ver >= 71 && state->ip - state->last_thrsw_ip == 2) { @@ -276,7 +276,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) vpm_writes + tlb_writes + tsy_writes + - (devinfo->ver <= 42 ? inst->sig.ldtmu : 0) + + (devinfo->ver == 42 ? 
inst->sig.ldtmu : 0) + inst->sig.ldtlb + inst->sig.ldvpm + inst->sig.ldtlbu > 1) { @@ -316,7 +316,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) inst->type == V3D_QPU_INSTR_TYPE_ALU) { if ((inst->alu.add.op != V3D_QPU_A_NOP && !inst->alu.add.magic_write)) { - if (devinfo->ver <= 42) { + if (devinfo->ver == 42) { fail_instr(state, "RF write after THREND"); } else if (devinfo->ver >= 71) { if (state->last_thrsw_ip - state->ip == 0) { @@ -333,7 +333,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) if ((inst->alu.mul.op != V3D_QPU_M_NOP && !inst->alu.mul.magic_write)) { - if (devinfo->ver <= 42) { + if (devinfo->ver == 42) { fail_instr(state, "RF write after THREND"); } else if (devinfo->ver >= 71) { if (state->last_thrsw_ip - state->ip == 0) { @@ -351,7 +351,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst) if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) && !inst->sig_magic) { - if (devinfo->ver <= 42) { + if (devinfo->ver == 42) { fail_instr(state, "RF write after THREND"); } else if (devinfo->ver >= 71 && (inst->sig_addr == 2 || diff --git a/src/broadcom/compiler/v3d33_tex.c b/src/broadcom/compiler/v3d33_tex.c deleted file mode 100644 index b4c888aab07..00000000000 --- a/src/broadcom/compiler/v3d33_tex.c +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright © 2016-2018 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "v3d_compiler.h" - -/* We don't do any address packing. */ -#define __gen_user_data void -#define __gen_address_type uint32_t -#define __gen_address_offset(reloc) (*reloc) -#define __gen_emit_reloc(cl, reloc) -#include "cle/v3d_packet_v33_pack.h" - -void -v3d33_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) -{ - /* FIXME: We don't bother implementing pipelining for texture reads - * for any pre 4.x hardware. It should be straight forward to do but - * we are not really testing or even targeting this hardware at - * present. 
- */ - ntq_flush_tmu(c); - - unsigned unit = instr->texture_index; - - struct V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1 p0_unpacked = { - V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_header, - - .fetch_sample_mode = instr->op == nir_texop_txf, - }; - - struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 p1_unpacked = { - }; - - switch (instr->sampler_dim) { - case GLSL_SAMPLER_DIM_1D: - if (instr->is_array) - p0_unpacked.lookup_type = TEXTURE_1D_ARRAY; - else - p0_unpacked.lookup_type = TEXTURE_1D; - break; - case GLSL_SAMPLER_DIM_2D: - case GLSL_SAMPLER_DIM_RECT: - if (instr->is_array) - p0_unpacked.lookup_type = TEXTURE_2D_ARRAY; - else - p0_unpacked.lookup_type = TEXTURE_2D; - break; - case GLSL_SAMPLER_DIM_3D: - p0_unpacked.lookup_type = TEXTURE_3D; - break; - case GLSL_SAMPLER_DIM_CUBE: - p0_unpacked.lookup_type = TEXTURE_CUBE_MAP; - break; - default: - unreachable("Bad sampler type"); - } - - struct qreg coords[5]; - int next_coord = 0; - for (unsigned i = 0; i < instr->num_srcs; i++) { - switch (instr->src[i].src_type) { - case nir_tex_src_coord: - for (int j = 0; j < instr->coord_components; j++) { - coords[next_coord++] = - ntq_get_src(c, instr->src[i].src, j); - } - if (instr->coord_components < 2) - coords[next_coord++] = vir_uniform_f(c, 0.5); - break; - case nir_tex_src_bias: - coords[next_coord++] = - ntq_get_src(c, instr->src[i].src, 0); - - p0_unpacked.bias_supplied = true; - break; - case nir_tex_src_lod: - coords[next_coord++] = - vir_FADD(c, - ntq_get_src(c, instr->src[i].src, 0), - vir_uniform(c, QUNIFORM_TEXTURE_FIRST_LEVEL, - unit)); - - if (instr->op != nir_texop_txf && - instr->op != nir_texop_tg4) { - p0_unpacked.disable_autolod_use_bias_only = true; - } - break; - case nir_tex_src_comparator: - coords[next_coord++] = - ntq_get_src(c, instr->src[i].src, 0); - - p0_unpacked.shadow = true; - break; - - case nir_tex_src_offset: { - p0_unpacked.texel_offset_for_s_coordinate = - nir_src_comp_as_int(instr->src[i].src, 0); - - if (instr->coord_components >= 2) - p0_unpacked.texel_offset_for_t_coordinate = - nir_src_comp_as_int(instr->src[i].src, 1); - - if (instr->coord_components >= 3) - p0_unpacked.texel_offset_for_r_coordinate = - nir_src_comp_as_int(instr->src[i].src, 2); - break; - } - - default: - unreachable("unknown texture source"); - } - } - - /* Limit the number of channels returned to both how many the NIR - * instruction writes and how many the instruction could produce. - */ - p1_unpacked.return_words_of_texture_data = - nir_def_components_read(&instr->def); - - uint32_t p0_packed; - V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_pack(NULL, - (uint8_t *)&p0_packed, - &p0_unpacked); - - uint32_t p1_packed; - V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(NULL, - (uint8_t *)&p1_packed, - &p1_unpacked); - /* Load unit number into the address field, which will be be used by - * the driver to decide which texture to put in the actual address - * field. - */ - p1_packed |= unit << 5; - - /* There is no native support for GL texture rectangle coordinates, so - * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, - * 1]). 
- */ - if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) { - coords[0] = vir_FMUL(c, coords[0], - vir_uniform(c, QUNIFORM_TEXRECT_SCALE_X, - unit)); - coords[1] = vir_FMUL(c, coords[1], - vir_uniform(c, QUNIFORM_TEXRECT_SCALE_Y, - unit)); - } - - int texture_u[] = { - vir_get_uniform_index(c, QUNIFORM_TEXTURE_CONFIG_P0_0 + unit, p0_packed), - vir_get_uniform_index(c, QUNIFORM_TEXTURE_CONFIG_P1, p1_packed), - }; - - for (int i = 0; i < next_coord; i++) { - struct qreg dst; - - if (i == next_coord - 1) - dst = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUL); - else - dst = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMU); - - struct qinst *tmu = vir_MOV_dest(c, dst, coords[i]); - - if (i < 2) - tmu->uniform = texture_u[i]; - } - - vir_emit_thrsw(c); - - for (int i = 0; i < 4; i++) { - if (p1_unpacked.return_words_of_texture_data & (1 << i)) - ntq_store_def(c, &instr->def, i, vir_LDTMU(c)); - } -} diff --git a/src/broadcom/compiler/v3d33_vpm_setup.c b/src/broadcom/compiler/v3d33_vpm_setup.c deleted file mode 100644 index 8bce67dfae9..00000000000 --- a/src/broadcom/compiler/v3d33_vpm_setup.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright © 2016-2018 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "v3d_compiler.h" - -/* We don't do any address packing. */ -#define __gen_user_data void -#define __gen_address_type uint32_t -#define __gen_address_offset(reloc) (*reloc) -#define __gen_emit_reloc(cl, reloc) -#include "broadcom/cle/v3d_packet_v33_pack.h" - -void -v3d33_vir_vpm_read_setup(struct v3d_compile *c, int num_components) -{ - struct V3D33_VPM_GENERIC_BLOCK_READ_SETUP unpacked = { - V3D33_VPM_GENERIC_BLOCK_READ_SETUP_header, - - .horiz = true, - .laned = false, - /* If the field is 0, that means a read count of 32. 
*/ - .num = num_components & 31, - .segs = true, - .stride = 1, - .size = VPM_SETUP_SIZE_32_BIT, - .addr = c->num_inputs, - }; - - uint32_t packed; - V3D33_VPM_GENERIC_BLOCK_READ_SETUP_pack(NULL, - (uint8_t *)&packed, - &unpacked); - vir_VPMSETUP(c, vir_uniform_ui(c, packed)); -} - -void -v3d33_vir_vpm_write_setup(struct v3d_compile *c) -{ - uint32_t packed; - struct V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP unpacked = { - V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP_header, - - .horiz = true, - .laned = false, - .segs = true, - .stride = 1, - .size = VPM_SETUP_SIZE_32_BIT, - .addr = 0, - }; - - V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP_pack(NULL, - (uint8_t *)&packed, - &unpacked); - vir_VPMSETUP(c, vir_uniform_ui(c, packed)); -} diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 78f6c0c0db9..cb9b2ae5757 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -96,14 +96,6 @@ enum qfile { */ QFILE_TEMP, - /** - * VPM reads use this with an index value to say what part of the VPM - * is being read. - * - * Used only for ver < 40. For ver >= 40 we use ldvpm. - */ - QFILE_VPM, - /** * Stores an immediate value in the index field that will be used * directly by qpu_load_imm(). @@ -1150,7 +1142,6 @@ bool vir_is_raw_mov(struct qinst *inst); bool vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst); bool vir_is_add(struct qinst *inst); bool vir_is_mul(struct qinst *inst); -bool vir_writes_r3_implicitly(const struct v3d_device_info *devinfo, struct qinst *inst); bool vir_writes_r4_implicitly(const struct v3d_device_info *devinfo, struct qinst *inst); struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg); uint8_t vir_channels_written(struct qinst *inst); @@ -1187,12 +1178,9 @@ bool v3d_nir_lower_txf_ms(nir_shader *s); bool v3d_nir_lower_image_load_store(nir_shader *s); bool v3d_nir_lower_load_store_bitsize(nir_shader *s); -void v3d33_vir_vpm_read_setup(struct v3d_compile *c, int num_components); -void v3d33_vir_vpm_write_setup(struct v3d_compile *c); -void v3d33_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr); -void v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr); -void v3d40_vir_emit_image_load_store(struct v3d_compile *c, - nir_intrinsic_instr *instr); +void v3d_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr); +void v3d_vir_emit_image_load_store(struct v3d_compile *c, + nir_intrinsic_instr *instr); void v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers); uint32_t v3d_qpu_schedule_instructions(struct v3d_compile *c); @@ -1302,28 +1290,18 @@ vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b) \ #define VIR_SFU(name) \ static inline struct qreg \ vir_##name(struct v3d_compile *c, struct qreg a) \ -{ \ - if (c->devinfo->ver >= 41) { \ - return vir_emit_def(c, vir_add_inst(V3D_QPU_A_##name, \ - c->undef, \ - a, c->undef)); \ - } else { \ - vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \ - return vir_FMOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \ - } \ +{ \ + return vir_emit_def(c, vir_add_inst(V3D_QPU_A_##name, \ + c->undef, \ + a, c->undef)); \ } \ static inline struct qinst * \ vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \ struct qreg a) \ { \ - if (c->devinfo->ver >= 41) { \ - return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_##name, \ - dest, \ - a, c->undef)); \ - } else { \ - vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \ - return vir_FMOV_dest(c, dest, vir_reg(QFILE_MAGIC, 
V3D_QPU_WADDR_R4)); \ - } \ + return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_##name, \ + dest, \ + a, c->undef)); \ } #define VIR_A_ALU2(name) VIR_ALU2(name, vir_add_inst, V3D_QPU_A_##name) @@ -1454,16 +1432,11 @@ vir_NOP(struct v3d_compile *c) static inline struct qreg vir_LDTMU(struct v3d_compile *c) { - if (c->devinfo->ver >= 41) { - struct qinst *ldtmu = vir_add_inst(V3D_QPU_A_NOP, c->undef, - c->undef, c->undef); - ldtmu->qpu.sig.ldtmu = true; + struct qinst *ldtmu = vir_add_inst(V3D_QPU_A_NOP, c->undef, + c->undef, c->undef); + ldtmu->qpu.sig.ldtmu = true; - return vir_emit_def(c, ldtmu); - } else { - vir_NOP(c)->qpu.sig.ldtmu = true; - return vir_MOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); - } + return vir_emit_def(c, ldtmu); } static inline struct qreg @@ -1476,7 +1449,6 @@ vir_UMUL(struct v3d_compile *c, struct qreg src0, struct qreg src1) static inline struct qreg vir_TLBU_COLOR_READ(struct v3d_compile *c, uint32_t config) { - assert(c->devinfo->ver >= 41); /* XXX */ assert((config & 0xffffff00) == 0xffffff00); struct qinst *ldtlb = vir_add_inst(V3D_QPU_A_NOP, c->undef, @@ -1489,8 +1461,6 @@ vir_TLBU_COLOR_READ(struct v3d_compile *c, uint32_t config) static inline struct qreg vir_TLB_COLOR_READ(struct v3d_compile *c) { - assert(c->devinfo->ver >= 41); /* XXX */ - struct qinst *ldtlb = vir_add_inst(V3D_QPU_A_NOP, c->undef, c->undef, c->undef); ldtlb->qpu.sig.ldtlb = true; diff --git a/src/broadcom/compiler/v3d_nir_lower_io.c b/src/broadcom/compiler/v3d_nir_lower_io.c index f70195e5ec6..55e2e4f2e11 100644 --- a/src/broadcom/compiler/v3d_nir_lower_io.c +++ b/src/broadcom/compiler/v3d_nir_lower_io.c @@ -515,7 +515,7 @@ v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b, * The correct fix for this as recommended by Broadcom * is to convert to .8 fixed-point with ffloor(). 
*/ - if (c->devinfo->ver <= 42) + if (c->devinfo->ver == 42) pos = nir_f2i32(b, nir_ffloor(b, pos)); else pos = nir_f2i32(b, nir_fround_even(b, pos)); diff --git a/src/broadcom/compiler/v3d40_tex.c b/src/broadcom/compiler/v3d_tex.c similarity index 94% rename from src/broadcom/compiler/v3d40_tex.c rename to src/broadcom/compiler/v3d_tex.c index 9ae993859c5..7e0bc1aa0e5 100644 --- a/src/broadcom/compiler/v3d40_tex.c +++ b/src/broadcom/compiler/v3d_tex.c @@ -28,7 +28,7 @@ #define __gen_address_type uint32_t #define __gen_address_offset(reloc) (*reloc) #define __gen_emit_reloc(cl, reloc) -#include "cle/v3d_packet_v41_pack.h" +#include "cle/v3d_packet_v42_pack.h" static inline struct qinst * vir_TMU_WRITE(struct v3d_compile *c, enum v3d_qpu_waddr waddr, struct qreg val) @@ -61,11 +61,11 @@ vir_WRTMUC(struct v3d_compile *c, enum quniform_contents contents, uint32_t data inst->uniform = vir_get_uniform_index(c, contents, data); } -static const struct V3D41_TMU_CONFIG_PARAMETER_1 p1_unpacked_default = { +static const struct V3D42_TMU_CONFIG_PARAMETER_1 p1_unpacked_default = { .per_pixel_mask_enable = true, }; -static const struct V3D41_TMU_CONFIG_PARAMETER_2 p2_unpacked_default = { +static const struct V3D42_TMU_CONFIG_PARAMETER_2 p2_unpacked_default = { .op = V3D_TMU_OP_REGULAR, }; @@ -86,7 +86,7 @@ handle_tex_src(struct v3d_compile *c, nir_tex_instr *instr, unsigned src_idx, unsigned non_array_components, - struct V3D41_TMU_CONFIG_PARAMETER_2 *p2_unpacked, + struct V3D42_TMU_CONFIG_PARAMETER_2 *p2_unpacked, struct qreg *s_out, unsigned *tmu_writes) { @@ -201,7 +201,7 @@ handle_tex_src(struct v3d_compile *c, static void vir_tex_handle_srcs(struct v3d_compile *c, nir_tex_instr *instr, - struct V3D41_TMU_CONFIG_PARAMETER_2 *p2_unpacked, + struct V3D42_TMU_CONFIG_PARAMETER_2 *p2_unpacked, struct qreg *s, unsigned *tmu_writes) { @@ -224,10 +224,8 @@ get_required_tex_tmu_writes(struct v3d_compile *c, nir_tex_instr *instr) } void -v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) +v3d_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) { - assert(instr->op != nir_texop_lod || c->devinfo->ver >= 42); - unsigned texture_idx = instr->texture_index; /* For instructions that don't have a sampler (i.e. 
txf) we bind @@ -244,7 +242,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) bool output_type_32_bit = c->key->sampler[sampler_idx].return_size == 32; - struct V3D41_TMU_CONFIG_PARAMETER_0 p0_unpacked = { + struct V3D42_TMU_CONFIG_PARAMETER_0 p0_unpacked = { }; /* Limit the number of channels returned to both how many the NIR @@ -275,7 +273,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) } assert(p0_unpacked.return_words_of_texture_data != 0); - struct V3D41_TMU_CONFIG_PARAMETER_2 p2_unpacked = { + struct V3D42_TMU_CONFIG_PARAMETER_2 p2_unpacked = { .op = V3D_TMU_OP_REGULAR, .gather_mode = instr->op == nir_texop_tg4, .gather_component = instr->component, @@ -304,12 +302,12 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) vir_tex_handle_srcs(c, instr, &p2_unpacked, &s, NULL); uint32_t p0_packed; - V3D41_TMU_CONFIG_PARAMETER_0_pack(NULL, + V3D42_TMU_CONFIG_PARAMETER_0_pack(NULL, (uint8_t *)&p0_packed, &p0_unpacked); uint32_t p2_packed; - V3D41_TMU_CONFIG_PARAMETER_2_pack(NULL, + V3D42_TMU_CONFIG_PARAMETER_2_pack(NULL, (uint8_t *)&p2_packed, &p2_unpacked); @@ -339,7 +337,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) output_type_32_bit; if (non_default_p1_config) { - struct V3D41_TMU_CONFIG_PARAMETER_1 p1_unpacked = { + struct V3D42_TMU_CONFIG_PARAMETER_1 p1_unpacked = { .output_type_32_bit = output_type_32_bit, .unnormalized_coordinates = (instr->sampler_dim == @@ -356,7 +354,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) p0_unpacked.return_words_of_texture_data < (1 << 2)); uint32_t p1_packed; - V3D41_TMU_CONFIG_PARAMETER_1_pack(NULL, + V3D42_TMU_CONFIG_PARAMETER_1_pack(NULL, (uint8_t *)&p1_packed, &p1_unpacked); @@ -384,7 +382,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) * address */ uint32_t p1_packed_default; - V3D41_TMU_CONFIG_PARAMETER_1_pack(NULL, + V3D42_TMU_CONFIG_PARAMETER_1_pack(NULL, (uint8_t *)&p1_packed_default, &p1_unpacked_default); vir_WRTMUC(c, QUNIFORM_CONSTANT, p1_packed_default); @@ -412,7 +410,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) } static uint32_t -v3d40_image_atomic_tmu_op(nir_intrinsic_instr *instr) +v3d_image_atomic_tmu_op(nir_intrinsic_instr *instr) { nir_atomic_op atomic_op = nir_intrinsic_atomic_op(instr); switch (atomic_op) { @@ -431,7 +429,7 @@ v3d40_image_atomic_tmu_op(nir_intrinsic_instr *instr) } static uint32_t -v3d40_image_load_store_tmu_op(nir_intrinsic_instr *instr) +v3d_image_load_store_tmu_op(nir_intrinsic_instr *instr) { switch (instr->intrinsic) { case nir_intrinsic_image_load: @@ -440,7 +438,7 @@ v3d40_image_load_store_tmu_op(nir_intrinsic_instr *instr) case nir_intrinsic_image_atomic: case nir_intrinsic_image_atomic_swap: - return v3d40_image_atomic_tmu_op(instr); + return v3d_image_atomic_tmu_op(instr); default: unreachable("unknown image intrinsic"); @@ -552,21 +550,21 @@ get_required_image_tmu_writes(struct v3d_compile *c, } void -v3d40_vir_emit_image_load_store(struct v3d_compile *c, - nir_intrinsic_instr *instr) +v3d_vir_emit_image_load_store(struct v3d_compile *c, + nir_intrinsic_instr *instr) { unsigned format = nir_intrinsic_format(instr); unsigned unit = nir_src_as_uint(instr->src[0]); - struct V3D41_TMU_CONFIG_PARAMETER_0 p0_unpacked = { + struct V3D42_TMU_CONFIG_PARAMETER_0 p0_unpacked = { }; - struct V3D41_TMU_CONFIG_PARAMETER_1 p1_unpacked = { + struct V3D42_TMU_CONFIG_PARAMETER_1 p1_unpacked = { .per_pixel_mask_enable = true, .output_type_32_bit = 
v3d_gl_format_is_return_32(format), }; - struct V3D41_TMU_CONFIG_PARAMETER_2 p2_unpacked = { 0 }; + struct V3D42_TMU_CONFIG_PARAMETER_2 p2_unpacked = { 0 }; /* Limit the number of channels returned to both how many the NIR * instruction writes and how many the instruction could produce. @@ -578,7 +576,7 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c, p0_unpacked.return_words_of_texture_data = (1 << instr_return_channels) - 1; - p2_unpacked.op = v3d40_image_load_store_tmu_op(instr); + p2_unpacked.op = v3d_image_load_store_tmu_op(instr); /* If we were able to replace atomic_add for an inc/dec, then we * need/can to do things slightly different, like not loading the @@ -591,7 +589,7 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c, p2_unpacked.op == V3D_TMU_OP_WRITE_OR_READ_DEC); uint32_t p0_packed; - V3D41_TMU_CONFIG_PARAMETER_0_pack(NULL, + V3D42_TMU_CONFIG_PARAMETER_0_pack(NULL, (uint8_t *)&p0_packed, &p0_unpacked); @@ -602,12 +600,12 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c, p0_packed |= unit << 24; uint32_t p1_packed; - V3D41_TMU_CONFIG_PARAMETER_1_pack(NULL, + V3D42_TMU_CONFIG_PARAMETER_1_pack(NULL, (uint8_t *)&p1_packed, &p1_unpacked); uint32_t p2_packed; - V3D41_TMU_CONFIG_PARAMETER_2_pack(NULL, + V3D42_TMU_CONFIG_PARAMETER_2_pack(NULL, (uint8_t *)&p2_packed, &p2_unpacked); diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 8c536b8fbcc..eb83dde784a 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -155,32 +155,6 @@ vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst) return false; } -bool -vir_writes_r3_implicitly(const struct v3d_device_info *devinfo, - struct qinst *inst) -{ - if (!devinfo->has_accumulators) - return false; - - for (int i = 0; i < vir_get_nsrc(inst); i++) { - switch (inst->src[i].file) { - case QFILE_VPM: - return true; - default: - break; - } - } - - if (devinfo->ver < 41 && (inst->qpu.sig.ldvary || - inst->qpu.sig.ldtlb || - inst->qpu.sig.ldtlbu || - inst->qpu.sig.ldvpm)) { - return true; - } - - return false; -} - bool vir_writes_r4_implicitly(const struct v3d_device_info *devinfo, struct qinst *inst) @@ -203,9 +177,6 @@ vir_writes_r4_implicitly(const struct v3d_device_info *devinfo, break; } - if (devinfo->ver < 41 && inst->qpu.sig.ldtmu) - return true; - return false; } diff --git a/src/broadcom/compiler/vir_dump.c b/src/broadcom/compiler/vir_dump.c index ab5d4043039..631eeee52ab 100644 --- a/src/broadcom/compiler/vir_dump.c +++ b/src/broadcom/compiler/vir_dump.c @@ -182,11 +182,6 @@ vir_print_reg(struct v3d_compile *c, const struct qinst *inst, break; } - case QFILE_VPM: - fprintf(stderr, "vpm%d.%d", - reg.index / 4, reg.index % 4); - break; - case QFILE_TEMP: fprintf(stderr, "t%d", reg.index); break; @@ -197,9 +192,6 @@ static void vir_dump_sig_addr(const struct v3d_device_info *devinfo, const struct v3d_qpu_instr *instr) { - if (devinfo->ver < 41) - return; - if (!instr->sig_magic) fprintf(stderr, ".rf%d", instr->sig_addr); else { diff --git a/src/broadcom/compiler/vir_opt_copy_propagate.c b/src/broadcom/compiler/vir_opt_copy_propagate.c index 1260838ca05..611c4693ed3 100644 --- a/src/broadcom/compiler/vir_opt_copy_propagate.c +++ b/src/broadcom/compiler/vir_opt_copy_propagate.c @@ -62,7 +62,7 @@ is_copy_mov(const struct v3d_device_info *devinfo, struct qinst *inst) return false; } - if (devinfo->ver <= 42) { + if (devinfo->ver == 42) { switch (inst->src[0].file) { case QFILE_MAGIC: /* No copy propagating from R3/R4/R5 -- the MOVs from diff --git 
a/src/broadcom/compiler/vir_opt_dead_code.c b/src/broadcom/compiler/vir_opt_dead_code.c index 5101e62254a..fd1af944427 100644 --- a/src/broadcom/compiler/vir_opt_dead_code.c +++ b/src/broadcom/compiler/vir_opt_dead_code.c @@ -51,22 +51,11 @@ dce(struct v3d_compile *c, struct qinst *inst) vir_remove_instruction(c, inst); } -static bool -has_nonremovable_reads(struct v3d_compile *c, struct qinst *inst) -{ - for (int i = 0; i < vir_get_nsrc(inst); i++) { - if (inst->src[i].file == QFILE_VPM) - return true; - } - - return false; -} - static bool can_write_to_null(struct v3d_compile *c, struct qinst *inst) { /* The SFU instructions must write to a physical register. */ - if (c->devinfo->ver >= 41 && v3d_qpu_uses_sfu(&inst->qpu)) + if (v3d_qpu_uses_sfu(&inst->qpu)) return false; return true; @@ -241,7 +230,6 @@ vir_opt_dead_code(struct v3d_compile *c) } if (v3d_qpu_writes_flags(&inst->qpu) || - has_nonremovable_reads(c, inst) || (is_ldunifa && !is_first_ldunifa && !is_last_ldunifa)) { /* If we can't remove the instruction, but we * don't need its destination value, just diff --git a/src/broadcom/compiler/vir_opt_small_immediates.c b/src/broadcom/compiler/vir_opt_small_immediates.c index ed5bc011964..56f0bf20706 100644 --- a/src/broadcom/compiler/vir_opt_small_immediates.c +++ b/src/broadcom/compiler/vir_opt_small_immediates.c @@ -82,7 +82,7 @@ vir_opt_small_immediates(struct v3d_compile *c) */ struct v3d_qpu_sig new_sig = inst->qpu.sig; uint32_t sig_packed; - if (c->devinfo->ver <= 42) { + if (c->devinfo->ver == 42) { new_sig.small_imm_b = true; } else { if (vir_is_add(inst)) { diff --git a/src/broadcom/compiler/vir_register_allocate.c b/src/broadcom/compiler/vir_register_allocate.c index 081376c0f08..53e84840899 100644 --- a/src/broadcom/compiler/vir_register_allocate.c +++ b/src/broadcom/compiler/vir_register_allocate.c @@ -942,7 +942,7 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra, * avoid allocating these to registers used by the last instructions * in the shader. */ - const uint32_t safe_rf_start = v3d_ra->devinfo->ver <= 42 ? 3 : 4; + const uint32_t safe_rf_start = v3d_ra->devinfo->ver == 42 ? 3 : 4; if (v3d_ra->nodes->info[node].is_program_end && v3d_ra->next_phys < safe_rf_start) { v3d_ra->next_phys = safe_rf_start; @@ -1004,7 +1004,7 @@ vir_init_reg_sets(struct v3d_compiler *compiler) /* Allocate up to 3 regfile classes, for the ways the physical * register file can be divided up for fragment shader threading. */ - int max_thread_index = (compiler->devinfo->ver >= 40 ? 2 : 3); + int max_thread_index = 2; uint8_t phys_index = get_phys_index(compiler->devinfo); compiler->regs = ra_alloc_reg_set(compiler, phys_index + PHYS_COUNT, @@ -1070,20 +1070,10 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, int32_t ip = inst->ip; assert(ip >= 0); - /* If the instruction writes r3/r4 (and optionally moves its - * result to a temp), nothing else can be stored in r3/r4 across + /* If the instruction writes r4 (and optionally moves its + * result to a temp), nothing else can be stored in r4 across * it. 
*/ - if (vir_writes_r3_implicitly(c->devinfo, inst)) { - for (int i = 0; i < c->num_temps; i++) { - if (c->temp_start[i] < ip && c->temp_end[i] > ip) { - ra_add_node_interference(c->g, - temp_to_node(c, i), - acc_nodes[3]); - } - } - } - if (vir_writes_r4_implicitly(c->devinfo, inst)) { for (int i = 0; i < c->num_temps; i++) { if (c->temp_start[i] < ip && c->temp_end[i] > ip) { @@ -1207,15 +1197,6 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c, set_temp_class_bits(c, inst->dst.index, class_bits); - } else { - /* Until V3D 4.x, we could only load a uniform - * to r5, so we'll need to spill if uniform - * loads interfere with each other. - */ - if (c->devinfo->ver < 40) { - set_temp_class_bits(c, inst->dst.index, - CLASS_BITS_R5); - } } } else { /* Make sure we don't allocate the ldvary's @@ -1320,7 +1301,7 @@ v3d_register_allocate(struct v3d_compile *c) * RF0-2. Start at RF4 in 7.x to prevent TLB writes from * using RF2-3. */ - .next_phys = c->devinfo->ver <= 42 ? 3 : 4, + .next_phys = c->devinfo->ver == 42 ? 3 : 4, .nodes = &c->nodes, .devinfo = c->devinfo, }; @@ -1333,10 +1314,8 @@ v3d_register_allocate(struct v3d_compile *c) * are available at both 1x and 2x threading, and 4x has 32. */ c->thread_index = ffs(c->threads) - 1; - if (c->devinfo->ver >= 40) { - if (c->thread_index >= 1) - c->thread_index--; - } + if (c->thread_index >= 1) + c->thread_index--; c->g = ra_alloc_interference_graph(c->compiler->regs, num_ra_nodes); ra_set_select_reg_callback(c->g, v3d_ra_select_callback, &callback_data); diff --git a/src/broadcom/compiler/vir_to_qpu.c b/src/broadcom/compiler/vir_to_qpu.c index 4ed184cbbcb..605c3e4c7d5 100644 --- a/src/broadcom/compiler/vir_to_qpu.c +++ b/src/broadcom/compiler/vir_to_qpu.c @@ -108,7 +108,7 @@ v3d71_set_src(struct v3d_qpu_instr *instr, uint8_t *raddr, struct qpu_reg src) * fields of the instruction. 
*/ static void -v3d33_set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src) +v3d42_set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src) { if (src.smimm) { assert(instr->sig.small_imm_b); @@ -158,13 +158,13 @@ set_src(struct v3d_qpu_instr *instr, const struct v3d_device_info *devinfo) { if (devinfo->ver < 71) - return v3d33_set_src(instr, mux, src); + return v3d42_set_src(instr, mux, src); else return v3d71_set_src(instr, raddr, src); } static bool -v3d33_mov_src_and_dst_equal(struct qinst *qinst) +v3d42_mov_src_and_dst_equal(struct qinst *qinst) { enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr; if (qinst->qpu.alu.mul.magic_write) { @@ -216,7 +216,7 @@ mov_src_and_dst_equal(struct qinst *qinst, const struct v3d_device_info *devinfo) { if (devinfo->ver < 71) - return v3d33_mov_src_and_dst_equal(qinst); + return v3d42_mov_src_and_dst_equal(qinst); else return v3d71_mov_src_and_dst_equal(qinst); } @@ -262,8 +262,6 @@ v3d_generate_code_block(struct v3d_compile *c, struct qblock *block, struct qpu_reg *temp_registers) { - int last_vpm_read_index = -1; - vir_for_each_inst_safe(qinst, block) { #if 0 fprintf(stderr, "translating qinst to qpu: "); @@ -271,8 +269,6 @@ v3d_generate_code_block(struct v3d_compile *c, fprintf(stderr, "\n"); #endif - struct qinst *temp; - if (vir_has_uniform(qinst)) c->num_uniforms++; @@ -303,19 +299,6 @@ v3d_generate_code_block(struct v3d_compile *c, case QFILE_SMALL_IMM: src[i].smimm = true; break; - - case QFILE_VPM: - assert(c->devinfo->ver < 40); - assert((int)qinst->src[i].index >= - last_vpm_read_index); - (void)last_vpm_read_index; - last_vpm_read_index = qinst->src[i].index; - - temp = new_qpu_nop_before(qinst); - temp->qpu.sig.ldvpm = true; - - src[i] = qpu_magic(V3D_QPU_WADDR_R3); - break; } } @@ -337,10 +320,6 @@ v3d_generate_code_block(struct v3d_compile *c, dst = temp_registers[qinst->dst.index]; break; - case QFILE_VPM: - dst = qpu_magic(V3D_QPU_WADDR_VPM); - break; - case QFILE_SMALL_IMM: case QFILE_LOAD_IMM: assert(!"not reached"); @@ -361,8 +340,6 @@ v3d_generate_code_block(struct v3d_compile *c, } if (use_rf) { - assert(c->devinfo->ver >= 40); - if (qinst->qpu.sig.ldunif) { qinst->qpu.sig.ldunif = false; qinst->qpu.sig.ldunifrf = true; @@ -470,11 +447,7 @@ v3d_dump_qpu(struct v3d_compile *c) const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]); fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str); - /* We can only do this on 4.x, because we're not tracking TMU - * implicit uniforms here on 3.x. - */ - if (c->devinfo->ver >= 40 && - reads_uniform(c->devinfo, c->qpu_insts[i])) { + if (reads_uniform(c->devinfo, c->qpu_insts[i])) { fprintf(stderr, " ("); vir_dump_uniform(c->uniform_contents[next_uniform], c->uniform_data[next_uniform]); @@ -486,8 +459,7 @@ v3d_dump_qpu(struct v3d_compile *c) } /* Make sure our dumping lined up. 
*/ - if (c->devinfo->ver >= 40) - assert(next_uniform == c->num_uniforms); + assert(next_uniform == c->num_uniforms); fprintf(stderr, "\n"); } diff --git a/src/broadcom/meson.build b/src/broadcom/meson.build index 73cb7aa0575..30eb57e515f 100644 --- a/src/broadcom/meson.build +++ b/src/broadcom/meson.build @@ -22,7 +22,7 @@ inc_broadcom = include_directories('.', 'cle') subdir('cle') -v3d_versions = ['33', '41', '42', '71'] +v3d_versions = ['42', '71'] v3d_libs = [] if with_gallium_v3d or with_broadcom_vk diff --git a/src/broadcom/simulator/v3d_simulator.h b/src/broadcom/simulator/v3d_simulator.h index 92305634468..03575ae8951 100644 --- a/src/broadcom/simulator/v3d_simulator.h +++ b/src/broadcom/simulator/v3d_simulator.h @@ -45,11 +45,7 @@ uint32_t v3d_simulator_get_mem_free(void); #ifdef v3dX # include "v3dx_simulator.h" #else -# define v3dX(x) v3d33_##x -# include "v3dx_simulator.h" -# undef v3dX - -# define v3dX(x) v3d41_##x +# define v3dX(x) v3d42_##x # include "v3dx_simulator.h" # undef v3dX @@ -61,15 +57,10 @@ uint32_t v3d_simulator_get_mem_free(void); /* Helper to call simulator ver specific functions */ #define v3d_X_simulator(thing) ({ \ - __typeof(&v3d33_simulator_##thing) v3d_X_sim_thing;\ + __typeof(&v3d42_simulator_##thing) v3d_X_sim_thing;\ switch (sim_state.ver) { \ - case 33: \ - case 40: \ - v3d_X_sim_thing = &v3d33_simulator_##thing; \ - break; \ - case 41: \ case 42: \ - v3d_X_sim_thing = &v3d41_simulator_##thing; \ + v3d_X_sim_thing = &v3d42_simulator_##thing; \ break; \ case 71: \ v3d_X_sim_thing = &v3d71_simulator_##thing; \ diff --git a/src/broadcom/simulator/v3dx_simulator.c b/src/broadcom/simulator/v3dx_simulator.c index 904cf2d1b76..0517d4d4658 100644 --- a/src/broadcom/simulator/v3dx_simulator.c +++ b/src/broadcom/simulator/v3dx_simulator.c @@ -51,27 +51,14 @@ #if V3D_VERSION == 71 #include "libs/core/v3d/registers/7.1.6.0/v3d.h" #else -#if V3D_VERSION == 41 || V3D_VERSION == 42 +#if V3D_VERSION == 42 #include "libs/core/v3d/registers/4.2.14.0/v3d.h" -#else -#include "libs/core/v3d/registers/3.3.0.0/v3d.h" #endif #endif #define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val) #define V3D_READ(reg) v3d_hw_read_reg(v3d, reg) -static void -v3d_invalidate_l3(struct v3d_hw *v3d) -{ -#if V3D_VERSION < 40 - uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL); - - V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET); - V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET); -#endif -} - /* Invalidates the L2C cache. This is a read-only cache for uniforms and instructions. 
*/ static void v3d_invalidate_l2c(struct v3d_hw *v3d) @@ -156,7 +143,6 @@ v3d_invalidate_slices(struct v3d_hw *v3d) static void v3d_invalidate_caches(struct v3d_hw *v3d) { - v3d_invalidate_l3(v3d); v3d_invalidate_l2c(v3d); v3d_invalidate_l2t(v3d); v3d_invalidate_slices(v3d); @@ -225,7 +211,7 @@ v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d, struct drm_v3d_submit_csd *args, uint32_t gmp_ofs) { -#if V3D_VERSION >= 41 +#if V3D_VERSION >= 42 int last_completed_jobs = (V3D_READ(V3D_CSD_0_STATUS) & V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET); g_gmp_ofs = gmp_ofs; @@ -282,13 +268,13 @@ v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d, args->value = 1; return 0; case DRM_V3D_PARAM_SUPPORTS_CSD: - args->value = V3D_VERSION >= 41; + args->value = V3D_VERSION >= 42; return 0; case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH: args->value = 1; return 0; case DRM_V3D_PARAM_SUPPORTS_PERFMON: - args->value = V3D_VERSION >= 41; + args->value = V3D_VERSION >= 42; return 0; case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT: args->value = 1; @@ -359,8 +345,7 @@ handle_mmu_interruptions(struct v3d_hw *v3d, uint32_t axi_id = V3D_READ(V3D_MMU_VIO_ID); uint32_t va_width = 30; -#if V3D_VERSION >= 41 - static const char *const v3d41_axi_ids[] = { + static const char *const v3d42_axi_ids[] = { "L2T", "PTB", "PSE", @@ -372,14 +357,14 @@ handle_mmu_interruptions(struct v3d_hw *v3d, }; axi_id = axi_id >> 5; - if (axi_id < ARRAY_SIZE(v3d41_axi_ids)) - client = v3d41_axi_ids[axi_id]; + if (axi_id < ARRAY_SIZE(v3d42_axi_ids)) + client = v3d42_axi_ids[axi_id]; uint32_t mmu_debug = V3D_READ(V3D_MMU_DEBUG_INFO); va_width += ((mmu_debug & V3D_MMU_DEBUG_INFO_VA_WIDTH_SET) >> V3D_MMU_DEBUG_INFO_VA_WIDTH_LSB); -#endif + /* Only the top bits (final number depends on the gen) of the virtual * address are reported in the MMU VIO_ADDR register. */ @@ -454,18 +439,6 @@ v3d_isr(uint32_t hub_status) void v3dX(simulator_init_regs)(struct v3d_hw *v3d) { -#if V3D_VERSION == 33 - /* Set OVRTMUOUT to match kernel behavior. - * - * This means that the texture sampler uniform configuration's tmu - * output type field is used, instead of using the hardware default - * behavior based on the texture type. If you want the default - * behavior, you can still put "2" in the indirect texture state's - * output_type field. - */ - V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET); -#endif - /* FIXME: the kernel captures some additional core interrupts here, * for tracing. Perhaps we should evaluate to do the same here and add * some debug options. 
@@ -514,13 +487,11 @@ v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d, V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma); V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms); } -#if V3D_VERSION >= 41 if (submit->qts) { V3D_WRITE(V3D_CLE_0_CT0QTS, V3D_CLE_0_CT0QTS_CTQTSEN_SET | submit->qts); } -#endif V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start); V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end); @@ -544,21 +515,18 @@ v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d, } } -#if V3D_VERSION >= 41 #define V3D_PCTR_0_PCTR_N(x) (V3D_PCTR_0_PCTR0 + 4 * (x)) #define V3D_PCTR_0_SRC_N(x) (V3D_PCTR_0_SRC_0_3 + 4 * (x)) #define V3D_PCTR_0_SRC_N_SHIFT(x) ((x) * 8) #define V3D_PCTR_0_SRC_N_MASK(x) (BITFIELD_RANGE(V3D_PCTR_0_SRC_N_SHIFT(x), \ V3D_PCTR_0_SRC_N_SHIFT(x) + \ V3D_PCTR_0_SRC_0_3_PCTRS0_MSB)) -#endif void v3dX(simulator_perfmon_start)(struct v3d_hw *v3d, uint32_t ncounters, uint8_t *events) { -#if V3D_VERSION >= 41 int i, j; uint32_t source; uint32_t mask = BITFIELD_RANGE(0, ncounters); @@ -573,21 +541,18 @@ v3dX(simulator_perfmon_start)(struct v3d_hw *v3d, V3D_WRITE(V3D_PCTR_0_CLR, mask); V3D_WRITE(V3D_PCTR_0_OVERFLOW, mask); V3D_WRITE(V3D_PCTR_0_EN, mask); -#endif } void v3dX(simulator_perfmon_stop)(struct v3d_hw *v3d, uint32_t ncounters, uint64_t *values) { -#if V3D_VERSION >= 41 int i; for (i = 0; i < ncounters; i++) values[i] += V3D_READ(V3D_PCTR_0_PCTR_N(i)); V3D_WRITE(V3D_PCTR_0_EN, 0); -#endif } void v3dX(simulator_get_perfcnt_total)(uint32_t *count) diff --git a/src/broadcom/vulkan/v3dv_cl.c b/src/broadcom/vulkan/v3dv_cl.c index acdd013a996..851e1388a8d 100644 --- a/src/broadcom/vulkan/v3dv_cl.c +++ b/src/broadcom/vulkan/v3dv_cl.c @@ -27,7 +27,7 @@ * versions, so we just explicitly set the V3D_VERSION and include v3dx_pack * here */ -#define V3D_VERSION 33 +#define V3D_VERSION 42 #include "broadcom/common/v3d_macros.h" #include "broadcom/cle/v3dx_pack.h" diff --git a/src/broadcom/vulkan/v3dv_private.h b/src/broadcom/vulkan/v3dv_private.h index 9c104b3d6d4..8d9914938e2 100644 --- a/src/broadcom/vulkan/v3dv_private.h +++ b/src/broadcom/vulkan/v3dv_private.h @@ -618,10 +618,10 @@ struct v3dv_device_memory { #define V3DV_MAX_PLANE_COUNT 3 struct v3dv_format_plane { - /* One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */ + /* One of V3D42_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */ uint8_t rt_type; - /* One of V3D33_TEXTURE_DATA_FORMAT_*. */ + /* One of V3D42_TEXTURE_DATA_FORMAT_*. 
*/ uint8_t tex_type; /* Swizzle to apply to the RGBA shader output for storing to the tile diff --git a/src/gallium/drivers/v3d/meson.build b/src/gallium/drivers/v3d/meson.build index 289473d2ca1..600840b8764 100644 --- a/src/gallium/drivers/v3d/meson.build +++ b/src/gallium/drivers/v3d/meson.build @@ -59,7 +59,7 @@ if dep_v3dv3.found() v3d_args += '-DUSE_V3D_SIMULATOR' endif -v3d_versions = ['33', '42', '71'] +v3d_versions = ['42', '71'] per_version_libs = [] foreach ver : v3d_versions diff --git a/src/gallium/drivers/v3d/v3d_blit.c b/src/gallium/drivers/v3d/v3d_blit.c index f62d3a4f40f..32030a7b4e1 100644 --- a/src/gallium/drivers/v3d/v3d_blit.c +++ b/src/gallium/drivers/v3d/v3d_blit.c @@ -309,7 +309,7 @@ v3d_tlb_blit(struct pipe_context *pctx, struct pipe_blit_info *info) struct v3d_screen *screen = v3d->screen; struct v3d_device_info *devinfo = &screen->devinfo; - if (devinfo->ver < 40 || !info->mask) + if (!info->mask) return; bool is_color_blit = info->mask & PIPE_MASK_RGBA; diff --git a/src/gallium/drivers/v3d/v3d_cl.c b/src/gallium/drivers/v3d/v3d_cl.c index c03927e0453..d8ee4ffc206 100644 --- a/src/gallium/drivers/v3d/v3d_cl.c +++ b/src/gallium/drivers/v3d/v3d_cl.c @@ -28,7 +28,7 @@ * hw versions, so we just explicitly set the V3D_VERSION and include * v3dx_pack here */ -#define V3D_VERSION 33 +#define V3D_VERSION 42 #include "broadcom/common/v3d_macros.h" #include "broadcom/cle/v3dx_pack.h" diff --git a/src/gallium/drivers/v3d/v3d_context.c b/src/gallium/drivers/v3d/v3d_context.c index 1dc4bd017fe..240c99672f9 100644 --- a/src/gallium/drivers/v3d/v3d_context.c +++ b/src/gallium/drivers/v3d/v3d_context.c @@ -300,16 +300,11 @@ v3d_get_sample_position(struct pipe_context *pctx, unsigned sample_count, unsigned sample_index, float *xy) { - struct v3d_context *v3d = v3d_context(pctx); - if (sample_count <= 1) { xy[0] = 0.5; xy[1] = 0.5; } else { - static const int xoffsets_v33[] = { 1, -3, 3, -1 }; - static const int xoffsets_v42[] = { -1, 3, -3, 1 }; - const int *xoffsets = (v3d->screen->devinfo.ver >= 42 ? - xoffsets_v42 : xoffsets_v33); + static const int xoffsets[] = { -1, 3, -3, 1 }; xy[0] = 0.5 + xoffsets[sample_index] * .125; xy[1] = .125 + sample_index * .25; diff --git a/src/gallium/drivers/v3d/v3d_context.h b/src/gallium/drivers/v3d/v3d_context.h index 948abe686d7..2f27693fef6 100644 --- a/src/gallium/drivers/v3d/v3d_context.h +++ b/src/gallium/drivers/v3d/v3d_context.h @@ -825,12 +825,8 @@ void v3d_disk_cache_store(struct v3d_context *v3d, /* Helper to call hw ver specific functions */ #define v3d_X(devinfo, thing) ({ \ - __typeof(&v3d33_##thing) v3d_X_thing; \ + __typeof(&v3d42_##thing) v3d_X_thing; \ switch (devinfo->ver) { \ - case 33: \ - case 40: \ - v3d_X_thing = &v3d33_##thing; \ - break; \ case 42: \ v3d_X_thing = &v3d42_##thing; \ break; \ @@ -846,19 +842,13 @@ void v3d_disk_cache_store(struct v3d_context *v3d, /* FIXME: The same for vulkan/opengl. Common place? define it at the * v3d_packet files? 
*/ -#define V3D33_CLIPPER_XY_GRANULARITY 256.0f #define V3D42_CLIPPER_XY_GRANULARITY 256.0f #define V3D71_CLIPPER_XY_GRANULARITY 64.0f /* Helper to get hw-specific macro values */ #define V3DV_X(devinfo, thing) ({ \ - __typeof(V3D33_##thing) V3D_X_THING; \ + __typeof(V3D42_##thing) V3D_X_THING; \ switch (devinfo->ver) { \ - case 33: \ - case 40: \ - V3D_X_THING = V3D33_##thing; \ - break; \ - case 41: \ case 42: \ V3D_X_THING = V3D42_##thing; \ break; \ @@ -874,10 +864,6 @@ void v3d_disk_cache_store(struct v3d_context *v3d, #ifdef v3dX # include "v3dx_context.h" #else -# define v3dX(x) v3d33_##x -# include "v3dx_context.h" -# undef v3dX - # define v3dX(x) v3d42_##x # include "v3dx_context.h" # undef v3dX diff --git a/src/gallium/drivers/v3d/v3d_format_table.h b/src/gallium/drivers/v3d/v3d_format_table.h index b291708c3ed..45cddeb669d 100644 --- a/src/gallium/drivers/v3d/v3d_format_table.h +++ b/src/gallium/drivers/v3d/v3d_format_table.h @@ -30,10 +30,10 @@ struct v3d_format { /** Set if the pipe format is defined in the table. */ bool present; - /** One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */ + /** One of V3D42_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */ uint8_t rt_type; - /** One of V3D33_TEXTURE_DATA_FORMAT_*. */ + /** One of V3D42_TEXTURE_DATA_FORMAT_*. */ uint8_t tex_type; /** diff --git a/src/gallium/drivers/v3d/v3d_formats.c b/src/gallium/drivers/v3d/v3d_formats.c index 559c6681e22..cb01f05e31b 100644 --- a/src/gallium/drivers/v3d/v3d_formats.c +++ b/src/gallium/drivers/v3d/v3d_formats.c @@ -38,7 +38,7 @@ #include "v3d_format_table.h" /* The format internal types are the same across V3D versions */ -#define V3D_VERSION 33 +#define V3D_VERSION 42 #include "broadcom/cle/v3dx_pack.h" bool diff --git a/src/gallium/drivers/v3d/v3d_job.c b/src/gallium/drivers/v3d/v3d_job.c index d837a9b23ac..68b67a5ce10 100644 --- a/src/gallium/drivers/v3d/v3d_job.c +++ b/src/gallium/drivers/v3d/v3d_job.c @@ -29,7 +29,7 @@ #include #include "v3d_context.h" /* The OQ/semaphore packets are the same across V3D versions. */ -#define V3D_VERSION 33 +#define V3D_VERSION 42 #include "broadcom/cle/v3dx_pack.h" #include "broadcom/common/v3d_macros.h" #include "util/hash_table.h" @@ -547,7 +547,7 @@ v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job) /* On V3D 4.1, the tile alloc/state setup moved to register writes * instead of binner packets. */ - if (devinfo->ver >= 41) { + if (devinfo->ver >= 42) { v3d_job_add_bo(job, job->tile_alloc); job->submit.qma = job->tile_alloc->offset; job->submit.qms = job->tile_alloc->size; diff --git a/src/gallium/drivers/v3d/v3d_program.c b/src/gallium/drivers/v3d/v3d_program.c index 89fee012ddd..236dd15ced1 100644 --- a/src/gallium/drivers/v3d/v3d_program.c +++ b/src/gallium/drivers/v3d/v3d_program.c @@ -35,7 +35,8 @@ #include "nir/tgsi_to_nir.h" #include "compiler/v3d_compiler.h" #include "v3d_context.h" -#include "broadcom/cle/v3d_packet_v33_pack.h" +/* packets here are the same across V3D versions. */ +#include "broadcom/cle/v3d_packet_v42_pack.h" static struct v3d_compiled_shader * v3d_get_compiled_shader(struct v3d_context *v3d, @@ -136,7 +137,7 @@ v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so, while (vpm_size) { uint32_t write_size = MIN2(vpm_size, 1 << 4); - struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = { + struct V3D42_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = { /* We need the offset from the coordinate shader's VPM * output block, which has the [X, Y, Z, W, Xs, Ys] * values at the start. 
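For context, the v3d_X()/V3DV_X() dispatch helpers trimmed in v3d_context.h above now only distinguish the 4.2 and 7.1 backends. A minimal usage sketch under that assumption; the wrapper function below is hypothetical and not part of the patch:

#include "v3d_context.h"   /* assumed: provides v3d_X()/V3DV_X() and struct v3d_context */

/* Hypothetical caller: v3d_X() resolves the v3d42_* or v3d71_* symbol at run
 * time, and V3DV_X() does the same for per-version macro values such as
 * CLIPPER_XY_GRANULARITY (256.0f on 4.2, 64.0f on 7.1).
 */
static void
emit_state_for_current_ver(struct v3d_context *v3d, struct pipe_context *pctx)
{
        struct v3d_device_info *devinfo = &v3d->screen->devinfo;

        float granularity = V3DV_X(devinfo, CLIPPER_XY_GRANULARITY);
        (void)granularity;

        /* Expands to v3d42_emit_state(pctx) or v3d71_emit_state(pctx). */
        v3d_X(devinfo, emit_state)(pctx);
}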
@@ -151,7 +152,7 @@ v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so, so->num_tf_specs != 0); assert(so->num_tf_specs != ARRAY_SIZE(so->tf_specs)); - V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, + V3D42_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, (void *)&so->tf_specs[so->num_tf_specs], &unpacked); @@ -166,7 +167,7 @@ v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so, assert(unpacked.first_shaded_vertex_value_to_output != 8 || so->num_tf_specs != 0); - V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, + V3D42_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL, (void *)&so->tf_specs_psiz[so->num_tf_specs], &unpacked); so->num_tf_specs++; @@ -559,7 +560,6 @@ v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key, assert(key->num_tex_used == key->num_samplers_used); for (int i = 0; i < texstate->num_textures; i++) { struct pipe_sampler_view *sampler = texstate->textures[i]; - struct v3d_sampler_view *v3d_sampler = v3d_sampler_view(sampler); if (!sampler) continue; @@ -573,27 +573,16 @@ v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key, */ if (key->sampler[i].return_size == 16) { key->sampler[i].return_channels = 2; - } else if (devinfo->ver > 40) { - key->sampler[i].return_channels = 4; } else { - key->sampler[i].return_channels = - v3d_get_tex_return_channels(devinfo, - sampler->format); + key->sampler[i].return_channels = 4; } - if (key->sampler[i].return_size == 32 && devinfo->ver < 40) { - memcpy(key->tex[i].swizzle, - v3d_sampler->swizzle, - sizeof(v3d_sampler->swizzle)); - } else { - /* For 16-bit returns, we let the sampler state handle - * the swizzle. - */ - key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; - key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; - key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; - key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; - } + /* We let the sampler state handle the swizzle. + */ + key->tex[i].swizzle[0] = PIPE_SWIZZLE_X; + key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y; + key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z; + key->tex[i].swizzle[3] = PIPE_SWIZZLE_W; } } diff --git a/src/gallium/drivers/v3d/v3d_resource.c b/src/gallium/drivers/v3d/v3d_resource.c index d9a79614dd1..4003ed722b5 100644 --- a/src/gallium/drivers/v3d/v3d_resource.c +++ b/src/gallium/drivers/v3d/v3d_resource.c @@ -36,7 +36,8 @@ #include "v3d_screen.h" #include "v3d_context.h" #include "v3d_resource.h" -#include "broadcom/cle/v3d_packet_v33_pack.h" +/* The packets used here the same across V3D versions. 
*/ +#include "broadcom/cle/v3d_packet_v42_pack.h" static void v3d_debug_resource_layout(struct v3d_resource *rsc, const char *caller) @@ -747,8 +748,6 @@ static struct v3d_resource * v3d_resource_setup(struct pipe_screen *pscreen, const struct pipe_resource *tmpl) { - struct v3d_screen *screen = v3d_screen(pscreen); - struct v3d_device_info *devinfo = &screen->devinfo; struct v3d_resource *rsc = CALLOC_STRUCT(v3d_resource); if (!rsc) @@ -760,34 +759,7 @@ v3d_resource_setup(struct pipe_screen *pscreen, pipe_reference_init(&prsc->reference, 1); prsc->screen = pscreen; - if (prsc->nr_samples <= 1 || - devinfo->ver >= 40 || - util_format_is_depth_or_stencil(prsc->format)) { - rsc->cpp = util_format_get_blocksize(prsc->format); - if (devinfo->ver < 40 && prsc->nr_samples > 1) - rsc->cpp *= prsc->nr_samples; - } else { - assert(v3d_rt_format_supported(devinfo, prsc->format)); - uint32_t output_image_format = - v3d_get_rt_format(devinfo, prsc->format); - uint32_t internal_type; - uint32_t internal_bpp; - v3d_X(devinfo, get_internal_type_bpp_for_output_format) - (output_image_format, &internal_type, &internal_bpp); - - switch (internal_bpp) { - case V3D_INTERNAL_BPP_32: - rsc->cpp = 4; - break; - case V3D_INTERNAL_BPP_64: - rsc->cpp = 8; - break; - case V3D_INTERNAL_BPP_128: - rsc->cpp = 16; - break; - } - } - + rsc->cpp = util_format_get_blocksize(prsc->format); rsc->serial_id++; assert(rsc->cpp); diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c index 08d02c9a73b..44d5b90c44d 100644 --- a/src/gallium/drivers/v3d/v3d_screen.c +++ b/src/gallium/drivers/v3d/v3d_screen.c @@ -153,7 +153,7 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 1; case PIPE_CAP_POLYGON_OFFSET_CLAMP: - return screen->devinfo.ver >= 41; + return screen->devinfo.ver >= 42; case PIPE_CAP_TEXTURE_QUERY_LOD: @@ -182,20 +182,18 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return PIPE_TEXTURE_TRANSFER_BLIT; case PIPE_CAP_COMPUTE: - return screen->has_csd && screen->devinfo.ver >= 41; + return screen->has_csd && screen->devinfo.ver >= 42; case PIPE_CAP_GENERATE_MIPMAP: return v3d_has_feature(screen, DRM_V3D_PARAM_SUPPORTS_TFU); case PIPE_CAP_INDEP_BLEND_ENABLE: - return screen->devinfo.ver >= 40; + return 1; case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT: return V3D_NON_COHERENT_ATOM_SIZE; case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: - if (screen->devinfo.ver < 40) - return 0; return 4; case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT: @@ -218,15 +216,9 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT: return 0; case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER: - if (screen->devinfo.ver >= 40) - return 0; - else - return 1; + return 0; case PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER: - if (screen->devinfo.ver >= 40) - return 1; - else - return 0; + return 1; case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES: case PIPE_CAP_MIXED_COLOR_DEPTH_BITS: @@ -240,18 +232,13 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) /* Texturing. 
*/ case PIPE_CAP_MAX_TEXTURE_2D_SIZE: - if (screen->devinfo.ver < 40) - return 2048; - else if (screen->nonmsaa_texture_size_limit) + if (screen->nonmsaa_texture_size_limit) return 7680; else return V3D_MAX_IMAGE_DIMENSION; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: - if (screen->devinfo.ver < 40) - return 12; - else - return V3D_MAX_MIP_LEVELS; + return V3D_MAX_MIP_LEVELS; case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: return V3D_MAX_ARRAY_LAYERS; @@ -361,7 +348,7 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type s return 0; break; case PIPE_SHADER_GEOMETRY: - if (screen->devinfo.ver < 41) + if (screen->devinfo.ver < 42) return 0; break; default: @@ -454,7 +441,7 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type s case PIPE_SHADER_CAP_MAX_SHADER_IMAGES: if (screen->has_cache_flush) { - if (screen->devinfo.ver < 41) + if (screen->devinfo.ver < 42) return 0; else return PIPE_MAX_SHADER_IMAGES; diff --git a/src/gallium/drivers/v3d/v3d_uniforms.c b/src/gallium/drivers/v3d/v3d_uniforms.c index 64c217d4f6c..c3b52dd39e3 100644 --- a/src/gallium/drivers/v3d/v3d_uniforms.c +++ b/src/gallium/drivers/v3d/v3d_uniforms.c @@ -28,9 +28,9 @@ #include "compiler/v3d_compiler.h" /* We don't expect that the packets we use in this file change across across - * hw versions, so we just include directly the v33 header + * hw versions, so we just include directly the v42 header */ -#include "broadcom/cle/v3d_packet_v33_pack.h" +#include "broadcom/cle/v3d_packet_v42_pack.h" static uint32_t get_texrect_scale(struct v3d_texture_stateobj *texstate, @@ -124,54 +124,6 @@ get_image_size(struct v3d_shaderimg_stateobj *shaderimg, } } -/** - * Writes the V3D 3.x P0 (CFG_MODE=1) texture parameter. - * - * Some bits of this field are dependent on the type of sample being done by - * the shader, while other bits are dependent on the sampler state. We OR the - * two together here. - */ -static void -write_texture_p0(struct v3d_job *job, - struct v3d_cl_out **uniforms, - struct v3d_texture_stateobj *texstate, - uint32_t unit, - uint32_t shader_data) -{ - struct pipe_sampler_state *psampler = texstate->samplers[unit]; - struct v3d_sampler_state *sampler = v3d_sampler_state(psampler); - - cl_aligned_u32(uniforms, shader_data | sampler->p0); -} - -/** Writes the V3D 3.x P1 (CFG_MODE=1) texture parameter. */ -static void -write_texture_p1(struct v3d_job *job, - struct v3d_cl_out **uniforms, - struct v3d_texture_stateobj *texstate, - uint32_t data) -{ - /* Extract the texture unit from the top bits, and the compiler's - * packed p1 from the bottom. - */ - uint32_t unit = data >> 5; - uint32_t p1 = data & 0x1f; - - struct pipe_sampler_view *psview = texstate->textures[unit]; - struct v3d_sampler_view *sview = v3d_sampler_view(psview); - - struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = { - .texture_state_record_base_address = texstate->texture_state[unit], - }; - - uint32_t packed; - V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(&job->indirect, - (uint8_t *)&packed, - &unpacked); - - cl_aligned_u32(uniforms, p1 | packed | sview->p1); -} - /** Writes the V3D 4.x TMU configuration parameter 0. 
*/ static void write_tmu_p0(struct v3d_job *job, @@ -328,11 +280,6 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_job *job, &v3d->shaderimg[stage], data); break; - case QUNIFORM_TEXTURE_CONFIG_P1: - write_texture_p1(job, &uniforms, texstate, - data); - break; - case QUNIFORM_TEXRECT_SCALE_X: case QUNIFORM_TEXRECT_SCALE_Y: cl_aligned_u32(&uniforms, @@ -437,13 +384,7 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_job *job, break; default: - assert(quniform_contents_is_texture_p0(uinfo->contents[i])); - - write_texture_p0(job, &uniforms, texstate, - uinfo->contents[i] - - QUNIFORM_TEXTURE_CONFIG_P0_0, - data); - break; + unreachable("Unknown QUNIFORM"); } #if 0 diff --git a/src/gallium/drivers/v3d/v3dx_draw.c b/src/gallium/drivers/v3d/v3dx_draw.c index 92dfdd9fc1d..0f3802fbf0a 100644 --- a/src/gallium/drivers/v3d/v3dx_draw.c +++ b/src/gallium/drivers/v3d/v3dx_draw.c @@ -75,7 +75,7 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job) job->tile_alloc = v3d_bo_alloc(v3d->screen, tile_alloc_size, "tile_alloc"); - uint32_t tsda_per_tile_size = v3d->screen->devinfo.ver >= 40 ? 256 : 64; + uint32_t tsda_per_tile_size = 256; job->tile_state = v3d_bo_alloc(v3d->screen, MAX2(job->num_layers, 1) * job->draw_tiles_y * @@ -83,7 +83,6 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job) tsda_per_tile_size, "TSDA"); -#if V3D_VERSION >= 41 /* This must go before the binning mode configuration. It is * required for layered framebuffers to work. */ @@ -92,7 +91,6 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job) config.number_of_layers = job->num_layers; } } -#endif assert(!job->msaa || !job->double_buffer); #if V3D_VERSION >= 71 @@ -113,7 +111,7 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job) #endif -#if V3D_VERSION >= 40 && V3D_VERSION <= 42 +#if V3D_VERSION == 42 cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) { config.width_in_pixels = job->draw_width; config.height_in_pixels = job->draw_height; @@ -126,34 +124,6 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job) config.maximum_bpp_of_all_render_targets = job->internal_bpp; } #endif -#if V3D_VERSION < 40 - /* "Binning mode lists start with a Tile Binning Mode Configuration - * item (120)" - * - * Part1 signals the end of binning config setup. - */ - cl_emit(&job->bcl, TILE_BINNING_MODE_CFG_PART2, config) { - config.tile_allocation_memory_address = - cl_address(job->tile_alloc, 0); - config.tile_allocation_memory_size = job->tile_alloc->size; - } - - cl_emit(&job->bcl, TILE_BINNING_MODE_CFG_PART1, config) { - config.tile_state_data_array_base_address = - cl_address(job->tile_state, 0); - - config.width_in_tiles = job->draw_tiles_x; - config.height_in_tiles = job->draw_tiles_y; - /* Must be >= 1 */ - config.number_of_render_targets = - MAX2(job->nr_cbufs, 1); - - config.multisample_mode_4x = job->msaa; - config.double_buffer_in_non_ms_mode = job->double_buffer; - - config.maximum_bpp_of_all_render_targets = job->internal_bpp; - } -#endif /* There's definitely nothing in the VCD cache we want. 
*/ cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin); @@ -380,7 +350,6 @@ v3d_emit_wait_for_tf_if_needed(struct v3d_context *v3d, struct v3d_job *job) } } -#if V3D_VERSION >= 41 static void v3d_emit_gs_state_record(struct v3d_job *job, struct v3d_compiled_shader *gs_bin, @@ -396,7 +365,7 @@ v3d_emit_gs_state_record(struct v3d_job *job, gs_bin->prog_data.gs->base.threads == 4; shader.geometry_bin_mode_shader_start_in_final_thread_section = gs_bin->prog_data.gs->base.single_seg; -#if V3D_VERSION <= 42 +#if V3D_VERSION == 42 shader.geometry_bin_mode_shader_propagate_nans = true; #endif shader.geometry_bin_mode_shader_uniforms_address = @@ -408,7 +377,7 @@ v3d_emit_gs_state_record(struct v3d_job *job, gs->prog_data.gs->base.threads == 4; shader.geometry_render_mode_shader_start_in_final_thread_section = gs->prog_data.gs->base.single_seg; -#if V3D_VERSION <= 42 +#if V3D_VERSION == 42 shader.geometry_render_mode_shader_propagate_nans = true; #endif shader.geometry_render_mode_shader_uniforms_address = @@ -500,7 +469,6 @@ v3d_emit_tes_gs_shader_params(struct v3d_job *job, shader.gbg_min_gs_output_segments_required_in_play = 1; } } -#endif static void v3d_emit_gl_shader_state(struct v3d_context *v3d, @@ -559,14 +527,12 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, uint32_t shader_state_record_length = cl_packet_length(GL_SHADER_STATE_RECORD); -#if V3D_VERSION >= 41 if (v3d->prog.gs) { shader_state_record_length += cl_packet_length(GEOMETRY_SHADER_STATE_RECORD) + cl_packet_length(TESSELLATION_GEOMETRY_COMMON_PARAMS) + 2 * cl_packet_length(TESSELLATION_GEOMETRY_SHADER_PARAMS); } -#endif /* See GFXH-930 workaround below */ uint32_t shader_rec_offset = @@ -582,8 +548,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, */ struct vpm_config vpm_cfg_bin, vpm_cfg; - - assert(v3d->screen->devinfo.ver >= 41 || !v3d->prog.gs); v3d_compute_vpm_config(&v3d->screen->devinfo, v3d->prog.cs->prog_data.vs, v3d->prog.vs->prog_data.vs, @@ -593,7 +557,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, &vpm_cfg); if (v3d->prog.gs) { -#if V3D_VERSION >= 41 v3d_emit_gs_state_record(v3d->job, v3d->prog.gs_bin, gs_bin_uniforms, v3d->prog.gs, gs_uniforms); @@ -614,9 +577,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, vpm_cfg.gs_width, vpm_cfg.Gd, vpm_cfg.Gv); -#else - unreachable("No GS support pre-4.1"); -#endif } cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) { @@ -643,20 +603,16 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 = v3d->prog.fs->prog_data.fs->uses_center_w; -#if V3D_VERSION >= 41 shader.any_shader_reads_hardware_written_primitive_id = (v3d->prog.gs && v3d->prog.gs->prog_data.gs->uses_pid) || v3d->prog.fs->prog_data.fs->uses_pid; shader.insert_primitive_id_as_first_varying_to_fragment_shader = !v3d->prog.gs && v3d->prog.fs->prog_data.fs->uses_pid; -#endif -#if V3D_VERSION >= 40 - shader.do_scoreboard_wait_on_first_thread_switch = + shader.do_scoreboard_wait_on_first_thread_switch = v3d->prog.fs->prog_data.fs->lock_scoreboard_on_first_thrsw; - shader.disable_implicit_point_line_varyings = + shader.disable_implicit_point_line_varyings = !v3d->prog.fs->prog_data.fs->uses_implicit_point_line_varyings; -#endif shader.number_of_varyings_in_fragment_shader = v3d->prog.fs->prog_data.fs->num_inputs; @@ -671,7 +627,7 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, cl_address(v3d_resource(v3d->prog.fs->resource)->bo, v3d->prog.fs->offset); -#if V3D_VERSION <= 42 +#if V3D_VERSION == 42 
shader.coordinate_shader_propagate_nans = true; shader.vertex_shader_propagate_nans = true; shader.fragment_shader_propagate_nans = true; @@ -711,7 +667,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, shader.vertex_shader_uniforms_address = vs_uniforms; shader.fragment_shader_uniforms_address = fs_uniforms; -#if V3D_VERSION >= 41 shader.min_coord_shader_input_segments_required_in_play = vpm_cfg_bin.As; shader.min_vertex_shader_input_segments_required_in_play = @@ -735,20 +690,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, v3d->prog.vs->prog_data.vs->base.single_seg; shader.fragment_shader_start_in_final_thread_section = v3d->prog.fs->prog_data.fs->base.single_seg; -#else - shader.coordinate_shader_4_way_threadable = - v3d->prog.cs->prog_data.vs->base.threads == 4; - shader.coordinate_shader_2_way_threadable = - v3d->prog.cs->prog_data.vs->base.threads == 2; - shader.vertex_shader_4_way_threadable = - v3d->prog.vs->prog_data.vs->base.threads == 4; - shader.vertex_shader_2_way_threadable = - v3d->prog.vs->prog_data.vs->base.threads == 2; - shader.fragment_shader_4_way_threadable = - v3d->prog.fs->prog_data.fs->base.threads == 4; - shader.fragment_shader_2_way_threadable = - v3d->prog.fs->prog_data.fs->base.threads == 2; -#endif shader.vertex_id_read_by_coordinate_shader = v3d->prog.cs->prog_data.vs->uses_vid; @@ -759,7 +700,7 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, shader.instance_id_read_by_vertex_shader = v3d->prog.vs->prog_data.vs->uses_iid; -#if V3D_VERSION <= 42 +#if V3D_VERSION == 42 shader.address_of_default_attribute_values = cl_address(v3d_resource(vtx->defaults)->bo, vtx->defaults_offset); @@ -802,9 +743,7 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, if (i == vtx->num_elements - 1 && !cs_loaded_any) { attr.number_of_values_read_by_coordinate_shader = 1; } -#if V3D_VERSION >= 41 attr.maximum_index = 0xffffff; -#endif } STATIC_ASSERT(sizeof(vtx->attrs) >= V3D_MAX_VS_INPUTS / 4 * size); } @@ -833,7 +772,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, vcm.number_of_16_vertex_batches_for_rendering = vpm_cfg.Vc; } -#if V3D_VERSION >= 41 if (v3d->prog.gs) { cl_emit(&job->bcl, GL_SHADER_STATE_INCLUDING_GS, state) { state.address = cl_address(job->indirect.bo, @@ -847,13 +785,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d, state.number_of_attribute_arrays = num_elements_to_emit; } } -#else - assert(!v3d->prog.gs); - cl_emit(&job->bcl, GL_SHADER_STATE, state) { - state.address = cl_address(job->indirect.bo, shader_rec_offset); - state.number_of_attribute_arrays = num_elements_to_emit; - } -#endif v3d_bo_unreference(&cs_uniforms.bo); v3d_bo_unreference(&vs_uniforms.bo); @@ -1164,13 +1095,6 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, } uint32_t prim_tf_enable = 0; -#if V3D_VERSION < 40 - /* V3D 3.x: The HW only processes transform feedback on primitives - * with the flag set. 
- */ - if (v3d->streamout.num_targets) - prim_tf_enable = (V3D_PRIM_POINTS_TF - V3D_PRIM_POINTS); -#endif v3d->prim_restart = info->primitive_restart; @@ -1194,20 +1118,14 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, } struct v3d_resource *rsc = v3d_resource(prsc); -#if V3D_VERSION >= 40 cl_emit(&job->bcl, INDEX_BUFFER_SETUP, ib) { ib.address = cl_address(rsc->bo, 0); ib.size = rsc->bo->size; } -#endif if (indirect && indirect->buffer) { cl_emit(&job->bcl, INDIRECT_INDEXED_INSTANCED_PRIM_LIST, prim) { prim.index_type = ffs(info->index_size) - 1; -#if V3D_VERSION < 40 - prim.address_of_indices_list = - cl_address(rsc->bo, offset); -#endif /* V3D_VERSION < 40 */ prim.mode = hw_prim_type | prim_tf_enable; prim.enable_primitive_restarts = info->primitive_restart; @@ -1220,13 +1138,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, } else if (info->instance_count > 1) { cl_emit(&job->bcl, INDEXED_INSTANCED_PRIM_LIST, prim) { prim.index_type = ffs(info->index_size) - 1; -#if V3D_VERSION >= 40 prim.index_offset = offset; -#else /* V3D_VERSION < 40 */ - prim.maximum_index = (1u << 31) - 1; /* XXX */ - prim.address_of_indices_list = - cl_address(rsc->bo, offset); -#endif /* V3D_VERSION < 40 */ prim.mode = hw_prim_type | prim_tf_enable; prim.enable_primitive_restarts = info->primitive_restart; @@ -1237,13 +1149,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, cl_emit(&job->bcl, INDEXED_PRIM_LIST, prim) { prim.index_type = ffs(info->index_size) - 1; prim.length = draws[0].count; -#if V3D_VERSION >= 40 prim.index_offset = offset; -#else /* V3D_VERSION < 40 */ - prim.maximum_index = (1u << 31) - 1; /* XXX */ - prim.address_of_indices_list = - cl_address(rsc->bo, offset); -#endif /* V3D_VERSION < 40 */ prim.mode = hw_prim_type | prim_tf_enable; prim.enable_primitive_restarts = info->primitive_restart; } @@ -1361,7 +1267,6 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, v3d_flush(pctx); } -#if V3D_VERSION >= 41 #define V3D_CSD_CFG012_WG_COUNT_SHIFT 16 #define V3D_CSD_CFG012_WG_OFFSET_SHIFT 0 /* Allow this dispatch to start while the last one is still running. */ @@ -1563,7 +1468,6 @@ v3d_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info) v3d_bo_unreference(&uniforms.bo); v3d_bo_unreference(&v3d->compute_shared_memory); } -#endif /** * Implements gallium's clear() hook (glClear()) by drawing a pair of triangles. @@ -1607,7 +1511,7 @@ v3d_tlb_clear(struct v3d_job *job, unsigned buffers, * if it would be possible to need to emit a load of just one after * we've set up our TLB clears. This issue is fixed since V3D 4.3.18. 
*/ - if (v3d->screen->devinfo.ver <= 42 && + if (v3d->screen->devinfo.ver == 42 && buffers & PIPE_CLEAR_DEPTHSTENCIL && (buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf && @@ -1762,8 +1666,6 @@ v3dX(draw_init)(struct pipe_context *pctx) pctx->clear = v3d_clear; pctx->clear_render_target = v3d_clear_render_target; pctx->clear_depth_stencil = v3d_clear_depth_stencil; -#if V3D_VERSION >= 41 if (v3d_context(pctx)->screen->has_csd) pctx->launch_grid = v3d_launch_grid; -#endif } diff --git a/src/gallium/drivers/v3d/v3dx_emit.c b/src/gallium/drivers/v3d/v3dx_emit.c index ee17b935e19..705b3b74969 100644 --- a/src/gallium/drivers/v3d/v3dx_emit.c +++ b/src/gallium/drivers/v3d/v3dx_emit.c @@ -78,172 +78,6 @@ v3d_factor(enum pipe_blendfactor factor, bool dst_alpha_one) } } -#if V3D_VERSION < 40 -static inline uint16_t -swizzled_border_color(const struct v3d_device_info *devinfo, - struct pipe_sampler_state *sampler, - struct v3d_sampler_view *sview, - int chan) -{ - const struct util_format_description *desc = - util_format_description(sview->base.format); - uint8_t swiz = chan; - - /* If we're doing swizzling in the sampler, then only rearrange the - * border color for the mismatch between the V3D texture format and - * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by - * the sampler's swizzle. - * - * For swizzling in the shader, we don't do any pre-swizzling of the - * border color. - */ - if (v3d_get_tex_return_size(devinfo, sview->base.format) != 32) - swiz = desc->swizzle[swiz]; - - switch (swiz) { - case PIPE_SWIZZLE_0: - return _mesa_float_to_half(0.0); - case PIPE_SWIZZLE_1: - return _mesa_float_to_half(1.0); - default: - return _mesa_float_to_half(sampler->border_color.f[swiz]); - } -} - -static void -emit_one_texture(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex, - int i) -{ - struct v3d_job *job = v3d->job; - struct pipe_sampler_state *psampler = stage_tex->samplers[i]; - struct v3d_sampler_state *sampler = v3d_sampler_state(psampler); - struct pipe_sampler_view *psview = stage_tex->textures[i]; - struct v3d_sampler_view *sview = v3d_sampler_view(psview); - struct pipe_resource *prsc = psview->texture; - struct v3d_resource *rsc = v3d_resource(prsc); - const struct v3d_device_info *devinfo = &v3d->screen->devinfo; - - stage_tex->texture_state[i].offset = - v3d_cl_ensure_space(&job->indirect, - cl_packet_length(TEXTURE_SHADER_STATE), - 32); - v3d_bo_set_reference(&stage_tex->texture_state[i].bo, - job->indirect.bo); - - uint32_t return_size = v3d_get_tex_return_size(devinfo, psview->format); - - struct V3D33_TEXTURE_SHADER_STATE unpacked = { - /* XXX */ - .border_color_red = swizzled_border_color(devinfo, psampler, - sview, 0), - .border_color_green = swizzled_border_color(devinfo, psampler, - sview, 1), - .border_color_blue = swizzled_border_color(devinfo, psampler, - sview, 2), - .border_color_alpha = swizzled_border_color(devinfo, psampler, - sview, 3), - - /* In the normal texturing path, the LOD gets clamped between - * min/max, and the base_level field (set in the sampler view - * from first_level) only decides where the min/mag switch - * happens, so we need to use the LOD clamps to keep us - * between min and max. - * - * For txf, the LOD clamp is still used, despite GL not - * wanting that. We will need to have a separate - * TEXTURE_SHADER_STATE that ignores psview->min/max_lod to - * support txf properly. 
- */ - .min_level_of_detail = MIN2(psview->u.tex.first_level + - MAX2(psampler->min_lod, 0), - psview->u.tex.last_level), - .max_level_of_detail = MIN2(psview->u.tex.first_level + - MAX2(psampler->max_lod, - psampler->min_lod), - psview->u.tex.last_level), - - .texture_base_pointer = cl_address(rsc->bo, - rsc->slices[0].offset), - - .output_32_bit = return_size == 32, - }; - - /* Set up the sampler swizzle if we're doing 16-bit sampling. For - * 32-bit, we leave swizzling up to the shader compiler. - * - * Note: Contrary to the docs, the swizzle still applies even if the - * return size is 32. It's just that you probably want to swizzle in - * the shader, because you need the Y/Z/W channels to be defined. - */ - if (return_size == 32) { - unpacked.swizzle_r = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_X); - unpacked.swizzle_g = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_Y); - unpacked.swizzle_b = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_Z); - unpacked.swizzle_a = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_W); - } else { - unpacked.swizzle_r = v3d_translate_pipe_swizzle(sview->swizzle[0]); - unpacked.swizzle_g = v3d_translate_pipe_swizzle(sview->swizzle[1]); - unpacked.swizzle_b = v3d_translate_pipe_swizzle(sview->swizzle[2]); - unpacked.swizzle_a = v3d_translate_pipe_swizzle(sview->swizzle[3]); - } - - int min_img_filter = psampler->min_img_filter; - int min_mip_filter = psampler->min_mip_filter; - int mag_img_filter = psampler->mag_img_filter; - - if (return_size == 32) { - min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; - min_img_filter = PIPE_TEX_FILTER_NEAREST; - mag_img_filter = PIPE_TEX_FILTER_NEAREST; - } - - bool min_nearest = min_img_filter == PIPE_TEX_FILTER_NEAREST; - switch (min_mip_filter) { - case PIPE_TEX_MIPFILTER_NONE: - unpacked.filter += min_nearest ? 2 : 0; - break; - case PIPE_TEX_MIPFILTER_NEAREST: - unpacked.filter += min_nearest ? 4 : 8; - break; - case PIPE_TEX_MIPFILTER_LINEAR: - unpacked.filter += min_nearest ? 4 : 8; - unpacked.filter += 2; - break; - } - - if (mag_img_filter == PIPE_TEX_FILTER_NEAREST) - unpacked.filter++; - - if (psampler->max_anisotropy > 8) - unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_16_1; - else if (psampler->max_anisotropy > 4) - unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_8_1; - else if (psampler->max_anisotropy > 2) - unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_4_1; - else if (psampler->max_anisotropy) - unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_2_1; - - uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)]; - cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked); - - for (int i = 0; i < ARRAY_SIZE(packed); i++) - packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i]; - - /* TMU indirect structs need to be 32b aligned. */ - v3d_cl_ensure_space(&job->indirect, ARRAY_SIZE(packed), 32); - cl_emit_prepacked(&job->indirect, &packed); -} - -static void -emit_textures(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex) -{ - for (int i = 0; i < stage_tex->num_textures; i++) { - if (stage_tex->textures[i]) - emit_one_texture(v3d, stage_tex, i); - } -} -#endif /* V3D_VERSION < 40 */ - static uint32_t translate_colormask(struct v3d_context *v3d, uint32_t colormask, int rt) { @@ -263,18 +97,12 @@ emit_rt_blend(struct v3d_context *v3d, struct v3d_job *job, { struct pipe_rt_blend_state *rtblend = &blend->rt[rt]; -#if V3D_VERSION >= 40 /* We don't need to emit blend state for disabled RTs. 
*/ if (!rtblend->blend_enable) return; -#endif cl_emit(&job->bcl, BLEND_CFG, config) { -#if V3D_VERSION >= 40 config.render_target_mask = rt_mask; -#else - assert(rt == 0); -#endif config.color_blend_mode = rtblend->rgb_func; config.color_blend_dst_factor = @@ -311,7 +139,6 @@ emit_flat_shade_flags(struct v3d_job *job, } } -#if V3D_VERSION >= 40 static void emit_noperspective_flags(struct v3d_job *job, int varying_offset, @@ -345,7 +172,6 @@ emit_centroid_flags(struct v3d_job *job, higher; } } -#endif /* V3D_VERSION >= 40 */ static bool emit_varying_flags(struct v3d_job *job, uint32_t *flags, @@ -433,14 +259,6 @@ v3dX(emit_state)(struct pipe_context *pctx) if (maxx > minx && maxy > miny) { clip.clip_window_width_in_pixels = maxx - minx; clip.clip_window_height_in_pixels = maxy - miny; - } else if (V3D_VERSION < 41) { - /* The HW won't entirely clip out when scissor - * w/h is 0. Just treat it the same as - * rasterizer discard. - */ - rasterizer_discard = true; - clip.clip_window_width_in_pixels = 1; - clip.clip_window_height_in_pixels = 1; } } @@ -512,14 +330,14 @@ v3dX(emit_state)(struct pipe_context *pctx) /* Note: EZ state may update based on the compiled FS, * along with ZSA */ -#if V3D_VERSION <= 42 +#if V3D_VERSION == 42 config.early_z_updates_enable = (job->ez_state != V3D_EZ_DISABLED); #endif if (v3d->zsa->base.depth_enabled) { config.z_updates_enable = v3d->zsa->base.depth_writemask; -#if V3D_VERSION <= 42 +#if V3D_VERSION == 42 config.early_z_enable = config.early_z_updates_enable; #endif @@ -559,7 +377,7 @@ v3dX(emit_state)(struct pipe_context *pctx) if (v3d->dirty & V3D_DIRTY_RASTERIZER && v3d->rasterizer->base.offset_tri) { - if (v3d->screen->devinfo.ver <= 42 && + if (v3d->screen->devinfo.ver == 42 && job->zsbuf && job->zsbuf->format == PIPE_FORMAT_Z16_UNORM) { cl_emit_prepacked_sized(&job->bcl, @@ -583,7 +401,7 @@ v3dX(emit_state)(struct pipe_context *pctx) } if (v3d->dirty & V3D_DIRTY_VIEWPORT) { -#if V3D_VERSION <= 42 +#if V3D_VERSION == 42 cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) { clip.viewport_half_width_in_1_256th_of_pixel = v3d->viewport.scale[0] * 256.0f; @@ -617,12 +435,6 @@ v3dX(emit_state)(struct pipe_context *pctx) } cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) { -#if V3D_VERSION < 41 - vp.viewport_centre_x_coordinate = - v3d->viewport.translate[0]; - vp.viewport_centre_y_coordinate = - v3d->viewport.translate[1]; -#else float vp_fine_x = v3d->viewport.translate[0]; float vp_fine_y = v3d->viewport.translate[1]; int32_t vp_coarse_x = 0; @@ -649,7 +461,6 @@ v3dX(emit_state)(struct pipe_context *pctx) vp.fine_y = vp_fine_y; vp.coarse_x = vp_coarse_x; vp.coarse_y = vp_coarse_y; -#endif } } @@ -657,11 +468,9 @@ v3dX(emit_state)(struct pipe_context *pctx) struct v3d_blend_state *blend = v3d->blend; if (blend->blend_enables) { -#if V3D_VERSION >= 40 cl_emit(&job->bcl, BLEND_ENABLES, enables) { enables.mask = blend->blend_enables; } -#endif const uint32_t max_rts = V3D_MAX_RENDER_TARGETS(v3d->screen->devinfo.ver); @@ -716,8 +525,7 @@ v3dX(emit_state)(struct pipe_context *pctx) /* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant * color. */ - if (v3d->dirty & V3D_DIRTY_BLEND_COLOR || - (V3D_VERSION < 41 && (v3d->dirty & V3D_DIRTY_BLEND))) { + if (v3d->dirty & V3D_DIRTY_BLEND_COLOR) { cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) { color.red_f16 = (v3d->swap_color_rb ? 
v3d->blend_color.hf[2] : @@ -751,20 +559,6 @@ v3dX(emit_state)(struct pipe_context *pctx) } } -#if V3D_VERSION < 40 - /* Pre-4.x, we have texture state that depends on both the sampler and - * the view, so we merge them together at draw time. - */ - if (v3d->dirty & V3D_DIRTY_FRAGTEX) - emit_textures(v3d, &v3d->tex[PIPE_SHADER_FRAGMENT]); - - if (v3d->dirty & V3D_DIRTY_GEOMTEX) - emit_textures(v3d, &v3d->tex[PIPE_SHADER_GEOMETRY]); - - if (v3d->dirty & V3D_DIRTY_VERTTEX) - emit_textures(v3d, &v3d->tex[PIPE_SHADER_VERTEX]); -#endif - if (v3d->dirty & V3D_DIRTY_FLAT_SHADE_FLAGS) { if (!emit_varying_flags(job, v3d->prog.fs->prog_data.fs->flat_shade_flags, @@ -773,7 +567,6 @@ v3dX(emit_state)(struct pipe_context *pctx) } } -#if V3D_VERSION >= 40 if (v3d->dirty & V3D_DIRTY_NOPERSPECTIVE_FLAGS) { if (!emit_varying_flags(job, v3d->prog.fs->prog_data.fs->noperspective_flags, @@ -789,7 +582,6 @@ v3dX(emit_state)(struct pipe_context *pctx) cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags); } } -#endif /* Set up the transform feedback data specs (which VPM entries to * output to which buffers). @@ -807,7 +599,6 @@ v3dX(emit_state)(struct pipe_context *pctx) tf_shader->tf_specs_psiz : tf_shader->tf_specs); -#if V3D_VERSION >= 40 bool tf_enabled = v3d_transform_feedback_enabled(v3d); job->tf_enabled |= tf_enabled; @@ -816,23 +607,13 @@ v3dX(emit_state)(struct pipe_context *pctx) tf_shader->num_tf_specs; tfe.enable = tf_enabled; }; -#else /* V3D_VERSION < 40 */ - cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) { - tfe.number_of_32_bit_output_buffer_address_following = - so->num_targets; - tfe.number_of_16_bit_output_data_specs_following = - tf_shader->num_tf_specs; - }; -#endif /* V3D_VERSION < 40 */ for (int i = 0; i < tf_shader->num_tf_specs; i++) { cl_emit_prepacked(&job->bcl, &tf_specs[i]); } } else { -#if V3D_VERSION >= 40 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) { tfe.enable = false; }; -#endif /* V3D_VERSION >= 40 */ } } @@ -850,7 +631,6 @@ v3dX(emit_state)(struct pipe_context *pctx) uint32_t offset = target ? v3d_stream_output_target(target)->offset * info->stride[i] * 4 : 0; -#if V3D_VERSION >= 40 if (!target) continue; @@ -863,16 +643,6 @@ v3dX(emit_state)(struct pipe_context *pctx) (target->buffer_size - offset) >> 2; output.buffer_number = i; } -#else /* V3D_VERSION < 40 */ - cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) { - if (target) { - output.address = - cl_address(rsc->bo, - target->buffer_offset + - offset); - } - }; -#endif /* V3D_VERSION < 40 */ if (target) { v3d_job_add_tf_write_resource(v3d->job, target->buffer); @@ -889,7 +659,6 @@ v3dX(emit_state)(struct pipe_context *pctx) } } -#if V3D_VERSION >= 40 if (v3d->dirty & V3D_DIRTY_SAMPLE_STATE) { cl_emit(&job->bcl, SAMPLE_STATE, state) { /* Note: SampleCoverage was handled at the @@ -899,5 +668,4 @@ v3dX(emit_state)(struct pipe_context *pctx) state.mask = job->msaa ? 
v3d->sample_mask : 0xf; } } -#endif } diff --git a/src/gallium/drivers/v3d/v3dx_format_table.c b/src/gallium/drivers/v3d/v3dx_format_table.c index 78f6d955be3..76b1074cb62 100644 --- a/src/gallium/drivers/v3d/v3dx_format_table.c +++ b/src/gallium/drivers/v3d/v3dx_format_table.c @@ -145,7 +145,6 @@ static const struct v3d_format format_table[] = { FORMAT(R11G11B10_FLOAT, R11F_G11F_B10F, R11F_G11F_B10F, SWIZ_XYZ1, 16, 0), FORMAT(R9G9B9E5_FLOAT, NO, RGB9_E5, SWIZ_XYZ1, 16, 0), -#if V3D_VERSION >= 40 FORMAT(S8_UINT_Z24_UNORM, D24S8, DEPTH24_X8, SWIZ_XXXX, 32, 1), FORMAT(X8Z24_UNORM, D24S8, DEPTH24_X8, SWIZ_XXXX, 32, 1), FORMAT(S8X24_UINT, S8, RGBA8UI, SWIZ_XXXX, 16, 1), @@ -155,16 +154,6 @@ static const struct v3d_format format_table[] = { /* Pretend we support this, but it'll be separate Z32F depth and S8. */ FORMAT(Z32_FLOAT_S8X24_UINT, D32F, DEPTH_COMP32F, SWIZ_XXXX, 32, 1), FORMAT(X32_S8X24_UINT, S8, R8UI, SWIZ_XXXX, 16, 1), -#else - FORMAT(S8_UINT_Z24_UNORM, ZS_DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_XXXX, 32, 1), - FORMAT(X8Z24_UNORM, ZS_DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_XXXX, 32, 1), - FORMAT(S8X24_UINT, NO, R32F, SWIZ_XXXX, 32, 1), - FORMAT(Z32_FLOAT, ZS_DEPTH_COMPONENT32F, R32F, SWIZ_XXXX, 32, 1), - FORMAT(Z16_UNORM, ZS_DEPTH_COMPONENT16, DEPTH_COMP16, SWIZ_XXXX, 32, 1), - - /* Pretend we support this, but it'll be separate Z32F depth and S8. */ - FORMAT(Z32_FLOAT_S8X24_UINT, ZS_DEPTH_COMPONENT32F, R32F, SWIZ_XXXX, 32, 1), -#endif FORMAT(ETC2_RGB8, NO, RGB8_ETC2, SWIZ_XYZ1, 16, 0), FORMAT(ETC2_SRGB8, NO, RGB8_ETC2, SWIZ_XYZ1, 16, 0), @@ -233,9 +222,6 @@ v3dX(get_internal_type_bpp_for_output_format)(uint32_t format, { switch (format) { case V3D_OUTPUT_IMAGE_FORMAT_RGBA8: -#if V3D_VERSION < 41 - case V3D_OUTPUT_IMAGE_FORMAT_RGBX8: -#endif case V3D_OUTPUT_IMAGE_FORMAT_RGB8: case V3D_OUTPUT_IMAGE_FORMAT_RG8: case V3D_OUTPUT_IMAGE_FORMAT_R8: @@ -264,9 +250,6 @@ v3dX(get_internal_type_bpp_for_output_format)(uint32_t format, case V3D_OUTPUT_IMAGE_FORMAT_SRGB: case V3D_OUTPUT_IMAGE_FORMAT_RGB10_A2: case V3D_OUTPUT_IMAGE_FORMAT_R11F_G11F_B10F: -#if V3D_VERSION < 41 - case V3D_OUTPUT_IMAGE_FORMAT_SRGBX8: -#endif case V3D_OUTPUT_IMAGE_FORMAT_RGBA16F: /* Note that sRGB RTs are stored in the tile buffer at 16F, * and the conversion to sRGB happens at tilebuffer diff --git a/src/gallium/drivers/v3d/v3dx_job.c b/src/gallium/drivers/v3d/v3dx_job.c index d48c97f99ff..391c1383971 100644 --- a/src/gallium/drivers/v3d/v3dx_job.c +++ b/src/gallium/drivers/v3d/v3dx_job.c @@ -34,9 +34,7 @@ void v3dX(bcl_epilogue)(struct v3d_context *v3d, struct v3d_job *job) { v3d_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(PRIMITIVE_COUNTS_FEEDBACK) + -#if V3D_VERSION >= 41 cl_packet_length(TRANSFORM_FEEDBACK_SPECS) + -#endif cl_packet_length(FLUSH)); if (job->tf_enabled || job->needs_primitives_generated) { @@ -57,13 +55,11 @@ void v3dX(bcl_epilogue)(struct v3d_context *v3d, struct v3d_job *job) * cleans up and finishes before it gets reset by the next * frame's tile binning mode cfg packet. (SWVC5-718). */ -#if V3D_VERSION >= 41 if (job->tf_enabled) { cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) { tfe.enable = false; }; } -#endif /* V3D_VERSION >= 41 */ /* We just FLUSH here to tell the HW to cap the bin CLs with a * return. 
Any remaining state changes won't be flushed to diff --git a/src/gallium/drivers/v3d/v3dx_rcl.c b/src/gallium/drivers/v3d/v3dx_rcl.c index 8bac9739e29..2efe70b7b42 100644 --- a/src/gallium/drivers/v3d/v3dx_rcl.c +++ b/src/gallium/drivers/v3d/v3dx_rcl.c @@ -36,23 +36,6 @@ #define PIPE_FIRST_COLOR_BUFFER_BIT (ffs(PIPE_CLEAR_COLOR0) - 1) -/* The HW queues up the load until the tile coordinates show up, but can only - * track one at a time. If we need to do more than one load, then we need to - * flush out the previous load by emitting the tile coordinates and doing a - * dummy store. - */ -static void -flush_last_load(struct v3d_cl *cl) -{ - if (V3D_VERSION >= 40) - return; - - cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); - cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { - store.buffer_to_store = NONE; - } -} - static void load_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer, int layer, uint32_t pipe_bit, uint32_t *loads_pending) @@ -73,7 +56,6 @@ load_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer, load.buffer_to_load = buffer; load.address = cl_address(rsc->bo, layer_offset); -#if V3D_VERSION >= 40 load.memory_format = surf->tiling; if (separate_stencil) load.input_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8; @@ -96,20 +78,9 @@ load_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer, else load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; -#else /* V3D_VERSION < 40 */ - /* Can't do raw ZSTENCIL loads -- need to load/store them to - * separate buffers for Z and stencil. - */ - assert(buffer != ZSTENCIL); - load.raw_mode = true; - load.padded_height_of_output_image_in_uif_blocks = - surf->padded_height_of_output_image_in_uif_blocks; -#endif /* V3D_VERSION < 40 */ } *loads_pending &= ~pipe_bit; - if (*loads_pending) - flush_last_load(cl); } static void @@ -127,7 +98,6 @@ store_general(struct v3d_job *job, } *stores_pending &= ~pipe_bit; - bool last_store = !(*stores_pending); struct v3d_resource *rsc = v3d_resource(psurf->texture); @@ -140,7 +110,6 @@ store_general(struct v3d_job *job, store.buffer_to_store = buffer; store.address = cl_address(rsc->bo, layer_offset); -#if V3D_VERSION >= 40 store.clear_buffer_being_stored = false; if (separate_stencil) @@ -168,35 +137,6 @@ store_general(struct v3d_job *job, store.decimate_mode = V3D_DECIMATE_MODE_4X; else store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; - -#else /* V3D_VERSION < 40 */ - /* Can't do raw ZSTENCIL stores -- need to load/store them to - * separate buffers for Z and stencil. - */ - assert(buffer != ZSTENCIL); - store.raw_mode = true; - if (!last_store) { - store.disable_color_buffers_clear_on_write = true; - store.disable_z_buffer_clear_on_write = true; - store.disable_stencil_buffer_clear_on_write = true; - } else { - store.disable_color_buffers_clear_on_write = - !(((pipe_bit & PIPE_CLEAR_COLOR_BUFFERS) && - general_color_clear && - (job->clear & pipe_bit))); - store.disable_z_buffer_clear_on_write = - !(job->clear & PIPE_CLEAR_DEPTH); - store.disable_stencil_buffer_clear_on_write = - !(job->clear & PIPE_CLEAR_STENCIL); - } - store.padded_height_of_output_image_in_uif_blocks = - surf->padded_height_of_output_image_in_uif_blocks; -#endif /* V3D_VERSION < 40 */ - } - - /* There must be a TILE_COORDINATES_IMPLICIT between each store. 
*/ - if (V3D_VERSION < 40 && !last_store) { - cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords); } } @@ -223,7 +163,6 @@ v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl, int layer) */ assert(!job->bbuf || job->load == 0); assert(!job->bbuf || job->nr_cbufs <= 1); - assert(!job->bbuf || V3D_VERSION >= 40); uint32_t loads_pending = job->bbuf ? job->store : job->load; @@ -235,18 +174,14 @@ v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl, int layer) struct pipe_surface *psurf = job->bbuf ? job->bbuf : job->cbufs[i]; assert(!job->bbuf || i == 0); - if (!psurf || (V3D_VERSION < 40 && - psurf->texture->nr_samples <= 1)) { + if (!psurf) continue; - } load_general(cl, psurf, RENDER_TARGET_0 + i, layer, bit, &loads_pending); } - if ((loads_pending & PIPE_CLEAR_DEPTHSTENCIL) && - (V3D_VERSION >= 40 || - (job->zsbuf && job->zsbuf->texture->nr_samples > 1))) { + if (loads_pending & PIPE_CLEAR_DEPTHSTENCIL) { assert(!job->early_zs_clear); struct pipe_surface *src = job->bbuf ? job->bbuf : job->zsbuf; struct v3d_resource *rsc = v3d_resource(src->texture); @@ -268,57 +203,14 @@ v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl, int layer) } } -#if V3D_VERSION < 40 - /* The initial reload will be queued until we get the - * tile coordinates. - */ - if (loads_pending) { - cl_emit(cl, RELOAD_TILE_COLOR_BUFFER, load) { - load.disable_color_buffer_load = - (~loads_pending & - PIPE_CLEAR_COLOR_BUFFERS) >> - PIPE_FIRST_COLOR_BUFFER_BIT; - load.enable_z_load = - loads_pending & PIPE_CLEAR_DEPTH; - load.enable_stencil_load = - loads_pending & PIPE_CLEAR_STENCIL; - } - } -#else /* V3D_VERSION >= 40 */ assert(!loads_pending); cl_emit(cl, END_OF_LOADS, end); -#endif } static void v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer) { -#if V3D_VERSION < 40 - UNUSED bool needs_color_clear = job->clear & PIPE_CLEAR_COLOR_BUFFERS; - UNUSED bool needs_z_clear = job->clear & PIPE_CLEAR_DEPTH; - UNUSED bool needs_s_clear = job->clear & PIPE_CLEAR_STENCIL; - - /* For clearing color in a TLB general on V3D 3.3: - * - * - NONE buffer store clears all TLB color buffers. - * - color buffer store clears just the TLB color buffer being stored. - * - Z/S buffers store may not clear the TLB color buffer. - * - * And on V3D 4.1, we only have one flag for "clear the buffer being - * stored" in the general packet, and a separate packet to clear all - * color TLB buffers. - * - * As a result, we only bother flagging TLB color clears in a general - * packet when we don't have to emit a separate packet to clear all - * TLB color buffers. - */ - bool general_color_clear = (needs_color_clear && - (job->clear & PIPE_CLEAR_COLOR_BUFFERS) == - (job->store & PIPE_CLEAR_COLOR_BUFFERS)); -#else bool general_color_clear = false; -#endif - uint32_t stores_pending = job->store; /* For V3D 4.1, use general stores for all TLB stores. 
@@ -337,17 +229,14 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
                         continue;

                 struct pipe_surface *psurf = job->cbufs[i];
-                if (!psurf ||
-                    (V3D_VERSION < 40 && psurf->texture->nr_samples <= 1)) {
+                if (!psurf)
                         continue;
-                }

                 store_general(job, cl, psurf, layer, RENDER_TARGET_0 + i, bit,
                               &stores_pending, general_color_clear, job->bbuf);
         }

-        if (job->store & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf &&
-            !(V3D_VERSION < 40 && job->zsbuf->texture->nr_samples <= 1)) {
+        if (job->store & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf) {
                 assert(!job->early_zs_clear);
                 struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
                 if (rsc->separate_stencil) {
@@ -375,35 +264,7 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
                 }
         }

-#if V3D_VERSION < 40
-        if (stores_pending) {
-                cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) {
-                        store.disable_color_buffer_write =
-                                (~stores_pending >>
-                                 PIPE_FIRST_COLOR_BUFFER_BIT) & 0xf;
-                        store.enable_z_write = stores_pending & PIPE_CLEAR_DEPTH;
-                        store.enable_stencil_write = stores_pending & PIPE_CLEAR_STENCIL;
-
-                        /* Note that when set this will clear all of the color
-                         * buffers.
-                         */
-                        store.disable_color_buffers_clear_on_write =
-                                !needs_color_clear;
-                        store.disable_z_buffer_clear_on_write =
-                                !needs_z_clear;
-                        store.disable_stencil_buffer_clear_on_write =
-                                !needs_s_clear;
-                };
-        } else if (needs_color_clear && !general_color_clear) {
-                /* If we didn't do our color clears in the general packet,
-                 * then emit a packet to clear all the TLB color buffers now.
-                 */
-                cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
-                        store.buffer_to_store = NONE;
-                }
-        }
-#else /* V3D_VERSION >= 40 */
         /* If we're emitting an RCL with GL_ARB_framebuffer_no_attachments,
          * we still need to emit some sort of store.
          */
@@ -421,7 +282,7 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
          * clearing Z/S.
          */
         if (job->clear) {
-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
                 cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
                         clear.clear_z_stencil_buffer = !job->early_zs_clear;
                         clear.clear_all_render_targets = true;
@@ -432,7 +293,6 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
 #endif
         }
-#endif /* V3D_VERSION >= 40 */
 }

 static void
@@ -445,22 +305,13 @@ v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer)
         v3d_cl_ensure_space(cl, 200, 1);
         struct v3d_cl_reloc tile_list_start = cl_get_address(cl);

-        if (V3D_VERSION >= 40) {
-                /* V3D 4.x only requires a single tile coordinates, and
-                 * END_OF_LOADS switches us between loading and rendering.
-                 */
-                cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
-        }
+        /* V3D 4.x/7.x only requires a single tile coordinates, and
+         * END_OF_LOADS switches us between loading and rendering.
+         */
+        cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

         v3d_rcl_emit_loads(job, cl, layer);

-        if (V3D_VERSION < 40) {
-                /* Tile Coordinates triggers the last reload and sets where
-                 * the stores go. There must be one per store packet.
-                 */
-                cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
-        }
-
         /* The binner starts out writing tiles assuming that the initial mode
          * is triangles, so make sure that's the case.
          */
@@ -468,20 +319,16 @@ v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer)
                 fmt.primitive_type = LIST_TRIANGLES;
         }

-#if V3D_VERSION >= 41
         /* PTB assumes that value to be 0, but hw will not set it. */
         cl_emit(cl, SET_INSTANCEID, set) {
                 set.instance_id = 0;
         }
-#endif

         cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

         v3d_rcl_emit_stores(job, cl, layer);

-#if V3D_VERSION >= 40
         cl_emit(cl, END_OF_TILE_MARKER, end);
-#endif

         cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
@@ -491,7 +338,6 @@ v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer)
         }
 }

-#if V3D_VERSION > 33
 /* Note that for v71, render target cfg packets has just one field that
  * combined the internal type and clamp mode. For simplicity we keep just one
  * helper.
@@ -503,13 +349,11 @@ static uint32_t
 v3dX(clamp_for_format_and_type)(uint32_t rt_type,
                                 enum pipe_format format)
 {
-#if V3D_VERSION >= 40 && V3D_VERSION <= 42
+#if V3D_VERSION == 42
         if (util_format_is_srgb(format)) {
                 return V3D_RENDER_TARGET_CLAMP_NORM;
-#if V3D_VERSION >= 42
         } else if (util_format_is_pure_integer(format)) {
                 return V3D_RENDER_TARGET_CLAMP_INT;
-#endif
         } else {
                 return V3D_RENDER_TARGET_CLAMP_NONE;
         }
@@ -541,9 +385,8 @@ v3dX(clamp_for_format_and_type)(uint32_t rt_type,
         }
         return V3D_RENDER_TARGET_TYPE_CLAMP_INVALID;
 #endif
-        return 0;
+        unreachable("Wrong V3D_VERSION");
 }
-#endif

 #if V3D_VERSION >= 71
 static void
@@ -566,7 +409,7 @@ v3d_setup_render_target(struct v3d_job *job,
 }
 #endif

-#if V3D_VERSION >= 40 && V3D_VERSION <= 42
+#if V3D_VERSION == 42
 static void
 v3d_setup_render_target(struct v3d_job *job,
                         int cbuf,
@@ -589,36 +432,6 @@ v3d_setup_render_target(struct v3d_job *job,
 }
 #endif

-#if V3D_VERSION < 40
-static void
-v3d_emit_z_stencil_config(struct v3d_job *job, struct v3d_surface *surf,
-                          struct v3d_resource *rsc, bool is_separate_stencil)
-{
-        cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_Z_STENCIL, zs) {
-                zs.address = cl_address(rsc->bo, surf->offset);
-
-                if (!is_separate_stencil) {
-                        zs.internal_type = surf->internal_type;
-                        zs.output_image_format = surf->format;
-                } else {
-                        zs.z_stencil_id = 1; /* Separate stencil */
-                }
-
-                zs.padded_height_of_output_image_in_uif_blocks =
-                        surf->padded_height_of_output_image_in_uif_blocks;
-
-                assert(surf->tiling != V3D_TILING_RASTER);
-                zs.memory_format = surf->tiling;
-        }
-
-        if (job->store & (is_separate_stencil ?
-                          PIPE_CLEAR_STENCIL :
-                          PIPE_CLEAR_DEPTHSTENCIL)) {
-                rsc->writes++;
-        }
-}
-#endif /* V3D_VERSION < 40 */
-
 static bool
 supertile_in_job_scissors(struct v3d_job *job,
                           uint32_t x, uint32_t y, uint32_t w, uint32_t h)
@@ -648,7 +461,6 @@ supertile_in_job_scissors(struct v3d_job *job,
         return false;
 }

-#if V3D_VERSION >= 40
 static inline bool
 do_double_initial_tile_clear(const struct v3d_job *job)
 {
@@ -663,7 +475,6 @@ do_double_initial_tile_clear(const struct v3d_job *job)
         return job->double_buffer &&
                (job->draw_tiles_x > 1 || job->draw_tiles_y > 1);
 }
-#endif

 static void
 emit_render_layer(struct v3d_job *job, uint32_t layer)
@@ -730,12 +541,6 @@ emit_render_layer(struct v3d_job *job, uint32_t layer)
          * state, we need 1 dummy store in between internal type/size
          * changes on V3D 3.x, and 2 dummy stores on 4.x.
          */
-#if V3D_VERSION < 40
-        cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) {
-                store.buffer_to_store = NONE;
-        }
-#endif
-#if V3D_VERSION >= 40
         for (int i = 0; i < 2; i++) {
                 if (i > 0)
                         cl_emit(&job->rcl, TILE_COORDINATES, coords);
@@ -756,7 +561,6 @@ emit_render_layer(struct v3d_job *job, uint32_t layer)
                 }
                 cl_emit(&job->rcl, END_OF_TILE_MARKER, end);
         }
-#endif

         cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush);

         v3d_rcl_emit_generic_per_tile_list(job, layer);
@@ -808,15 +612,10 @@ v3dX(emit_rcl)(struct v3d_job *job)
          * optional updates to the previous HW state.
          */
         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
-#if V3D_VERSION < 40
-                config.enable_z_store = job->store & PIPE_CLEAR_DEPTH;
-                config.enable_stencil_store = job->store & PIPE_CLEAR_STENCIL;
-#else /* V3D_VERSION >= 40 */
                 if (job->zsbuf) {
                         struct v3d_surface *surf = v3d_surface(job->zsbuf);
                         config.internal_depth_type = surf->internal_type;
                 }
-#endif /* V3D_VERSION >= 40 */

                 if (job->decided_global_ez_enable) {
                         switch (job->first_ez_state) {
@@ -839,7 +638,6 @@ v3dX(emit_rcl)(struct v3d_job *job)
                         config.early_z_disable = true;
                 }

-#if V3D_VERSION >= 40
                 assert(job->zsbuf || config.early_z_disable);

                 job->early_zs_clear = (job->clear & PIPE_CLEAR_DEPTHSTENCIL) &&
@@ -847,7 +645,6 @@ v3dX(emit_rcl)(struct v3d_job *job)
                         !(job->store & PIPE_CLEAR_DEPTHSTENCIL);

                 config.early_depth_stencil_clear = job->early_zs_clear;
-#endif /* V3D_VERSION >= 40 */

                 config.image_width_pixels = job->draw_width;
                 config.image_height_pixels = job->draw_height;
@@ -858,7 +655,7 @@ v3dX(emit_rcl)(struct v3d_job *job)
                 config.multisample_mode_4x = job->msaa;
                 config.double_buffer_in_non_ms_mode = job->double_buffer;

-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
                 config.maximum_bpp_of_all_render_targets = job->internal_bpp;
 #endif
 #if V3D_VERSION >= 71
@@ -921,22 +718,7 @@ v3dX(emit_rcl)(struct v3d_job *job)
                         }
                 }

-#if V3D_VERSION < 40
-                cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
-                        rt.address = cl_address(rsc->bo, surf->offset);
-                        rt.internal_type = surf->internal_type;
-                        rt.output_image_format = surf->format;
-                        rt.memory_format = surf->tiling;
-                        rt.internal_bpp = surf->internal_bpp;
-                        rt.render_target_number = i;
-                        rt.pad = config_pad;
-
-                        if (job->store & PIPE_CLEAR_COLOR0 << i)
-                                rsc->writes++;
-                }
-#endif /* V3D_VERSION < 40 */
-
-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
                 cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1,
                         clear) {
                         clear.clear_color_low_32_bits = job->clear_color[i][0];
@@ -1000,7 +782,7 @@ v3dX(emit_rcl)(struct v3d_job *job)
 #endif
         }

-#if V3D_VERSION >= 40 && V3D_VERSION <= 42
+#if V3D_VERSION == 42
         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
                 v3d_setup_render_target(job, 0,
                                         &rt.render_target_0_internal_bpp,
@@ -1021,27 +803,6 @@ v3dX(emit_rcl)(struct v3d_job *job)
         }
 #endif

-#if V3D_VERSION < 40
-        /* FIXME: Don't bother emitting if we don't load/clear Z/S. */
-        if (job->zsbuf) {
-                struct pipe_surface *psurf = job->zsbuf;
-                struct v3d_surface *surf = v3d_surface(psurf);
-                struct v3d_resource *rsc = v3d_resource(psurf->texture);
-
-                v3d_emit_z_stencil_config(job, surf, rsc, false);
-
-                /* Emit the separate stencil packet if we have a resource for
-                 * it. The HW will only load/store this buffer if the
-                 * Z/Stencil config doesn't have stencil in its format.
-                 */
-                if (surf->separate_stencil) {
-                        v3d_emit_z_stencil_config(job,
-                                                  v3d_surface(surf->separate_stencil),
-                                                  rsc->separate_stencil, true);
-                }
-        }
-#endif /* V3D_VERSION < 40 */
-
         /* Ends rendering mode config. */
         cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES,
                 clear) {
diff --git a/src/gallium/drivers/v3d/v3dx_state.c b/src/gallium/drivers/v3d/v3dx_state.c
index 6a72812ecc3..f2b2b2dab66 100644
--- a/src/gallium/drivers/v3d/v3dx_state.c
+++ b/src/gallium/drivers/v3d/v3dx_state.c
@@ -106,21 +106,17 @@ v3d_create_rasterizer_state(struct pipe_context *pctx,
         v3dx_pack(&so->depth_offset, DEPTH_OFFSET, depth) {
                 depth.depth_offset_factor = cso->offset_scale;
                 depth.depth_offset_units = cso->offset_units;
-#if V3D_VERSION >= 41
                 depth.limit = cso->offset_clamp;
-#endif
         }

         /* V3d 4.x treats polygon offset units based on a Z24 buffer, so we
          * need to scale up offset_units if we're only Z16.
          */
-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
         v3dx_pack(&so->depth_offset_z16, DEPTH_OFFSET, depth) {
                 depth.depth_offset_factor = cso->offset_scale;
                 depth.depth_offset_units = cso->offset_units * 256.0;
-#if V3D_VERSION >= 41
                 depth.limit = cso->offset_clamp;
-#endif
         }
 #endif
@@ -144,10 +140,6 @@ v3d_create_blend_state(struct pipe_context *pctx,
         if (cso->independent_blend_enable) {
                 for (int i = 0; i < max_rts; i++) {
                         so->blend_enables |= cso->rt[i].blend_enable << i;
-
-                        /* V3D 4.x is when we got independent blend enables. */
-                        assert(V3D_VERSION >= 40 ||
-                               cso->rt[i].blend_enable == cso->rt[0].blend_enable);
                 }
         } else {
                 if (cso->rt[0].blend_enable)
@@ -343,7 +335,7 @@ v3d_zsa_state_bind(struct pipe_context *pctx, void *hwcso)
 static bool
 needs_default_attribute_values(void)
 {
-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
         /* FIXME: on vulkan we are able to refine even further, as we know in
          * advance when we create the pipeline if we have an integer vertex
         * attrib. Pending to check if we could do something similar here.
@@ -517,18 +509,10 @@ v3d_set_framebuffer_state(struct pipe_context *pctx,
                 struct pipe_surface *cbuf = v3d->framebuffer.cbufs[i];
                 if (!cbuf)
                         continue;
-                struct v3d_surface *v3d_cbuf = v3d_surface(cbuf);

                 const struct util_format_description *desc =
                         util_format_description(cbuf->format);

-                /* For BGRA8 formats (DRI window system default format), we
-                 * need to swap R and B, since the HW's format is RGBA8. On
-                 * V3D 4.1+, the RCL can swap R and B on load/store.
-                 */
-                if (v3d->screen->devinfo.ver < 41 && v3d_cbuf->swap_rb)
-                        v3d->swap_color_rb |= 1 << i;
-
                 if (desc->swizzle[3] == PIPE_SWIZZLE_1)
                         v3d->blend_dst_alpha_one |= 1 << i;
         }
@@ -555,7 +539,6 @@ translate_wrap(uint32_t pipe_wrap)
         }
 }

-#if V3D_VERSION >= 40
 static void
 v3d_upload_sampler_state_variant(void *map,
                                  const struct pipe_sampler_state *cso,
@@ -720,7 +703,7 @@ v3d_upload_sampler_state_variant(void *map,
                 break;
         }

-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
         /* The TMU in V3D 7.x always takes 32-bit floats and handles conversions
          * for us. In V3D 4.x we need to manually convert floating point color
          * values to the expected format.
@@ -739,7 +722,6 @@ v3d_upload_sampler_state_variant(void *map,
                 }
         }
 }
-#endif

 static void *
 v3d_create_sampler_state(struct pipe_context *pctx,
@@ -757,7 +739,6 @@ v3d_create_sampler_state(struct pipe_context *pctx,
         enum V3DX(Wrap_Mode) wrap_t = translate_wrap(cso->wrap_t);
         enum V3DX(Wrap_Mode) wrap_r = translate_wrap(cso->wrap_r);

-#if V3D_VERSION >= 40
         bool uses_border_color = (wrap_s == V3D_WRAP_MODE_BORDER ||
                                   wrap_t == V3D_WRAP_MODE_BORDER ||
                                   wrap_r == V3D_WRAP_MODE_BORDER);
@@ -807,20 +788,6 @@ v3d_create_sampler_state(struct pipe_context *pctx,
                                                  so->border_color_variants ?
                                                  i : border_variant);
         }
-#else /* V3D_VERSION < 40 */
-        v3dx_pack(&so->p0, TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1, p0) {
-                p0.s_wrap_mode = wrap_s;
-                p0.t_wrap_mode = wrap_t;
-                p0.r_wrap_mode = wrap_r;
-        }
-
-        v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) {
-                tex.depth_compare_function = cso->compare_mode ?
-                        cso->compare_func :
-                        V3D_COMPARE_FUNC_NEVER;
-                tex.fixed_bias = cso->lod_bias;
-        }
-#endif /* V3D_VERSION < 40 */

         return so;
 }
@@ -911,8 +878,7 @@ v3d_setup_texture_shader_state(const struct v3d_device_info *devinfo,
         tex->image_width = prsc->width0 * msaa_scale;
         tex->image_height = prsc->height0 * msaa_scale;

-#if V3D_VERSION >= 40
-        /* On 4.x, the height of a 1D texture is redefined to be the
+        /* On 4.x, the height of a 1D texture is redefined to be the
          * upper 14 bits of the width (which is only usable with txf).
          */
         if (prsc->target == PIPE_TEXTURE_1D ||
@@ -922,7 +888,6 @@ v3d_setup_texture_shader_state(const struct v3d_device_info *devinfo,
         tex->image_width &= (1 << 14) - 1;
         tex->image_height &= (1 << 14) - 1;
-#endif

         if (prsc->target == PIPE_TEXTURE_3D) {
                 tex->image_depth = prsc->depth0;
@@ -941,7 +906,6 @@ v3d_setup_texture_shader_state(const struct v3d_device_info *devinfo,
         tex->base_level = base_level;

-#if V3D_VERSION >= 40
         tex->max_level = last_level;
         /* Note that we don't have a job to reference the texture's sBO
          * at state create time, so any time this sampler view is used
          * we may need to get the sampler view BO referenced.
          */
         uint32_t base_offset = rsc->bo->offset +
                 v3d_layer_offset(prsc, 0, first_layer);
         tex->texture_base_pointer = cl_address(NULL, base_offset);
-#endif
-
         tex->array_stride_64_byte_aligned = rsc->cube_map_stride / 64;

 #if V3D_VERSION >= 71
@@ -976,12 +938,10 @@ v3d_setup_texture_shader_state(const struct v3d_device_info *devinfo,
         if (tex->level_0_is_strictly_uif)
                 tex->level_0_ub_pad = rsc->slices[0].ub_pad;

-#if V3D_VERSION >= 40
         if (tex->uif_xor_disable ||
             tex->level_0_is_strictly_uif) {
                 tex->extended = true;
         }
-#endif /* V3D_VERSION >= 40 */
 }

 void
@@ -997,16 +957,10 @@ v3dX(create_texture_shader_state_bo)(struct v3d_context *v3d,
         assert(so->serial_id != rsc->serial_id);

-#if V3D_VERSION >= 40
         v3d_bo_unreference(&so->bo);
         so->bo = v3d_bo_alloc(v3d->screen,
                               cl_packet_length(TEXTURE_SHADER_STATE), "sampler");
         map = v3d_bo_map(so->bo);
-#else /* V3D_VERSION < 40 */
-        STATIC_ASSERT(sizeof(so->texture_shader_state) >=
-                      cl_packet_length(TEXTURE_SHADER_STATE));
-        map = &so->texture_shader_state;
-#endif

         v3dx_pack(map, TEXTURE_SHADER_STATE, tex) {
                 if (prsc->target != PIPE_BUFFER) {
@@ -1025,69 +979,20 @@ v3dX(create_texture_shader_state_bo)(struct v3d_context *v3d,
                 }

                 bool is_srgb = util_format_is_srgb(cso->format);
-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
                 tex.srgb = is_srgb;
 #endif
 #if V3D_VERSION >= 71
                 tex.transfer_func = is_srgb ? TRANSFER_FUNC_SRGB : TRANSFER_FUNC_NONE;
 #endif
-#if V3D_VERSION >= 40
                 tex.swizzle_r = v3d_translate_pipe_swizzle(so->swizzle[0]);
                 tex.swizzle_g = v3d_translate_pipe_swizzle(so->swizzle[1]);
                 tex.swizzle_b = v3d_translate_pipe_swizzle(so->swizzle[2]);
                 tex.swizzle_a = v3d_translate_pipe_swizzle(so->swizzle[3]);
-#endif
-
-                if (prsc->nr_samples > 1 && V3D_VERSION < 40) {
-                        /* Using texture views to reinterpret formats on our
-                         * MSAA textures won't work, because we don't lay out
-                         * the bits in memory as it's expected -- for example,
-                         * RGBA8 and RGB10_A2 are compatible in the
-                         * ARB_texture_view spec, but in HW we lay them out as
-                         * 32bpp RGBA8 and 64bpp RGBA16F. Just assert for now
-                         * to catch failures.
-                         *
-                         * We explicitly allow remapping S8Z24 to RGBA8888 for
-                         * v3d_blit.c's stencil blits.
-                         */
-                        assert((util_format_linear(cso->format) ==
-                                util_format_linear(prsc->format)) ||
-                               (prsc->format == PIPE_FORMAT_S8_UINT_Z24_UNORM &&
-                                cso->format == PIPE_FORMAT_R8G8B8A8_UNORM));
-                        uint32_t output_image_format =
-                                v3d_get_rt_format(&screen->devinfo, cso->format);
-                        uint32_t internal_type;
-                        uint32_t internal_bpp;
-                        v3dX(get_internal_type_bpp_for_output_format)(output_image_format,
-                                                                      &internal_type,
-                                                                      &internal_bpp);
-
-                        switch (internal_type) {
-                        case V3D_INTERNAL_TYPE_8:
-                                tex.texture_type = TEXTURE_DATA_FORMAT_RGBA8;
-                                break;
-                        case V3D_INTERNAL_TYPE_16F:
-                                tex.texture_type = TEXTURE_DATA_FORMAT_RGBA16F;
-                                break;
-                        default:
-                                unreachable("Bad MSAA texture type");
-                        }
-
-                        /* sRGB was stored in the tile buffer as linear and
-                         * would have been encoded to sRGB on resolved tile
-                         * buffer store. Note that this means we would need
-                         * shader code if we wanted to read an MSAA sRGB
-                         * texture without sRGB decode.
-                         */
-#if V3D_VERSION <= 42
-                        tex.srgb = false;
-#endif
-
-                } else {
-                        tex.texture_type = v3d_get_tex_format(&screen->devinfo,
-                                                              cso->format);
-                }
+                tex.texture_type = v3d_get_tex_format(&screen->devinfo,
+                                                      cso->format);
         };

         so->serial_id = rsc->serial_id;
@@ -1141,7 +1046,6 @@ v3d_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
         if (sample_format == PIPE_FORMAT_S8_UINT_Z24_UNORM)
                 sample_format = PIPE_FORMAT_X8Z24_UNORM;

-#if V3D_VERSION >= 40
         const struct util_format_description *desc =
                 util_format_description(sample_format);
@@ -1202,7 +1106,6 @@ v3d_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
                                         V3D_SAMPLER_STATE_F16);
                 }
         }
-#endif

         /* V3D still doesn't support sampling from raster textures, so we will
          * have to copy to a temporary tiled texture.
@@ -1433,7 +1336,6 @@ v3d_create_image_view_texture_shader_state(struct v3d_context *v3d,
                                            struct v3d_shaderimg_stateobj *so,
                                            int img)
 {
-#if V3D_VERSION >= 40
         struct v3d_image_view *iview = &so->si[img];
         void *map;
@@ -1469,12 +1371,6 @@ v3d_create_image_view_texture_shader_state(struct v3d_context *v3d,
                 tex.texture_type = v3d_get_tex_format(&v3d->screen->devinfo,
                                                       iview->base.format);
         };
-#else /* V3D_VERSION < 40 */
-        /* V3D 3.x doesn't use support shader image load/store operations on
-         * textures, so it would get lowered in the shader to general memory
-         * accesses.
-         */
-#endif
 }

 static void
diff --git a/src/gallium/drivers/v3d/v3dx_tfu.c b/src/gallium/drivers/v3d/v3dx_tfu.c
index f4dba0cfa48..3990409e122 100644
--- a/src/gallium/drivers/v3d/v3dx_tfu.c
+++ b/src/gallium/drivers/v3d/v3dx_tfu.c
@@ -118,7 +118,7 @@ v3dX(tfu)(struct pipe_context *pctx,
                 break;
         }

-#if V3D_VERSION <= 42
+#if V3D_VERSION == 42
         if (src_base_slice->tiling == V3D_TILING_RASTER) {
                 tfu.icfg |= (V3D33_TFU_ICFG_FORMAT_RASTER <<
                              V3D33_TFU_ICFG_FORMAT_SHIFT);
@@ -152,7 +152,7 @@ v3dX(tfu)(struct pipe_context *pctx,
                               implicit_padded_height) / uif_block_h) <<
                              V3D33_TFU_ICFG_OPAD_SHIFT);
         }
-#endif /* V3D_VERSION <= 42 */
+#endif /* V3D_VERSION == 42 */
 #if V3D_VERSION >= 71
         if (src_base_slice->tiling == V3D_TILING_RASTER) {