diff --git a/docs/features.txt b/docs/features.txt
index 7e19b8a906b..f074230ae21 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -510,7 +510,7 @@ Khronos extensions that are not part of any Vulkan version:
   VK_KHR_acceleration_structure                         DONE (anv/gfx12.5+, lvp, radv/gfx10.3+)
   VK_KHR_android_surface                                not started
   VK_KHR_calibrated_timestamps                          DONE (anv, nvk, radv, tu/a750+)
-  VK_KHR_compute_shader_derivatives                     DONE (anv, nvk, radv)
+  VK_KHR_compute_shader_derivatives                     DONE (anv, nvk, radv, tu/a7xx+)
   VK_KHR_cooperative_matrix                             DONE (anv, radv/gfx11+)
   VK_KHR_deferred_host_operations                       DONE (anv, hasvk, lvp, radv)
   VK_KHR_display                                        DONE (anv, nvk, pvr, radv, tu, v3dv)
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 615bfbff213..899b6e9a7ed 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -602,16 +602,58 @@ lower_subgroup_id(nir_builder *b, nir_instr *instr, void *_shader)
     * LocalInvocationIndex here. This means that whenever we do this lowering we
     * have to force linear dispatch to make sure that the relation between
     * SubgroupId/SubgroupLocalInvocationId and LocalInvocationIndex is what we
-    * expect.
+    * expect, unless the shader forces us to do the quad layout in which case we
+    * have to use the tiled layout.
     */
-   shader->cs.force_linear_dispatch = true;
-
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+   if (intr->intrinsic == nir_intrinsic_load_subgroup_id &&
+       shader->nir->info.derivative_group == DERIVATIVE_GROUP_QUADS) {
+      /* We have to manually figure out which subgroup we're in using the
+       * tiling. The tiling is 4x4, unless one of the dimensions is not a
+       * multiple of 4 in which case it drops to 2.
+       */
+      nir_def *local_size = nir_load_workgroup_size(b);
+      nir_def *local_size_x = nir_channel(b, local_size, 0);
+      nir_def *local_size_y = nir_channel(b, local_size, 1);
+      /* Calculate the shift from invocation to tile index for x and y */
+      nir_def *x_shift = nir_bcsel(b,
+                                   nir_ieq_imm(b,
+                                               nir_iand_imm(b, local_size_x, 3),
+                                               0),
+                                   nir_imm_int(b, 2), nir_imm_int(b, 1));
+      nir_def *y_shift = nir_bcsel(b,
+                                   nir_ieq_imm(b,
+                                               nir_iand_imm(b, local_size_y, 3),
+                                               0),
+                                   nir_imm_int(b, 2), nir_imm_int(b, 1));
+      nir_def *id = nir_load_local_invocation_id(b);
+      nir_def *id_x = nir_channel(b, id, 0);
+      nir_def *id_y = nir_channel(b, id, 1);
+      /* Calculate which tile we're in */
+      nir_def *tile_id =
+         nir_iadd(b, nir_imul24(b, nir_ishr(b, id_y, y_shift),
+                                nir_ishr(b, local_size_x, x_shift)),
+                  nir_ishr(b, id_x, x_shift));
+      /* Finally calculate the subgroup id */
+      return nir_ishr(b, tile_id, nir_isub(b,
+                                           nir_load_subgroup_id_shift_ir3(b),
+                                           nir_iadd(b, x_shift, y_shift)));
+   }
+
+   /* Just use getfiberid if we have to use tiling */
+   if (intr->intrinsic == nir_intrinsic_load_subgroup_invocation &&
+       shader->nir->info.derivative_group == DERIVATIVE_GROUP_QUADS) {
+      return NULL;
+   }
+
    if (intr->intrinsic == nir_intrinsic_load_subgroup_invocation) {
+      shader->cs.force_linear_dispatch = true;
       return nir_iand(
          b, nir_load_local_invocation_index(b),
          nir_iadd_imm(b, nir_load_subgroup_size(b), -1));
    } else if (intr->intrinsic == nir_intrinsic_load_subgroup_id) {
+      shader->cs.force_linear_dispatch = true;
       return nir_ishr(b, nir_load_local_invocation_index(b),
                       nir_load_subgroup_id_shift_ir3(b));
    } else {
@@ -771,6 +813,9 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
       bool progress = false;
       NIR_PASS(progress, s, ir3_nir_lower_subgroup_id_cs, shader);
 
+      if (s->info.derivative_group == DERIVATIVE_GROUP_LINEAR)
+         shader->cs.force_linear_dispatch = true;
+
       /* ir3_nir_lower_subgroup_id_cs creates extra compute intrinsics which
        * we need to lower again.
        */
diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc
index db88bf97f2a..5545a36ad38 100644
--- a/src/freedreno/vulkan/tu_device.cc
+++ b/src/freedreno/vulkan/tu_device.cc
@@ -148,6 +148,7 @@ get_device_extensions(const struct tu_physical_device *device,
       .KHR_bind_memory2 = true,
       .KHR_buffer_device_address = true,
       .KHR_calibrated_timestamps = device->info->a7xx.has_persistent_counter,
+      .KHR_compute_shader_derivatives = device->info->chip >= 7,
       .KHR_copy_commands2 = true,
       .KHR_create_renderpass2 = true,
       .KHR_dedicated_allocation = true,
@@ -308,6 +309,7 @@ get_device_extensions(const struct tu_physical_device *device,
       .GOOGLE_hlsl_functionality1 = true,
       .GOOGLE_user_type = true,
       .IMG_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
+      .NV_compute_shader_derivatives = device->info->chip >= 7,
       .VALVE_mutable_descriptor_type = true,
    }
 };
@@ -455,6 +457,10 @@ tu_get_features(struct tu_physical_device *pdevice,
    features->shaderIntegerDotProduct = true;
    features->maintenance4 = true;
 
+   /* VK_KHR_compute_shader_derivatives */
+   features->computeDerivativeGroupQuads = pdevice->info->chip >= 7;
+   features->computeDerivativeGroupLinear = pdevice->info->chip >= 7;
+
    /* VK_KHR_index_type_uint8 */
    features->indexTypeUint8 = true;
 
@@ -1023,6 +1029,9 @@ tu_get_properties(struct tu_physical_device *pdevice,
    tu_get_physical_device_properties_1_2(pdevice, props);
    tu_get_physical_device_properties_1_3(pdevice, props);
 
+   /* VK_KHR_compute_shader_derivatives */
+   props->meshAndTaskShaderDerivatives = false;
+
    /* VK_KHR_push_descriptor */
    props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
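Note: the quad-tiling subgroup-id computation added to lower_subgroup_id() above is easy to sanity-check on the host. The C sketch below mirrors the NIR math, under the assumption that nir_load_subgroup_id_shift_ir3 resolves to log2 of the subgroup size; the helper name quad_subgroup_id and the standalone harness are made up for illustration and are not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Mirrors the lowered NIR: tiles are 4x4, unless a workgroup dimension is
 * not a multiple of 4, in which case that side of the tile drops to 2. */
static uint32_t
quad_subgroup_id(uint32_t local_size_x, uint32_t local_size_y,
                 uint32_t id_x, uint32_t id_y, uint32_t subgroup_id_shift)
{
   uint32_t x_shift = (local_size_x & 3) == 0 ? 2 : 1;
   uint32_t y_shift = (local_size_y & 3) == 0 ? 2 : 1;

   /* Row-major index of the tile containing this invocation. */
   uint32_t tile_id = (id_y >> y_shift) * (local_size_x >> x_shift) +
                      (id_x >> x_shift);

   /* A tile holds 1 << (x_shift + y_shift) invocations, so one subgroup
    * covers subgroup_size >> (x_shift + y_shift) consecutive tiles. */
   return tile_id >> (subgroup_id_shift - (x_shift + y_shift));
}

int
main(void)
{
   /* 16x8 workgroup with 64-wide subgroups (shift of 6): the eight 4x4
    * tiles split so rows 0-3 map to subgroup 0, rows 4-7 to subgroup 1. */
   for (uint32_t y = 0; y < 8; y++) {
      for (uint32_t x = 0; x < 16; x++)
         printf("%u", quad_subgroup_id(16, 8, x, y, 6));
      printf("\n");
   }
   return 0;
}

This also makes it clear why the pass cannot force linear dispatch in the quads case: the subgroup id is derived from the 2D tile position, which only holds under the tiled layout.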
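For completeness, an application discovers the feature bits set in tu_get_features() through standard feature chaining. A minimal sketch, assuming recent Vulkan headers that define the KHR extension structs and a valid VkPhysicalDevice obtained elsewhere (a real application would first confirm VK_KHR_compute_shader_derivatives is in the device's extension list):

#include <vulkan/vulkan.h>
#include <stdio.h>

static void
check_compute_derivatives(VkPhysicalDevice pdev)
{
   /* Chain the extension's feature struct into the core query and read
    * back the two derivative-group bits the driver fills in. */
   VkPhysicalDeviceComputeShaderDerivativesFeaturesKHR deriv = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_KHR,
   };
   VkPhysicalDeviceFeatures2 features = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
      .pNext = &deriv,
   };
   vkGetPhysicalDeviceFeatures2(pdev, &features);

   printf("quads: %u, linear: %u\n",
          deriv.computeDerivativeGroupQuads,
          deriv.computeDerivativeGroupLinear);
}

On a7xx this should report both group modes as supported, matching the chip >= 7 checks in the patch, while meshAndTaskShaderDerivatives stays false.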