svga/tgsi: add translation for DTRUNC

DTRUNC is not lowered in glsl-nir-tgsi path. Add the translation in svga_tgsi_vgpu10(). Reviewed-by: Neha Bhende <bhenden@vmware.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16412>
2026-05-05 18:18:06 +02:00 · 2022-05-06 14:45:55 -07:00 · 2022-05-06 14:45:55 -07:00 · 6253a23eff
commit 6253a23eff
parent e73443b7a5
1 changed files with 96 additions and 3 deletions
--- a/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
+++ b/src/gallium/drivers/svga/svga_tgsi_vgpu10.c
@ -60,7 +60,7 @@


 #define INVALID_INDEX 99999
-#define MAX_INTERNAL_TEMPS 3
+#define MAX_INTERNAL_TEMPS 4
 #define MAX_SYSTEM_VALUES 4
 #define MAX_IMMEDIATE_COUNT \
        (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
@ -3218,7 +3218,8 @@ alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
         alloc_immediate_double2(emit, -1.0, -1.0);
   }

-   if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0) {
+   if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0 ||
+       emit->info.opcode_count[TGSI_OPCODE_DTRUNC] > 0) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_double2(emit, 0.0, 0.0);
      emit->common_immediate_pos[n++] =
@ -9470,6 +9471,96 @@ emit_dsqrt(struct svga_shader_emitter_v10 *emit,
 }


+/**
+ * glsl-nir path does not lower DTRUNC, so we need to
+ * add the translation here.
+ *
+ * frac = DFRAC(src)
+ * tmp = src - frac
+ * dst = src >= 0 ? tmp : (tmp + (frac==0 ? 0 : 1))
+ */
+static boolean
+emit_dtrunc(struct svga_shader_emitter_v10 *emit,
+            const struct tgsi_full_instruction *inst)
+{
+   assert(emit->version >= 50);
+
+   check_double_src_swizzle(&inst->Src[0]);
+
+
+
+   /* frac = DFRAC(src) */
+   unsigned frac_index = get_temp_index(emit);
+   struct tgsi_full_dst_register frac_dst = make_dst_temp_reg(frac_index);
+   struct tgsi_full_src_register frac_src = make_src_temp_reg(frac_index);
+
+   VGPU10OpcodeToken0 token0;
+   begin_emit_instruction(emit);
+   token0.value = 0;
+   token0.opcodeType = VGPU10_OPCODE_VMWARE;
+   token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DFRC;
+   emit_dword(emit, token0.value);
+   emit_dst_register(emit, &frac_dst);
+   emit_src_register(emit, &inst->Src[0]);
+   end_emit_instruction(emit);
+
+   /* tmp = src - frac */
+   unsigned tmp_index = get_temp_index(emit);
+   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp_index);
+   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp_index);
+   struct tgsi_full_src_register negate_frac_src = negate_src(&frac_src);
+   emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
+                        &tmp_dst, &inst->Src[0], &negate_frac_src, NULL,
+                        inst->Instruction.Saturate, inst->Instruction.Precise);
+
+   /* cond = frac==0 */
+   unsigned cond_index = get_temp_index(emit);
+   struct tgsi_full_dst_register cond_dst = make_dst_temp_reg(cond_index);
+   struct tgsi_full_src_register cond_src = make_src_temp_reg(cond_index);
+   struct tgsi_full_src_register zero =
+               make_immediate_reg_double(emit, 0);
+
+   /* Only use one or two components for double opcode */
+   cond_dst = writemask_dst(&cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y);
+
+   emit_instruction_opn(emit, VGPU10_OPCODE_DEQ,
+                        &cond_dst, &frac_src, &zero, NULL,
+                        inst->Instruction.Saturate, inst->Instruction.Precise);
+
+   /* tmp2 = cond ? 0 : 1 */
+   unsigned tmp2_index = get_temp_index(emit);
+   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2_index);
+   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2_index);
+   struct tgsi_full_src_register cond_src_xy =
+      swizzle_src(&cond_src, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
+		             PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y);
+   struct tgsi_full_src_register one =
+               make_immediate_reg_double(emit, 1.0);
+
+   emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC,
+                        &tmp2_dst, &cond_src_xy, &zero, &one,
+                        inst->Instruction.Saturate, inst->Instruction.Precise);
+
+   /* tmp2 = tmp + tmp2 */
+   emit_instruction_opn(emit, VGPU10_OPCODE_DADD,
+                        &tmp2_dst, &tmp_src, &tmp2_src, NULL,
+                        inst->Instruction.Saturate, inst->Instruction.Precise);
+
+   /* cond = src>=0 */
+   emit_instruction_opn(emit, VGPU10_OPCODE_DGE,
+                        &cond_dst, &inst->Src[0], &zero, NULL,
+                        inst->Instruction.Saturate, inst->Instruction.Precise);
+
+   /* dst = cond ? tmp : tmp2 */
+   emit_instruction_opn(emit, VGPU10_OPCODE_DMOVC,
+                        &inst->Dst[0], &cond_src_xy, &tmp_src, &tmp2_src,
+                        inst->Instruction.Saturate, inst->Instruction.Precise);
+
+   free_temp_indexes(emit);
+   return TRUE;
+}
+
+
 static boolean
 emit_interp_offset(struct svga_shader_emitter_v10 *emit,
                   const struct tgsi_full_instruction *inst)
@ -10960,6 +11051,9 @@ emit_instruction(struct svga_shader_emitter_v10 *emit,
   case TGSI_OPCODE_DFMA:
      return emit_simple(emit, inst);

+   case TGSI_OPCODE_DTRUNC:
+      return emit_dtrunc(emit, inst);
+
   /* The following opcodes should never be seen here.  We return zero
    * for all the PIPE_CAP_TGSI_DROUND_SUPPORTED, DFRACEXP_DLDEXP_SUPPORTED,
    * LDEXP_SUPPORTED queries.
@ -10968,7 +11062,6 @@ emit_instruction(struct svga_shader_emitter_v10 *emit,
   case TGSI_OPCODE_DSSG:
   case TGSI_OPCODE_DFRACEXP:
   case TGSI_OPCODE_DLDEXP:
-   case TGSI_OPCODE_DTRUNC:
   case TGSI_OPCODE_DCEIL:
   case TGSI_OPCODE_DFLR:
      debug_printf("Unexpected TGSI opcode %s.  "