radeonsi: split si_clear_buffer to remove enum si_method

Tested-by: Dieter Nützel <Dieter@nuetzel-hh.de>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
2026-05-07 07:08:04 +02:00 · 2018-08-02 20:32:30 -04:00 · 2018-08-02 20:32:30 -04:00 · de8d5edbc4
commit de8d5edbc4
parent 4de92f2abb
6 changed files with 60 additions and 53 deletions
--- a/src/gallium/drivers/radeonsi/si_clear.c
+++ b/src/gallium/drivers/radeonsi/si_clear.c
@@ -256,7 +256,7 @@ void vi_dcc_clear_level(struct si_context *sctx,
 	}

 	si_clear_buffer(sctx, dcc_buffer, dcc_offset, clear_size,
-			clear_value, SI_COHERENCY_CB_META, SI_METHOD_BEST);
+			clear_value, SI_COHERENCY_CB_META);
 }

 /* Set the same micro tile mode as the destination of the last MSAA resolve.
@@ -489,7 +489,7 @@ static void si_do_fast_color_clear(struct si_context *sctx,

 				si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
 						tex->cmask_offset, tex->surface.cmask_size,
-						0xCCCCCCCC, SI_COHERENCY_CB_META, SI_METHOD_BEST);
+						0xCCCCCCCC, SI_COHERENCY_CB_META);
 				need_decompress_pass = true;
 			}

@@ -520,7 +520,7 @@ static void si_do_fast_color_clear(struct si_context *sctx,
 			/* Do the fast clear. */
 			si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
 					tex->cmask_offset, tex->surface.cmask_size, 0,
-					SI_COHERENCY_CB_META, SI_METHOD_BEST);
+					SI_COHERENCY_CB_META);
 			need_decompress_pass = true;
 		}

--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -224,28 +224,63 @@ static void si_cp_dma_prepare(struct si_context *sctx, struct pipe_resource *dst
 	}
 }

+void si_cp_dma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
+			    uint64_t offset, uint64_t size, unsigned value,
+			    enum si_coherency coher,
+			    enum si_cache_policy cache_policy)
+{
+	struct r600_resource *rdst = r600_resource(dst);
+	uint64_t va = rdst->gpu_address + offset;
+	bool is_first = true;
+
+	assert(size && size % 4 == 0);
+
+	/* Mark the buffer range of destination as valid (initialized),
+	 * so that transfer_map knows it should wait for the GPU when mapping
+	 * that range. */
+	util_range_add(&rdst->valid_buffer_range, offset, offset + size);
+
+	/* Flush the caches. */
+	sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+		       SI_CONTEXT_CS_PARTIAL_FLUSH |
+		       get_flush_flags(sctx, coher, cache_policy);
+
+	while (size) {
+		unsigned byte_count = MIN2(size, cp_dma_max_byte_count(sctx));
+		unsigned dma_flags = CP_DMA_CLEAR;
+
+		si_cp_dma_prepare(sctx, dst, NULL, byte_count, size, 0, coher,
+				  &is_first, &dma_flags);
+
+		/* Emit the clear packet. */
+		si_emit_cp_dma(sctx, va, value, byte_count, dma_flags, cache_policy);
+
+		size -= byte_count;
+		va += byte_count;
+	}
+
+	if (cache_policy != L2_BYPASS)
+		rdst->TC_L2_dirty = true;
+
+	/* If it's not a framebuffer fast clear... */
+	if (coher == SI_COHERENCY_SHADER)
+		sctx->num_cp_dma_calls++;
+}
+
 void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
 		     uint64_t offset, uint64_t size, unsigned value,
-		     enum si_coherency coher, enum si_method xfer)
+		     enum si_coherency coher)
 {
 	struct radeon_winsys *ws = sctx->ws;
 	struct r600_resource *rdst = r600_resource(dst);
 	enum si_cache_policy cache_policy = get_cache_policy(sctx, coher);
-	unsigned flush_flags = get_flush_flags(sctx, coher, cache_policy);
 	uint64_t dma_clear_size;
-	bool is_first = true;

 	if (!size)
 		return;

 	dma_clear_size = size & ~3ull;

-	/* Mark the buffer range of destination as valid (initialized),
-	 * so that transfer_map knows it should wait for the GPU when mapping
-	 * that range. */
-	util_range_add(&rdst->valid_buffer_range, offset,
-		       offset + dma_clear_size);
-
 	/* dma_clear_buffer can use clear_buffer on failure. Make sure that
 	 * doesn't happen. We don't want an infinite recursion: */
 	if (sctx->dma_cs &&
@@ -261,44 +296,17 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
 	      * For example, DeusEx:MD has 21 buffer clears per frame and all
 	      * of them are moved to SDMA thanks to this. */
 	     !ws->cs_is_buffer_referenced(sctx->gfx_cs, rdst->buf,
-				          RADEON_USAGE_READWRITE)) &&
-	    /* bypass sdma transfer with param xfer */
-	    (xfer != SI_METHOD_CP_DMA)) {
+				          RADEON_USAGE_READWRITE))) {
 		sctx->dma_clear_buffer(sctx, dst, offset, dma_clear_size, value);

 		offset += dma_clear_size;
 		size -= dma_clear_size;
 	} else if (dma_clear_size >= 4) {
-		uint64_t va = rdst->gpu_address + offset;
+		si_cp_dma_clear_buffer(sctx, dst, offset, dma_clear_size, value,
+				       coher, cache_policy);

 		offset += dma_clear_size;
 		size -= dma_clear_size;
-
-		/* Flush the caches. */
-		sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
-			       SI_CONTEXT_CS_PARTIAL_FLUSH | flush_flags;
-
-		while (dma_clear_size) {
-			unsigned byte_count = MIN2(dma_clear_size, cp_dma_max_byte_count(sctx));
-			unsigned dma_flags = CP_DMA_CLEAR;
-
-			si_cp_dma_prepare(sctx, dst, NULL, byte_count, dma_clear_size, 0,
-					  coher, &is_first, &dma_flags);
-
-			/* Emit the clear packet. */
-			si_emit_cp_dma(sctx, va, value, byte_count, dma_flags,
-				       cache_policy);
-
-			dma_clear_size -= byte_count;
-			va += byte_count;
-		}
-
-		if (cache_policy != L2_BYPASS)
-			rdst->TC_L2_dirty = true;
-
-		/* If it's not a framebuffer fast clear... */
-		if (coher == SI_COHERENCY_SHADER)
-			sctx->num_cp_dma_calls++;
 	}

 	if (size) {
@@ -370,7 +378,7 @@ static void si_pipe_clear_buffer(struct pipe_context *ctx,
 	}

 	si_clear_buffer(sctx, dst, offset, size, dword_value,
-			SI_COHERENCY_SHADER, SI_METHOD_BEST);
+			SI_COHERENCY_SHADER);
 }

 /**
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -546,7 +546,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 		/* Clear the NULL constant buffer, because loads should return zeros. */
 		si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0,
 				sctx->null_const_buf.buffer->width0, 0,
-				SI_COHERENCY_SHADER, SI_METHOD_BEST);
+				SI_COHERENCY_SHADER);
 	}

 	uint64_t max_threads_per_block;
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1119,15 +1119,14 @@ enum si_coherency {
 	SI_COHERENCY_CB_META,
 };

-enum si_method {
-	SI_METHOD_CP_DMA,
-	SI_METHOD_BEST,
-};
-
 void si_cp_dma_wait_for_idle(struct si_context *sctx);
+void si_cp_dma_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
+			    uint64_t offset, uint64_t size, unsigned value,
+			    enum si_coherency coher,
+			    enum si_cache_policy cache_policy);
 void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
 		     uint64_t offset, uint64_t size, unsigned value,
-		     enum si_coherency coher, enum si_method xfer);
+		     enum si_coherency coher);
 void si_copy_buffer(struct si_context *sctx,
 		    struct pipe_resource *dst, struct pipe_resource *src,
 		    uint64_t dst_offset, uint64_t src_offset, unsigned size,
--- a/src/gallium/drivers/radeonsi/si_test_clearbuffer.c
+++ b/src/gallium/drivers/radeonsi/si_test_clearbuffer.c
@@ -48,8 +48,8 @@ measure_clearbuf_time(struct pipe_context *ctx,

 	ctx->begin_query(ctx, query_te);
 	/* operation  */
-	si_clear_buffer(sctx, buf, 0, memory_size, 0x00,
-			SI_COHERENCY_SHADER, SI_METHOD_CP_DMA);
+	si_cp_dma_clear_buffer(sctx, buf, 0, memory_size, 0x00,
+			       SI_COHERENCY_SHADER, L2_LRU);
 	ctx->end_query(ctx, query_te);
 	ctx->get_query_result(ctx, query_te, true, &qresult);

--- a/src/gallium/drivers/radeonsi/si_test_dma.c
+++ b/src/gallium/drivers/radeonsi/si_test_dma.c
@@ -308,7 +308,7 @@ void si_test_dma(struct si_screen *sscreen)

 		/* clear dst pixels */
 		si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, 0,
-		                SI_COHERENCY_SHADER, SI_METHOD_BEST);
+		                SI_COHERENCY_SHADER);
 		memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size);

 		/* preparation */