mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 09:08:10 +02:00
iris: Skip mi_builder init for indirect draws
We only need it for indirect draws. Improves performance on an i7-12700 and A770: - Piglit's drawoverhead base case +150.639% +/- 2.86933% (n=15). - gfxbench5 gl_driver2_off +19.7219% +/- 1.13778% (n=15) - SPECviewperf2020 catiav5test1 +1.6831% +/- 0.552052% (n=10). Cc: mesa-stable Reviewed-by: José Roberto de Souza <jose.souza@intel.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26806>
This commit is contained in:
parent
d55b5d4af5
commit
55c262898a
1 changed files with 89 additions and 87 deletions
|
|
@ -8250,6 +8250,11 @@ iris_upload_render_state(struct iris_context *ice,
|
|||
#endif
|
||||
}
|
||||
|
||||
if (indirect) {
|
||||
struct mi_builder b;
|
||||
uint32_t mocs;
|
||||
mi_builder_init(&b, batch->screen->devinfo, batch);
|
||||
|
||||
#define _3DPRIM_END_OFFSET 0x2420
|
||||
#define _3DPRIM_START_VERTEX 0x2430
|
||||
#define _3DPRIM_VERTEX_COUNT 0x2434
|
||||
|
|
@ -8257,103 +8262,100 @@ iris_upload_render_state(struct iris_context *ice,
|
|||
#define _3DPRIM_START_INSTANCE 0x243C
|
||||
#define _3DPRIM_BASE_VERTEX 0x2440
|
||||
|
||||
struct mi_builder b;
|
||||
uint32_t mocs;
|
||||
mi_builder_init(&b, batch->screen->devinfo, batch);
|
||||
if (!indirect->count_from_stream_output) {
|
||||
if (indirect->indirect_draw_count) {
|
||||
use_predicate = true;
|
||||
|
||||
if (indirect && !indirect->count_from_stream_output) {
|
||||
if (indirect->indirect_draw_count) {
|
||||
use_predicate = true;
|
||||
struct iris_bo *draw_count_bo =
|
||||
iris_resource_bo(indirect->indirect_draw_count);
|
||||
unsigned draw_count_offset =
|
||||
indirect->indirect_draw_count_offset;
|
||||
mocs = iris_mocs(draw_count_bo, &batch->screen->isl_dev, 0);
|
||||
mi_builder_set_mocs(&b, mocs);
|
||||
|
||||
struct iris_bo *draw_count_bo =
|
||||
iris_resource_bo(indirect->indirect_draw_count);
|
||||
unsigned draw_count_offset =
|
||||
indirect->indirect_draw_count_offset;
|
||||
mocs = iris_mocs(draw_count_bo, &batch->screen->isl_dev, 0);
|
||||
if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) {
|
||||
/* comparison = draw id < draw count */
|
||||
struct mi_value comparison =
|
||||
mi_ult(&b, mi_imm(drawid_offset),
|
||||
mi_mem32(ro_bo(draw_count_bo, draw_count_offset)));
|
||||
|
||||
/* predicate = comparison & conditional rendering predicate */
|
||||
mi_store(&b, mi_reg32(MI_PREDICATE_RESULT),
|
||||
mi_iand(&b, comparison, mi_reg32(CS_GPR(15))));
|
||||
} else {
|
||||
uint32_t mi_predicate;
|
||||
|
||||
/* Upload the id of the current primitive to MI_PREDICATE_SRC1. */
|
||||
mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(drawid_offset));
|
||||
/* Upload the current draw count from the draw parameters buffer
|
||||
* to MI_PREDICATE_SRC0. Zero the top 32-bits of
|
||||
* MI_PREDICATE_SRC0.
|
||||
*/
|
||||
mi_store(&b, mi_reg64(MI_PREDICATE_SRC0),
|
||||
mi_mem32(ro_bo(draw_count_bo, draw_count_offset)));
|
||||
|
||||
if (drawid_offset == 0) {
|
||||
mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
|
||||
MI_PREDICATE_COMBINEOP_SET |
|
||||
MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
|
||||
} else {
|
||||
/* While draw_index < draw_count the predicate's result will be
|
||||
* (draw_index == draw_count) ^ TRUE = TRUE
|
||||
* When draw_index == draw_count the result is
|
||||
* (TRUE) ^ TRUE = FALSE
|
||||
* After this all results will be:
|
||||
* (FALSE) ^ FALSE = FALSE
|
||||
*/
|
||||
mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOAD |
|
||||
MI_PREDICATE_COMBINEOP_XOR |
|
||||
MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
|
||||
}
|
||||
iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
|
||||
}
|
||||
}
|
||||
struct iris_bo *bo = iris_resource_bo(indirect->buffer);
|
||||
assert(bo);
|
||||
|
||||
mocs = iris_mocs(bo, &batch->screen->isl_dev, 0);
|
||||
mi_builder_set_mocs(&b, mocs);
|
||||
|
||||
if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) {
|
||||
/* comparison = draw id < draw count */
|
||||
struct mi_value comparison =
|
||||
mi_ult(&b, mi_imm(drawid_offset),
|
||||
mi_mem32(ro_bo(draw_count_bo, draw_count_offset)));
|
||||
|
||||
/* predicate = comparison & conditional rendering predicate */
|
||||
mi_store(&b, mi_reg32(MI_PREDICATE_RESULT),
|
||||
mi_iand(&b, comparison, mi_reg32(CS_GPR(15))));
|
||||
mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT),
|
||||
mi_mem32(ro_bo(bo, indirect->offset + 0)));
|
||||
mi_store(&b, mi_reg32(_3DPRIM_INSTANCE_COUNT),
|
||||
mi_mem32(ro_bo(bo, indirect->offset + 4)));
|
||||
mi_store(&b, mi_reg32(_3DPRIM_START_VERTEX),
|
||||
mi_mem32(ro_bo(bo, indirect->offset + 8)));
|
||||
if (draw->index_size) {
|
||||
mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX),
|
||||
mi_mem32(ro_bo(bo, indirect->offset + 12)));
|
||||
mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE),
|
||||
mi_mem32(ro_bo(bo, indirect->offset + 16)));
|
||||
} else {
|
||||
uint32_t mi_predicate;
|
||||
|
||||
/* Upload the id of the current primitive to MI_PREDICATE_SRC1. */
|
||||
mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(drawid_offset));
|
||||
/* Upload the current draw count from the draw parameters buffer
|
||||
* to MI_PREDICATE_SRC0. Zero the top 32-bits of
|
||||
* MI_PREDICATE_SRC0.
|
||||
*/
|
||||
mi_store(&b, mi_reg64(MI_PREDICATE_SRC0),
|
||||
mi_mem32(ro_bo(draw_count_bo, draw_count_offset)));
|
||||
|
||||
if (drawid_offset == 0) {
|
||||
mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
|
||||
MI_PREDICATE_COMBINEOP_SET |
|
||||
MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
|
||||
} else {
|
||||
/* While draw_index < draw_count the predicate's result will be
|
||||
* (draw_index == draw_count) ^ TRUE = TRUE
|
||||
* When draw_index == draw_count the result is
|
||||
* (TRUE) ^ TRUE = FALSE
|
||||
* After this all results will be:
|
||||
* (FALSE) ^ FALSE = FALSE
|
||||
*/
|
||||
mi_predicate = MI_PREDICATE | MI_PREDICATE_LOADOP_LOAD |
|
||||
MI_PREDICATE_COMBINEOP_XOR |
|
||||
MI_PREDICATE_COMPAREOP_SRCS_EQUAL;
|
||||
}
|
||||
iris_batch_emit(batch, &mi_predicate, sizeof(uint32_t));
|
||||
mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE),
|
||||
mi_mem32(ro_bo(bo, indirect->offset + 12)));
|
||||
mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX), mi_imm(0));
|
||||
}
|
||||
}
|
||||
struct iris_bo *bo = iris_resource_bo(indirect->buffer);
|
||||
assert(bo);
|
||||
} else if (indirect->count_from_stream_output) {
|
||||
struct iris_stream_output_target *so =
|
||||
(void *) indirect->count_from_stream_output;
|
||||
struct iris_bo *so_bo = iris_resource_bo(so->offset.res);
|
||||
|
||||
mocs = iris_mocs(bo, &batch->screen->isl_dev, 0);
|
||||
mi_builder_set_mocs(&b, mocs);
|
||||
mocs = iris_mocs(so_bo, &batch->screen->isl_dev, 0);
|
||||
mi_builder_set_mocs(&b, mocs);
|
||||
|
||||
mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT),
|
||||
mi_mem32(ro_bo(bo, indirect->offset + 0)));
|
||||
mi_store(&b, mi_reg32(_3DPRIM_INSTANCE_COUNT),
|
||||
mi_mem32(ro_bo(bo, indirect->offset + 4)));
|
||||
mi_store(&b, mi_reg32(_3DPRIM_START_VERTEX),
|
||||
mi_mem32(ro_bo(bo, indirect->offset + 8)));
|
||||
if (draw->index_size) {
|
||||
mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX),
|
||||
mi_mem32(ro_bo(bo, indirect->offset + 12)));
|
||||
mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE),
|
||||
mi_mem32(ro_bo(bo, indirect->offset + 16)));
|
||||
} else {
|
||||
mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE),
|
||||
mi_mem32(ro_bo(bo, indirect->offset + 12)));
|
||||
iris_emit_buffer_barrier_for(batch, so_bo, IRIS_DOMAIN_OTHER_READ);
|
||||
|
||||
struct iris_address addr = ro_bo(so_bo, so->offset.offset);
|
||||
struct mi_value offset =
|
||||
mi_iadd_imm(&b, mi_mem32(addr), -so->base.buffer_offset);
|
||||
mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT),
|
||||
mi_udiv32_imm(&b, offset, so->stride));
|
||||
mi_store(&b, mi_reg32(_3DPRIM_START_VERTEX), mi_imm(0));
|
||||
mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX), mi_imm(0));
|
||||
mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE), mi_imm(0));
|
||||
mi_store(&b, mi_reg32(_3DPRIM_INSTANCE_COUNT),
|
||||
mi_imm(draw->instance_count));
|
||||
}
|
||||
} else if (indirect && indirect->count_from_stream_output) {
|
||||
struct iris_stream_output_target *so =
|
||||
(void *) indirect->count_from_stream_output;
|
||||
struct iris_bo *so_bo = iris_resource_bo(so->offset.res);
|
||||
|
||||
mocs = iris_mocs(so_bo, &batch->screen->isl_dev, 0);
|
||||
mi_builder_set_mocs(&b, mocs);
|
||||
|
||||
iris_emit_buffer_barrier_for(batch, so_bo, IRIS_DOMAIN_OTHER_READ);
|
||||
|
||||
struct iris_address addr = ro_bo(so_bo, so->offset.offset);
|
||||
struct mi_value offset =
|
||||
mi_iadd_imm(&b, mi_mem32(addr), -so->base.buffer_offset);
|
||||
mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT),
|
||||
mi_udiv32_imm(&b, offset, so->stride));
|
||||
mi_store(&b, mi_reg32(_3DPRIM_START_VERTEX), mi_imm(0));
|
||||
mi_store(&b, mi_reg32(_3DPRIM_BASE_VERTEX), mi_imm(0));
|
||||
mi_store(&b, mi_reg32(_3DPRIM_START_INSTANCE), mi_imm(0));
|
||||
mi_store(&b, mi_reg32(_3DPRIM_INSTANCE_COUNT),
|
||||
mi_imm(draw->instance_count));
|
||||
}
|
||||
|
||||
iris_measure_snapshot(ice, batch, INTEL_SNAPSHOT_DRAW, draw, indirect, sc);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue