mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-01 19:50:25 +01:00
anv/brw: prep work for SIMD32 ray queries
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: mesa-stable
Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
(cherry picked from commit 6d19b898e7)
Conflicts:
src/intel/compiler/brw/brw_lower_logical_sends.cpp
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39462>
This commit is contained in:
parent
4bdf4f3e89
commit
5f119bb7af
6 changed files with 53 additions and 22 deletions
|
|
@ -4144,7 +4144,7 @@
|
|||
"description": "anv/brw: prep work for SIMD32 ray queries",
|
||||
"nominated": true,
|
||||
"nomination_type": 1,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null,
|
||||
"notes": null
|
||||
|
|
|
|||
|
|
@ -2520,6 +2520,8 @@ system_value("leaf_procedural_intel", 1, bit_sizes=[1])
|
|||
# 2: Miss
|
||||
# 3: Intersection
|
||||
system_value("btd_shader_type_intel", 1)
|
||||
# 64-bit pointer to a pair of RT_DISPATCH_GLOBALS structures, each aligned to
|
||||
# 64B; the pointer itself needs to be 256B aligned.
|
||||
system_value("ray_query_global_intel", 1, bit_sizes=[64])
|
||||
|
||||
# Source 0: Accumulator matrix (type specified by DEST_TYPE)
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@
|
|||
#include "brw_eu.h"
|
||||
#include "brw_shader.h"
|
||||
#include "brw_builder.h"
|
||||
#include "brw_rt.h"
|
||||
|
||||
#include "util/bitpack_helpers.h"
|
||||
|
||||
|
|
@ -2353,11 +2354,16 @@ lower_trace_ray_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
brw_reg header = ubld.vgrf(BRW_TYPE_UD);
|
||||
ubld.MOV(header, brw_imm_ud(0));
|
||||
|
||||
const uint32_t second_group_offset =
|
||||
align(BRW_RT_DISPATCH_GLOBALS_SIZE, 64);
|
||||
|
||||
const brw_reg globals_addr = inst->src[RT_LOGICAL_SRC_GLOBALS];
|
||||
if (globals_addr.file != UNIFORM) {
|
||||
brw_reg addr_ud = retype(globals_addr, BRW_TYPE_UD);
|
||||
addr_ud.stride = 1;
|
||||
ubld.group(2, 0).MOV(header, addr_ud);
|
||||
if (inst->group == 16)
|
||||
ubld.group(1, 0).ADD(header, header, brw_imm_ud(second_group_offset));
|
||||
} else {
|
||||
/* If the globals address comes from a uniform, do not do the SIMD2
|
||||
* optimization. This occurs in many Vulkan CTS tests.
|
||||
|
|
@ -2367,8 +2373,14 @@ lower_trace_ray_logical_send(const brw_builder &bld, brw_inst *inst)
|
|||
* UNIFORM will be uniform (i.e., <0,1,0>). The clever SIMD2
|
||||
* optimization violates that assumption.
|
||||
*/
|
||||
ubld.group(1, 0).MOV(byte_offset(header, 0),
|
||||
subscript(globals_addr, BRW_TYPE_UD, 0));
|
||||
if (inst->group == 16) {
|
||||
ubld.group(1, 0).ADD(byte_offset(header, 0),
|
||||
subscript(globals_addr, BRW_TYPE_UD, 0),
|
||||
brw_imm_ud(second_group_offset));
|
||||
} else {
|
||||
ubld.group(1, 0).MOV(byte_offset(header, 0),
|
||||
subscript(globals_addr, BRW_TYPE_UD, 0));
|
||||
}
|
||||
ubld.group(1, 0).MOV(byte_offset(header, 4),
|
||||
subscript(globals_addr, BRW_TYPE_UD, 1));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,6 +38,13 @@ extern "C" {
|
|||
/** RT_DISPATCH_GLOBALS size (see gen_rt.xml) */
|
||||
#define BRW_RT_DISPATCH_GLOBALS_SIZE 80
|
||||
|
||||
/** RT_DISPATCH_GLOBALS alignment
|
||||
*
|
||||
* Use 256B to make sure we can access the pair of RT_DISPATCH_GLOBALS without
|
||||
* 64bit math.
|
||||
*/
|
||||
#define BRW_RT_DISPATCH_GLOBALS_ALIGN 256
|
||||
|
||||
/** Offset after the RT dispatch globals at which "push" constants live */
|
||||
#define BRW_RT_PUSH_CONST_OFFSET 128
|
||||
|
||||
|
|
|
|||
|
|
@ -4161,7 +4161,11 @@ struct anv_push_constants {
|
|||
*/
|
||||
uint32_t surfaces_base_offset;
|
||||
|
||||
/** Ray query globals (RT_DISPATCH_GLOBALS) */
|
||||
/** Ray query globals
|
||||
*
|
||||
* Pointer to a couple of RT_DISPATCH_GLOBALS structures (see
|
||||
* genX(cmd_buffer_ray_query_globals))
|
||||
*/
|
||||
uint64_t ray_query_globals;
|
||||
|
||||
union {
|
||||
|
|
|
|||
|
|
@ -817,7 +817,8 @@ genX(cmd_buffer_ray_query_globals)(struct anv_cmd_buffer *cmd_buffer)
|
|||
|
||||
struct anv_state state =
|
||||
anv_cmd_buffer_alloc_temporary_state(cmd_buffer,
|
||||
BRW_RT_DISPATCH_GLOBALS_SIZE, 64);
|
||||
2 * align(BRW_RT_DISPATCH_GLOBALS_SIZE, 64),
|
||||
BRW_RT_DISPATCH_GLOBALS_ALIGN);
|
||||
struct brw_rt_scratch_layout layout;
|
||||
uint32_t stack_ids_per_dss = 2048; /* TODO: can we use a lower value in
|
||||
* some cases?
|
||||
|
|
@ -827,23 +828,28 @@ genX(cmd_buffer_ray_query_globals)(struct anv_cmd_buffer *cmd_buffer)
|
|||
|
||||
uint8_t idx = anv_get_ray_query_bo_index(cmd_buffer);
|
||||
|
||||
const struct GENX(RT_DISPATCH_GLOBALS) rtdg = {
|
||||
.MemBaseAddress = (struct anv_address) {
|
||||
/* The ray query HW computes offsets from the top of the buffer, so
|
||||
* let the address at the end of the buffer.
|
||||
*/
|
||||
.bo = device->ray_query_bo[idx],
|
||||
.offset = device->ray_query_bo[idx]->size
|
||||
},
|
||||
.AsyncRTStackSize = layout.ray_stack_stride / 64,
|
||||
.NumDSSRTStacks = layout.stack_ids_per_dss,
|
||||
.MaxBVHLevels = BRW_RT_MAX_BVH_LEVELS,
|
||||
.Flags = RT_DEPTH_TEST_LESS_EQUAL,
|
||||
.ResumeShaderTable = (struct anv_address) {
|
||||
.bo = cmd_buffer->state.ray_query_shadow_bo,
|
||||
},
|
||||
};
|
||||
GENX(RT_DISPATCH_GLOBALS_pack)(NULL, state.map, &rtdg);
|
||||
for (uint32_t i = 0; i < 2; i++) {
|
||||
const struct GENX(RT_DISPATCH_GLOBALS) rtdg = {
|
||||
.MemBaseAddress = (struct anv_address) {
|
||||
/* The ray query HW computes offsets from the top of the buffer, so
|
||||
* point each address at the end of its half of the buffer.
|
||||
*/
|
||||
.bo = device->ray_query_bo[idx],
|
||||
.offset = (i + 1) * (device->ray_query_bo[idx]->size / 2),
|
||||
},
|
||||
.AsyncRTStackSize = layout.ray_stack_stride / 64,
|
||||
.NumDSSRTStacks = layout.stack_ids_per_dss,
|
||||
.MaxBVHLevels = BRW_RT_MAX_BVH_LEVELS,
|
||||
.Flags = RT_DEPTH_TEST_LESS_EQUAL,
|
||||
.ResumeShaderTable = (struct anv_address) {
|
||||
.bo = cmd_buffer->state.ray_query_shadow_bo,
|
||||
},
|
||||
};
|
||||
GENX(RT_DISPATCH_GLOBALS_pack)(
|
||||
NULL,
|
||||
state.map + i * align(4 * GENX(RT_DISPATCH_GLOBALS_length), 64),
|
||||
&rtdg);
|
||||
}
|
||||
|
||||
return anv_cmd_buffer_temporary_state_address(cmd_buffer, state);
|
||||
#else
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue