diff --git a/src/intel/compiler/brw_compile_bs.cpp b/src/intel/compiler/brw_compile_bs.cpp index ab71d3dbfa7..76b7c4dec36 100644 --- a/src/intel/compiler/brw_compile_bs.cpp +++ b/src/intel/compiler/brw_compile_bs.cpp @@ -18,13 +18,15 @@ static uint64_t brw_bsr(const struct intel_device_info *devinfo, - uint32_t offset, uint8_t simd_size, uint8_t local_arg_offset) + uint32_t offset, uint8_t simd_size, uint8_t local_arg_offset, + uint8_t grf_used) { assert(offset % 64 == 0); assert(simd_size == 8 || simd_size == 16); assert(local_arg_offset % 8 == 0); - return offset | + return ((uint64_t)ptl_register_blocks(grf_used) << 60) | + offset | SET_BITS(simd_size == 8, 4, 4) | SET_BITS(local_arg_offset / 8, 2, 0); } @@ -69,7 +71,8 @@ compile_single_bs(const struct brw_compiler *compiler, nir_shader *shader, brw_generator *g, struct brw_compile_stats *stats, - int *prog_offset) + int *prog_offset, + uint64_t *bsr) { const bool debug_enabled = brw_should_print_shader(shader, DEBUG_RT); @@ -147,7 +150,10 @@ compile_single_bs(const struct brw_compiler *compiler, else assert(offset == 0); - if (!prog_offset) + if (bsr) + *bsr = brw_bsr(compiler->devinfo, offset, dispatch_width, 0, + selected->grf_used); + else prog_data->base.grf_used = MAX2(prog_data->base.grf_used, selected->grf_used); @@ -185,7 +191,7 @@ brw_compile_bs(const struct brw_compiler *compiler, prog_data->simd_size = compile_single_bs(compiler, params, params->key, prog_data, - shader, &g, params->base.stats, NULL); + shader, &g, params->base.stats, NULL, NULL); if (prog_data->simd_size == 0) return NULL; @@ -206,12 +212,12 @@ brw_compile_bs(const struct brw_compiler *compiler, int offset = 0; uint8_t simd_size = compile_single_bs(compiler, params, params->key, - prog_data, resume_shaders[i], &g, NULL, &offset); + prog_data, resume_shaders[i], &g, NULL, &offset, + &resume_sbt[i]); if (simd_size == 0) return NULL; assert(offset > 0); - resume_sbt[i] = brw_bsr(compiler->devinfo, offset, simd_size, 0); } /* We only 
have one constant data so we want to make sure they're all the diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 2756e0bd35b..b60f93c8a09 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -2214,6 +2214,14 @@ lower_btd_logical_send(const brw_builder &bld, fs_inst *inst) global_addr.type = BRW_TYPE_UD; global_addr.stride = 1; ubld.group(2, 0).MOV(header, global_addr); + + /* XXX - There is a Registers Per Thread field in the BTD spawn + * header starting on Xe3; it doesn't appear to be needed + * by the hardware, so we don't set it. If it's ever + * needed, though, we will need some sort of reloc, since + * we'll have to initialize it based on the prog_data + * structure of the callee. + */ break; case SHADER_OPCODE_BTD_RETIRE_LOGICAL: diff --git a/src/intel/genxml/gen300_rt.xml b/src/intel/genxml/gen300_rt.xml index efa7dc7d79d..9579d1b4ef5 100644 --- a/src/intel/genxml/gen300_rt.xml +++ b/src/intel/genxml/gen300_rt.xml @@ -1,4 +1,12 @@ + + + + + + + + diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 20374358c20..769cd75da44 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -276,6 +276,21 @@ genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline); void genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline); +#if GFX_VERx10 >= 300 +#define anv_shader_bin_get_bsr(bin, local_arg_offset) ({ \ + assert((local_arg_offset) % 8 == 0); \ + const struct brw_bs_prog_data *prog_data = \ + brw_bs_prog_data_const(bin->prog_data); \ + assert(prog_data->simd_size == 16); \ + \ + (struct GENX(BINDLESS_SHADER_RECORD)) { \ + .OffsetToLocalArguments = (local_arg_offset) / 8, \ + .BindlessShaderDispatchMode = RT_SIMD16, \ + .KernelStartPointer = bin->kernel.offset, \ + .RegistersPerThread = ptl_register_blocks(prog_data->base.grf_used), \ + }; \ +}) +#else #define 
anv_shader_bin_get_bsr(bin, local_arg_offset) ({ \ assert((local_arg_offset) % 8 == 0); \ const struct brw_bs_prog_data *prog_data = \ @@ -289,6 +304,7 @@ genX(ray_tracing_pipeline_emit)(struct anv_ray_tracing_pipeline *pipeline); .KernelStartPointer = bin->kernel.offset, \ }; \ }) +#endif void genX(batch_set_preemption)(struct anv_batch *batch,