mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 20:00:10 +01:00
i965: Use new helper functions to pick SIMD variant for CS
Also expand the existing i965 helper to return the other CS-related parameters. Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5142>
This commit is contained in:
parent
cb26d9c311
commit
ee0fc0f6dc
4 changed files with 43 additions and 31 deletions
|
|
@ -32,25 +32,34 @@
|
||||||
#include "brw_program.h"
|
#include "brw_program.h"
|
||||||
#include "compiler/glsl/ir_uniform.h"
|
#include "compiler/glsl/ir_uniform.h"
|
||||||
|
|
||||||
uint32_t
|
struct brw_cs_parameters
|
||||||
brw_cs_group_size(const struct brw_context *brw)
|
brw_cs_get_parameters(const struct brw_context *brw)
|
||||||
{
|
{
|
||||||
assert(brw->cs.base.prog_data);
|
assert(brw->cs.base.prog_data);
|
||||||
struct brw_cs_prog_data *cs_prog_data =
|
struct brw_cs_prog_data *cs_prog_data =
|
||||||
brw_cs_prog_data(brw->cs.base.prog_data);
|
brw_cs_prog_data(brw->cs.base.prog_data);
|
||||||
|
|
||||||
|
struct brw_cs_parameters params = {};
|
||||||
|
|
||||||
if (brw->compute.group_size) {
|
if (brw->compute.group_size) {
|
||||||
/* With ARB_compute_variable_group_size the group size is set at
|
/* With ARB_compute_variable_group_size the group size is set at
|
||||||
* dispatch time, so we can't use the one provided by the compiler.
|
* dispatch time, so we can't use the one provided by the compiler.
|
||||||
*/
|
*/
|
||||||
return brw->compute.group_size[0] *
|
params.group_size = brw->compute.group_size[0] *
|
||||||
brw->compute.group_size[1] *
|
brw->compute.group_size[1] *
|
||||||
brw->compute.group_size[2];
|
brw->compute.group_size[2];
|
||||||
} else {
|
} else {
|
||||||
return cs_prog_data->local_size[0] *
|
params.group_size = cs_prog_data->local_size[0] *
|
||||||
cs_prog_data->local_size[1] *
|
cs_prog_data->local_size[1] *
|
||||||
cs_prog_data->local_size[2];
|
cs_prog_data->local_size[2];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
params.simd_size =
|
||||||
|
brw_cs_simd_size_for_group_size(&brw->screen->devinfo,
|
||||||
|
cs_prog_data, params.group_size);
|
||||||
|
params.threads = DIV_ROUND_UP(params.group_size, params.simd_size);
|
||||||
|
|
||||||
|
return params;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
|
||||||
|
|
@ -29,8 +29,14 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
uint32_t
|
struct brw_cs_parameters {
|
||||||
brw_cs_group_size(const struct brw_context *brw);
|
unsigned group_size;
|
||||||
|
unsigned simd_size;
|
||||||
|
unsigned threads;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct brw_cs_parameters
|
||||||
|
brw_cs_get_parameters(const struct brw_context *brw);
|
||||||
|
|
||||||
void
|
void
|
||||||
brw_upload_cs_prog(struct brw_context *brw);
|
brw_upload_cs_prog(struct brw_context *brw);
|
||||||
|
|
|
||||||
|
|
@ -308,10 +308,9 @@ brw_upload_cs_push_constants(struct brw_context *brw,
|
||||||
/* XXX: Should this happen somewhere before to get our state flag set? */
|
/* XXX: Should this happen somewhere before to get our state flag set? */
|
||||||
_mesa_load_state_parameters(ctx, prog->Parameters);
|
_mesa_load_state_parameters(ctx, prog->Parameters);
|
||||||
|
|
||||||
const unsigned threads =
|
const struct brw_cs_parameters cs_params = brw_cs_get_parameters(brw);
|
||||||
DIV_ROUND_UP(brw_cs_group_size(brw), cs_prog_data->simd_size);
|
|
||||||
const unsigned push_const_size =
|
const unsigned push_const_size =
|
||||||
brw_cs_push_const_total_size(cs_prog_data, threads);
|
brw_cs_push_const_total_size(cs_prog_data, cs_params.threads);
|
||||||
|
|
||||||
if (push_const_size == 0) {
|
if (push_const_size == 0) {
|
||||||
stage_state->push_const_size = 0;
|
stage_state->push_const_size = 0;
|
||||||
|
|
@ -338,7 +337,7 @@ brw_upload_cs_push_constants(struct brw_context *brw,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cs_prog_data->push.per_thread.size > 0) {
|
if (cs_prog_data->push.per_thread.size > 0) {
|
||||||
for (unsigned t = 0; t < threads; t++) {
|
for (unsigned t = 0; t < cs_params.threads; t++) {
|
||||||
unsigned dst =
|
unsigned dst =
|
||||||
8 * (cs_prog_data->push.per_thread.regs * t +
|
8 * (cs_prog_data->push.per_thread.regs * t +
|
||||||
cs_prog_data->push.cross_thread.regs);
|
cs_prog_data->push.cross_thread.regs);
|
||||||
|
|
|
||||||
|
|
@ -4264,8 +4264,7 @@ genX(upload_cs_state)(struct brw_context *brw)
|
||||||
struct brw_cs_prog_data *cs_prog_data = brw_cs_prog_data(prog_data);
|
struct brw_cs_prog_data *cs_prog_data = brw_cs_prog_data(prog_data);
|
||||||
const struct gen_device_info *devinfo = &brw->screen->devinfo;
|
const struct gen_device_info *devinfo = &brw->screen->devinfo;
|
||||||
|
|
||||||
const unsigned threads =
|
const struct brw_cs_parameters cs_params = brw_cs_get_parameters(brw);
|
||||||
DIV_ROUND_UP(brw_cs_group_size(brw), cs_prog_data->simd_size);
|
|
||||||
|
|
||||||
if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
|
if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
|
||||||
brw_emit_buffer_surface_state(
|
brw_emit_buffer_surface_state(
|
||||||
|
|
@ -4357,13 +4356,13 @@ genX(upload_cs_state)(struct brw_context *brw)
|
||||||
vfe.URBEntryAllocationSize = GEN_GEN >= 8 ? 2 : 0;
|
vfe.URBEntryAllocationSize = GEN_GEN >= 8 ? 2 : 0;
|
||||||
|
|
||||||
const uint32_t vfe_curbe_allocation =
|
const uint32_t vfe_curbe_allocation =
|
||||||
ALIGN(cs_prog_data->push.per_thread.regs * threads +
|
ALIGN(cs_prog_data->push.per_thread.regs * cs_params.threads +
|
||||||
cs_prog_data->push.cross_thread.regs, 2);
|
cs_prog_data->push.cross_thread.regs, 2);
|
||||||
vfe.CURBEAllocationSize = vfe_curbe_allocation;
|
vfe.CURBEAllocationSize = vfe_curbe_allocation;
|
||||||
}
|
}
|
||||||
|
|
||||||
const unsigned push_const_size =
|
const unsigned push_const_size =
|
||||||
brw_cs_push_const_total_size(cs_prog_data, threads);
|
brw_cs_push_const_total_size(cs_prog_data, cs_params.threads);
|
||||||
if (push_const_size > 0) {
|
if (push_const_size > 0) {
|
||||||
brw_batch_emit(brw, GENX(MEDIA_CURBE_LOAD), curbe) {
|
brw_batch_emit(brw, GENX(MEDIA_CURBE_LOAD), curbe) {
|
||||||
curbe.CURBETotalDataLength = ALIGN(push_const_size, 64);
|
curbe.CURBETotalDataLength = ALIGN(push_const_size, 64);
|
||||||
|
|
@ -4374,15 +4373,18 @@ genX(upload_cs_state)(struct brw_context *brw)
|
||||||
/* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
|
/* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
|
||||||
memcpy(bind, stage_state->surf_offset,
|
memcpy(bind, stage_state->surf_offset,
|
||||||
prog_data->binding_table.size_bytes);
|
prog_data->binding_table.size_bytes);
|
||||||
|
const uint64_t ksp = brw->cs.base.prog_offset +
|
||||||
|
brw_cs_prog_data_prog_offset(cs_prog_data,
|
||||||
|
cs_params.simd_size);
|
||||||
const struct GENX(INTERFACE_DESCRIPTOR_DATA) idd = {
|
const struct GENX(INTERFACE_DESCRIPTOR_DATA) idd = {
|
||||||
.KernelStartPointer = brw->cs.base.prog_offset,
|
.KernelStartPointer = ksp,
|
||||||
.SamplerStatePointer = stage_state->sampler_offset,
|
.SamplerStatePointer = stage_state->sampler_offset,
|
||||||
/* WA_1606682166 */
|
/* WA_1606682166 */
|
||||||
.SamplerCount = GEN_GEN == 11 ? 0 :
|
.SamplerCount = GEN_GEN == 11 ? 0 :
|
||||||
DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4),
|
DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4),
|
||||||
.BindingTablePointer = stage_state->bind_bo_offset,
|
.BindingTablePointer = stage_state->bind_bo_offset,
|
||||||
.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
|
.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
|
||||||
.NumberofThreadsinGPGPUThreadGroup = threads,
|
.NumberofThreadsinGPGPUThreadGroup = cs_params.threads,
|
||||||
.SharedLocalMemorySize = encode_slm_size(GEN_GEN,
|
.SharedLocalMemorySize = encode_slm_size(GEN_GEN,
|
||||||
prog_data->total_shared),
|
prog_data->total_shared),
|
||||||
.BarrierEnable = cs_prog_data->uses_barrier,
|
.BarrierEnable = cs_prog_data->uses_barrier,
|
||||||
|
|
@ -4479,31 +4481,27 @@ prepare_indirect_gpgpu_walker(struct brw_context *brw)
|
||||||
static void
|
static void
|
||||||
genX(emit_gpgpu_walker)(struct brw_context *brw)
|
genX(emit_gpgpu_walker)(struct brw_context *brw)
|
||||||
{
|
{
|
||||||
const struct brw_cs_prog_data *prog_data =
|
|
||||||
brw_cs_prog_data(brw->cs.base.prog_data);
|
|
||||||
|
|
||||||
const GLuint *num_groups = brw->compute.num_work_groups;
|
const GLuint *num_groups = brw->compute.num_work_groups;
|
||||||
|
|
||||||
bool indirect = brw->compute.num_work_groups_bo != NULL;
|
bool indirect = brw->compute.num_work_groups_bo != NULL;
|
||||||
if (indirect)
|
if (indirect)
|
||||||
prepare_indirect_gpgpu_walker(brw);
|
prepare_indirect_gpgpu_walker(brw);
|
||||||
|
|
||||||
const unsigned group_size = brw_cs_group_size(brw);
|
const struct brw_cs_parameters cs_params = brw_cs_get_parameters(brw);
|
||||||
const unsigned simd_size = prog_data->simd_size;
|
|
||||||
unsigned thread_width_max = DIV_ROUND_UP(group_size, simd_size);
|
|
||||||
|
|
||||||
uint32_t right_mask = 0xffffffffu >> (32 - simd_size);
|
uint32_t right_mask = 0xffffffffu >> (32 - cs_params.simd_size);
|
||||||
const unsigned right_non_aligned = group_size & (simd_size - 1);
|
const unsigned right_non_aligned =
|
||||||
|
cs_params.group_size & (cs_params.simd_size - 1);
|
||||||
if (right_non_aligned != 0)
|
if (right_non_aligned != 0)
|
||||||
right_mask >>= (simd_size - right_non_aligned);
|
right_mask >>= (cs_params.simd_size - right_non_aligned);
|
||||||
|
|
||||||
brw_batch_emit(brw, GENX(GPGPU_WALKER), ggw) {
|
brw_batch_emit(brw, GENX(GPGPU_WALKER), ggw) {
|
||||||
ggw.IndirectParameterEnable = indirect;
|
ggw.IndirectParameterEnable = indirect;
|
||||||
ggw.PredicateEnable = GEN_GEN <= 7 && indirect;
|
ggw.PredicateEnable = GEN_GEN <= 7 && indirect;
|
||||||
ggw.SIMDSize = prog_data->simd_size / 16;
|
ggw.SIMDSize = cs_params.simd_size / 16;
|
||||||
ggw.ThreadDepthCounterMaximum = 0;
|
ggw.ThreadDepthCounterMaximum = 0;
|
||||||
ggw.ThreadHeightCounterMaximum = 0;
|
ggw.ThreadHeightCounterMaximum = 0;
|
||||||
ggw.ThreadWidthCounterMaximum = thread_width_max - 1;
|
ggw.ThreadWidthCounterMaximum = cs_params.threads - 1;
|
||||||
ggw.ThreadGroupIDXDimension = num_groups[0];
|
ggw.ThreadGroupIDXDimension = num_groups[0];
|
||||||
ggw.ThreadGroupIDYDimension = num_groups[1];
|
ggw.ThreadGroupIDYDimension = num_groups[1];
|
||||||
ggw.ThreadGroupIDZDimension = num_groups[2];
|
ggw.ThreadGroupIDZDimension = num_groups[2];
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue