mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-19 10:10:39 +02:00
freedreno/ir3: Add support for load_kernel_input
Used for function arguments to compute kernels (i.e. OpenCL). Signed-off-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13300>
This commit is contained in:
parent
e10c76d277
commit
e544a9db16
5 changed files with 75 additions and 2 deletions
|
|
@ -851,6 +851,41 @@ emit_intrinsic_load_ubo(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
|||
}
|
||||
}
|
||||
|
||||
/* Load a kernel param: src[] = { address }. */
|
||||
static void
|
||||
emit_intrinsic_load_kernel_input(struct ir3_context *ctx,
|
||||
nir_intrinsic_instr *intr,
|
||||
struct ir3_instruction **dst)
|
||||
{
|
||||
const struct ir3_const_state *const_state = ir3_const_state(ctx->so);
|
||||
struct ir3_block *b = ctx->block;
|
||||
unsigned offset = nir_intrinsic_base(intr);
|
||||
unsigned p = regid(const_state->offsets.kernel_params, 0);
|
||||
|
||||
struct ir3_instruction *src0 = ir3_get_src(ctx, &intr->src[0])[0];
|
||||
|
||||
if (is_same_type_mov(src0) && (src0->srcs[0]->flags & IR3_REG_IMMED)) {
|
||||
offset += src0->srcs[0]->iim_val;
|
||||
|
||||
/* kernel param position is in bytes, but constant space is 32b registers: */
|
||||
compile_assert(ctx, !(offset & 0x3));
|
||||
|
||||
dst[0] = create_uniform(b, p + (offset / 4));
|
||||
} else {
|
||||
/* kernel param position is in bytes, but constant space is 32b registers: */
|
||||
compile_assert(ctx, !(offset & 0x3));
|
||||
|
||||
/* TODO we should probably be lowering this in nir, and also handling
|
||||
* non-32b inputs.. Also we probably don't want to be using
|
||||
* SP_MODE_CONTROL.CONSTANT_DEMOTION_ENABLE for KERNEL shaders..
|
||||
*/
|
||||
src0 = ir3_SHR_B(b, src0, 0, create_immed(b, 2), 0);
|
||||
|
||||
dst[0] = create_uniform_indirect(b, offset / 4, TYPE_U32,
|
||||
ir3_get_addr0(ctx, src0, 1));
|
||||
}
|
||||
}
|
||||
|
||||
/* src[] = { block_index } */
|
||||
static void
|
||||
emit_intrinsic_ssbo_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
||||
|
|
@ -1801,6 +1836,9 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
case nir_intrinsic_load_input:
|
||||
setup_input(ctx, intr);
|
||||
break;
|
||||
case nir_intrinsic_load_kernel_input:
|
||||
emit_intrinsic_load_kernel_input(ctx, intr, dst);
|
||||
break;
|
||||
/* All SSBO intrinsics should have been lowered by 'lower_io_offsets'
|
||||
* pass and replaced by an ir3-specific version that adds the
|
||||
* dword-offset in the last source.
|
||||
|
|
|
|||
|
|
@ -888,6 +888,11 @@ ir3_setup_const_state(nir_shader *nir, struct ir3_shader_variant *v,
|
|||
constoff += align(cnt, 4) / 4;
|
||||
}
|
||||
|
||||
if (v->type == MESA_SHADER_KERNEL) {
|
||||
const_state->offsets.kernel_params = constoff;
|
||||
constoff += align(v->shader->cs.req_input_mem, 4) / 4;
|
||||
}
|
||||
|
||||
if (const_state->num_driver_params > 0) {
|
||||
/* num_driver_params in dwords. we only need to align to vec4s for the
|
||||
* common case of immediate constant uploads, but for indirect dispatch
|
||||
|
|
|
|||
|
|
@ -146,12 +146,14 @@ struct ir3_ubo_analysis_state {
|
|||
* user consts
|
||||
* UBO addresses
|
||||
* SSBO sizes
|
||||
* image dimensions
|
||||
* if (vertex shader) {
|
||||
* driver params (IR3_DP_*)
|
||||
* driver params (IR3_DP_VS_COUNT)
|
||||
* if (stream_output.num_outputs > 0)
|
||||
* stream-out addresses
|
||||
* } else if (compute_shader) {
|
||||
* driver params (IR3_DP_*)
|
||||
* kernel params
|
||||
* driver params (IR3_DP_CS_COUNT)
|
||||
* }
|
||||
* immediates
|
||||
*
|
||||
|
|
@ -171,6 +173,7 @@ struct ir3_const_state {
|
|||
/* user const start at zero */
|
||||
unsigned ubo;
|
||||
unsigned image_dims;
|
||||
unsigned kernel_params;
|
||||
unsigned driver_param;
|
||||
unsigned tfbo;
|
||||
unsigned primitive_param;
|
||||
|
|
@ -740,6 +743,14 @@ struct ir3_shader {
|
|||
struct nir_shader *nir;
|
||||
struct ir3_stream_output_info stream_output;
|
||||
|
||||
/* per shader stage specific info: */
|
||||
union {
|
||||
/* for compute shaders: */
|
||||
struct {
|
||||
unsigned req_input_mem; /* in dwords */
|
||||
} cs;
|
||||
};
|
||||
|
||||
struct ir3_shader_variant *variants;
|
||||
mtx_t variants_lock;
|
||||
|
||||
|
|
|
|||
|
|
@ -433,6 +433,22 @@ emit_common_consts(const struct ir3_shader_variant *v,
|
|||
}
|
||||
}
|
||||
|
||||
/* emit kernel params */
|
||||
static inline void
|
||||
emit_kernel_params(struct fd_context *ctx, const struct ir3_shader_variant *v,
|
||||
struct fd_ringbuffer *ring, const struct pipe_grid_info *info)
|
||||
assert_dt
|
||||
{
|
||||
const struct ir3_const_state *const_state = ir3_const_state(v);
|
||||
uint32_t offset = const_state->offsets.kernel_params;
|
||||
if (v->constlen > offset) {
|
||||
ring_wfi(ctx->batch, ring);
|
||||
emit_const_user(ring, v, offset * 4,
|
||||
align(v->shader->cs.req_input_mem, 4),
|
||||
info->input);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
ir3_emit_vs_driver_params(const struct ir3_shader_variant *v,
|
||||
struct fd_ringbuffer *ring, struct fd_context *ctx,
|
||||
|
|
@ -552,6 +568,7 @@ ir3_emit_cs_consts(const struct ir3_shader_variant *v,
|
|||
debug_assert(gl_shader_stage_is_compute(v->type));
|
||||
|
||||
emit_common_consts(v, ring, ctx, PIPE_SHADER_COMPUTE);
|
||||
emit_kernel_params(ctx, v, ring, info);
|
||||
|
||||
/* emit compute-shader driver-params: */
|
||||
const struct ir3_const_state *const_state = ir3_const_state(v);
|
||||
|
|
|
|||
|
|
@ -297,6 +297,8 @@ ir3_shader_compute_state_create(struct pipe_context *pctx,
|
|||
}
|
||||
|
||||
struct ir3_shader *shader = ir3_shader_from_nir(compiler, nir, 0, NULL);
|
||||
shader->cs.req_input_mem = align(cso->req_input_mem, 4) / 4; /* byte->dword */
|
||||
|
||||
struct ir3_shader_state *hwcso = calloc(1, sizeof(*hwcso));
|
||||
|
||||
util_queue_fence_init(&hwcso->ready);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue