mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 07:28:11 +02:00
freedreno: Switch to using lowered image intrinsics.
This cuts out a bunch of deref chain walking that the compiler can do for us.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3728>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3728>
This commit is contained in:
parent
3e16434acd
commit
e4baff9081
8 changed files with 108 additions and 143 deletions
|
|
@ -207,22 +207,22 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
}
|
||||
|
||||
static struct ir3_instruction *
|
||||
get_image_offset(struct ir3_context *ctx, const nir_variable *var,
|
||||
get_image_offset(struct ir3_context *ctx, const nir_intrinsic_instr *instr,
|
||||
struct ir3_instruction * const *coords, bool byteoff)
|
||||
{
|
||||
struct ir3_block *b = ctx->block;
|
||||
struct ir3_instruction *offset;
|
||||
unsigned ncoords = ir3_get_image_coords(var, NULL);
|
||||
unsigned index = nir_src_as_uint(instr->src[0]);
|
||||
unsigned ncoords = ir3_get_image_coords(instr, NULL);
|
||||
|
||||
/* to calculate the byte offset (yes, uggg) we need (up to) three
|
||||
* const values to know the bytes per pixel, and y and z stride:
|
||||
*/
|
||||
struct ir3_const_state *const_state = &ctx->so->shader->const_state;
|
||||
unsigned cb = regid(const_state->offsets.image_dims, 0) +
|
||||
const_state->image_dims.off[var->data.driver_location];
|
||||
const_state->image_dims.off[index];
|
||||
|
||||
debug_assert(const_state->image_dims.mask &
|
||||
(1 << var->data.driver_location));
|
||||
debug_assert(const_state->image_dims.mask & (1 << index));
|
||||
|
||||
/* offset = coords.x * bytes_per_pixel: */
|
||||
offset = ir3_MUL_S24(b, coords[0], 0, create_uniform(b, cb + 0), 0);
|
||||
|
|
@ -251,26 +251,25 @@ get_image_offset(struct ir3_context *ctx, const nir_variable *var,
|
|||
}, 2);
|
||||
}
|
||||
|
||||
/* src[] = { deref, coord, sample_index, value }. const_index[] = {} */
|
||||
/* src[] = { index, coord, sample_index, value }. const_index[] = {} */
|
||||
static void
|
||||
emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
struct ir3_block *b = ctx->block;
|
||||
const nir_variable *var = nir_intrinsic_get_var(intr, 0);
|
||||
struct ir3_instruction *stib, *offset;
|
||||
struct ir3_instruction * const *value = ir3_get_src(ctx, &intr->src[3]);
|
||||
struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]);
|
||||
unsigned ncoords = ir3_get_image_coords(var, NULL);
|
||||
unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
|
||||
unsigned ncoords = ir3_get_image_coords(intr, NULL);
|
||||
unsigned slot = nir_src_as_uint(intr->src[0]);
|
||||
unsigned ibo_idx = ir3_image_to_ibo(ctx->so->shader, slot);
|
||||
unsigned ncomp = ir3_get_num_components_for_image_format(var->data.image.format);
|
||||
unsigned ncomp = ir3_get_num_components_for_image_format(nir_intrinsic_format(intr));
|
||||
|
||||
/* src0 is value
|
||||
* src1 is coords
|
||||
* src2 is 64b byte offset
|
||||
*/
|
||||
|
||||
offset = get_image_offset(ctx, var, coords, true);
|
||||
offset = get_image_offset(ctx, intr, coords, true);
|
||||
|
||||
/* NOTE: stib seems to take byte offset, but stgb.typed can be used
|
||||
* too and takes a dword offset.. not quite sure yet why blob uses
|
||||
|
|
@ -283,7 +282,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
offset, 0);
|
||||
stib->cat6.iim_val = ncomp;
|
||||
stib->cat6.d = ncoords;
|
||||
stib->cat6.type = ir3_get_image_type(var);
|
||||
stib->cat6.type = ir3_get_type_for_image_intrinsic(intr);
|
||||
stib->cat6.typed = true;
|
||||
stib->barrier_class = IR3_BARRIER_IMAGE_W;
|
||||
stib->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W;
|
||||
|
|
@ -296,11 +295,10 @@ static struct ir3_instruction *
|
|||
emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
struct ir3_block *b = ctx->block;
|
||||
const nir_variable *var = nir_intrinsic_get_var(intr, 0);
|
||||
struct ir3_instruction *atomic, *image, *src0, *src1, *src2;
|
||||
struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]);
|
||||
unsigned ncoords = ir3_get_image_coords(var, NULL);
|
||||
unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
|
||||
unsigned ncoords = ir3_get_image_coords(intr, NULL);
|
||||
unsigned slot = nir_src_as_uint(intr->src[0]);
|
||||
unsigned ibo_idx = ir3_image_to_ibo(ctx->so->shader, slot);
|
||||
|
||||
image = create_immed(b, ibo_idx);
|
||||
|
|
@ -311,33 +309,33 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
*/
|
||||
src0 = ir3_get_src(ctx, &intr->src[3])[0];
|
||||
src1 = ir3_create_collect(ctx, coords, ncoords);
|
||||
src2 = get_image_offset(ctx, var, coords, false);
|
||||
src2 = get_image_offset(ctx, intr, coords, false);
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_image_deref_atomic_add:
|
||||
case nir_intrinsic_image_atomic_add:
|
||||
atomic = ir3_ATOMIC_ADD_G(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_deref_atomic_imin:
|
||||
case nir_intrinsic_image_deref_atomic_umin:
|
||||
case nir_intrinsic_image_atomic_imin:
|
||||
case nir_intrinsic_image_atomic_umin:
|
||||
atomic = ir3_ATOMIC_MIN_G(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_deref_atomic_imax:
|
||||
case nir_intrinsic_image_deref_atomic_umax:
|
||||
case nir_intrinsic_image_atomic_imax:
|
||||
case nir_intrinsic_image_atomic_umax:
|
||||
atomic = ir3_ATOMIC_MAX_G(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_deref_atomic_and:
|
||||
case nir_intrinsic_image_atomic_and:
|
||||
atomic = ir3_ATOMIC_AND_G(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_deref_atomic_or:
|
||||
case nir_intrinsic_image_atomic_or:
|
||||
atomic = ir3_ATOMIC_OR_G(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_deref_atomic_xor:
|
||||
case nir_intrinsic_image_atomic_xor:
|
||||
atomic = ir3_ATOMIC_XOR_G(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_deref_atomic_exchange:
|
||||
case nir_intrinsic_image_atomic_exchange:
|
||||
atomic = ir3_ATOMIC_XCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_deref_atomic_comp_swap:
|
||||
case nir_intrinsic_image_atomic_comp_swap:
|
||||
/* for cmpxchg, src0 is [ui]vec2(data, compare): */
|
||||
src0 = ir3_create_collect(ctx, (struct ir3_instruction*[]){
|
||||
ir3_get_src(ctx, &intr->src[4])[0],
|
||||
|
|
@ -351,7 +349,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
|
||||
atomic->cat6.iim_val = 1;
|
||||
atomic->cat6.d = ncoords;
|
||||
atomic->cat6.type = ir3_get_image_type(var);
|
||||
atomic->cat6.type = ir3_get_type_for_image_intrinsic(intr);
|
||||
atomic->cat6.typed = true;
|
||||
atomic->barrier_class = IR3_BARRIER_IMAGE_W;
|
||||
atomic->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W;
|
||||
|
|
|
|||
|
|
@ -208,14 +208,14 @@ static void
|
|||
emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
struct ir3_block *b = ctx->block;
|
||||
const nir_variable *var = nir_intrinsic_get_var(intr, 0);
|
||||
struct ir3_instruction *stib;
|
||||
struct ir3_instruction * const *value = ir3_get_src(ctx, &intr->src[3]);
|
||||
struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]);
|
||||
unsigned ncoords = ir3_get_image_coords(var, NULL);
|
||||
unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
|
||||
unsigned ncoords = ir3_get_image_coords(intr, NULL);
|
||||
unsigned slot = nir_src_as_uint(intr->src[0]);
|
||||
unsigned ibo_idx = ir3_image_to_ibo(ctx->so->shader, slot);
|
||||
unsigned ncomp = ir3_get_num_components_for_image_format(var->data.image.format);
|
||||
enum pipe_format format = nir_intrinsic_format(intr);
|
||||
unsigned ncomp = ir3_get_num_components_for_image_format(format);
|
||||
|
||||
/* src0 is offset, src1 is value:
|
||||
*/
|
||||
|
|
@ -224,7 +224,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
ir3_create_collect(ctx, value, ncomp), 0);
|
||||
stib->cat6.iim_val = ncomp;
|
||||
stib->cat6.d = ncoords;
|
||||
stib->cat6.type = ir3_get_image_type(var);
|
||||
stib->cat6.type = ir3_get_type_for_image_intrinsic(intr);
|
||||
stib->cat6.typed = true;
|
||||
stib->barrier_class = IR3_BARRIER_IMAGE_W;
|
||||
stib->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W;
|
||||
|
|
@ -237,12 +237,11 @@ static struct ir3_instruction *
|
|||
emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
struct ir3_block *b = ctx->block;
|
||||
const nir_variable *var = nir_intrinsic_get_var(intr, 0);
|
||||
struct ir3_instruction *atomic, *ibo, *src0, *src1, *dummy;
|
||||
struct ir3_instruction * const *coords = ir3_get_src(ctx, &intr->src[1]);
|
||||
struct ir3_instruction *value = ir3_get_src(ctx, &intr->src[3])[0];
|
||||
unsigned ncoords = ir3_get_image_coords(var, NULL);
|
||||
unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
|
||||
unsigned ncoords = ir3_get_image_coords(intr, NULL);
|
||||
unsigned slot = nir_src_as_uint(intr->src[0]);
|
||||
unsigned ibo_idx = ir3_image_to_ibo(ctx->so->shader, slot);
|
||||
|
||||
ibo = create_immed(b, ibo_idx);
|
||||
|
|
@ -262,7 +261,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
dummy = create_immed(b, 0);
|
||||
src0 = ir3_create_collect(ctx, coords, ncoords);
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) {
|
||||
if (intr->intrinsic == nir_intrinsic_image_atomic_comp_swap) {
|
||||
struct ir3_instruction *compare = ir3_get_src(ctx, &intr->src[4])[0];
|
||||
src1 = ir3_create_collect(ctx, (struct ir3_instruction*[]){
|
||||
dummy, compare, value
|
||||
|
|
@ -274,30 +273,30 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
}
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_image_deref_atomic_add:
|
||||
case nir_intrinsic_image_atomic_add:
|
||||
atomic = ir3_ATOMIC_ADD_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_deref_atomic_imin:
|
||||
case nir_intrinsic_image_deref_atomic_umin:
|
||||
case nir_intrinsic_image_atomic_imin:
|
||||
case nir_intrinsic_image_atomic_umin:
|
||||
atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_deref_atomic_imax:
|
||||
case nir_intrinsic_image_deref_atomic_umax:
|
||||
case nir_intrinsic_image_atomic_imax:
|
||||
case nir_intrinsic_image_atomic_umax:
|
||||
atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_deref_atomic_and:
|
||||
case nir_intrinsic_image_atomic_and:
|
||||
atomic = ir3_ATOMIC_AND_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_deref_atomic_or:
|
||||
case nir_intrinsic_image_atomic_or:
|
||||
atomic = ir3_ATOMIC_OR_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_deref_atomic_xor:
|
||||
case nir_intrinsic_image_atomic_xor:
|
||||
atomic = ir3_ATOMIC_XOR_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_deref_atomic_exchange:
|
||||
case nir_intrinsic_image_atomic_exchange:
|
||||
atomic = ir3_ATOMIC_XCHG_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
case nir_intrinsic_image_deref_atomic_comp_swap:
|
||||
case nir_intrinsic_image_atomic_comp_swap:
|
||||
atomic = ir3_ATOMIC_CMPXCHG_G(b, ibo, 0, src0, 0, src1, 0);
|
||||
break;
|
||||
default:
|
||||
|
|
@ -306,7 +305,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
|
||||
atomic->cat6.iim_val = 1;
|
||||
atomic->cat6.d = ncoords;
|
||||
atomic->cat6.type = ir3_get_image_type(var);
|
||||
atomic->cat6.type = ir3_get_type_for_image_intrinsic(intr);
|
||||
atomic->cat6.typed = true;
|
||||
atomic->barrier_class = IR3_BARRIER_IMAGE_W;
|
||||
atomic->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W;
|
||||
|
|
|
|||
|
|
@ -1040,7 +1040,7 @@ emit_intrinsic_atomic_shared(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
static struct ir3_instruction *
|
||||
get_image_samp_tex_src(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
{
|
||||
unsigned slot = ir3_get_image_slot(nir_src_as_deref(intr->src[0]));
|
||||
unsigned slot = nir_src_as_uint(intr->src[0]);
|
||||
unsigned tex_idx = ir3_image_to_tex(&ctx->so->image_mapping, slot);
|
||||
struct ir3_instruction *texture, *sampler;
|
||||
|
||||
|
|
@ -1059,13 +1059,12 @@ emit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
|||
struct ir3_instruction **dst)
|
||||
{
|
||||
struct ir3_block *b = ctx->block;
|
||||
const nir_variable *var = nir_intrinsic_get_var(intr, 0);
|
||||
struct ir3_instruction *samp_tex = get_image_samp_tex_src(ctx, intr);
|
||||
struct ir3_instruction *sam;
|
||||
struct ir3_instruction * const *src0 = ir3_get_src(ctx, &intr->src[1]);
|
||||
struct ir3_instruction *coords[4];
|
||||
unsigned flags, ncoords = ir3_get_image_coords(var, &flags);
|
||||
type_t type = ir3_get_image_type(var);
|
||||
unsigned flags, ncoords = ir3_get_image_coords(intr, &flags);
|
||||
type_t type = ir3_get_type_for_image_intrinsic(intr);
|
||||
|
||||
/* hmm, this seems a bit odd, but it is what blob does and (at least
|
||||
* a5xx) just faults on bogus addresses otherwise:
|
||||
|
|
@ -1095,10 +1094,9 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
|||
struct ir3_instruction **dst)
|
||||
{
|
||||
struct ir3_block *b = ctx->block;
|
||||
const nir_variable *var = nir_intrinsic_get_var(intr, 0);
|
||||
struct ir3_instruction *samp_tex = get_image_samp_tex_src(ctx, intr);
|
||||
struct ir3_instruction *sam, *lod;
|
||||
unsigned flags, ncoords = ir3_get_image_coords(var, &flags);
|
||||
unsigned flags, ncoords = ir3_get_image_coords(intr, &flags);
|
||||
type_t dst_type = nir_dest_bit_size(intr->dest) < 32 ?
|
||||
TYPE_U16 : TYPE_U32;
|
||||
|
||||
|
|
@ -1126,9 +1124,7 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
|||
*
|
||||
* TODO: This is at least true on a5xx. Check other gens.
|
||||
*/
|
||||
enum glsl_sampler_dim dim =
|
||||
glsl_get_sampler_dim(glsl_without_array(var->type));
|
||||
if (dim == GLSL_SAMPLER_DIM_BUF) {
|
||||
if (nir_intrinsic_image_dim(intr) == GLSL_SAMPLER_DIM_BUF) {
|
||||
/* Since all the possible values the divisor can take are
|
||||
* power-of-two (4, 8, or 16), the division is implemented
|
||||
* as a shift-right.
|
||||
|
|
@ -1138,7 +1134,7 @@ emit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr,
|
|||
*/
|
||||
struct ir3_const_state *const_state = &ctx->so->shader->const_state;
|
||||
unsigned cb = regid(const_state->offsets.image_dims, 0) +
|
||||
const_state->image_dims.off[var->data.driver_location];
|
||||
const_state->image_dims.off[nir_src_as_uint(intr->src[0])];
|
||||
struct ir3_instruction *aux = create_uniform(b, cb + 1);
|
||||
|
||||
tmp[0] = ir3_SHR_B(b, tmp[0], 0, aux, 0);
|
||||
|
|
@ -1621,28 +1617,28 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
case nir_intrinsic_shared_atomic_comp_swap:
|
||||
dst[0] = emit_intrinsic_atomic_shared(ctx, intr);
|
||||
break;
|
||||
case nir_intrinsic_image_deref_load:
|
||||
case nir_intrinsic_image_load:
|
||||
emit_intrinsic_load_image(ctx, intr, dst);
|
||||
break;
|
||||
case nir_intrinsic_image_deref_store:
|
||||
case nir_intrinsic_image_store:
|
||||
if ((ctx->so->type == MESA_SHADER_FRAGMENT) &&
|
||||
!ctx->s->info.fs.early_fragment_tests)
|
||||
ctx->so->no_earlyz = true;
|
||||
ctx->funcs->emit_intrinsic_store_image(ctx, intr);
|
||||
break;
|
||||
case nir_intrinsic_image_deref_size:
|
||||
case nir_intrinsic_image_size:
|
||||
emit_intrinsic_image_size(ctx, intr, dst);
|
||||
break;
|
||||
case nir_intrinsic_image_deref_atomic_add:
|
||||
case nir_intrinsic_image_deref_atomic_imin:
|
||||
case nir_intrinsic_image_deref_atomic_umin:
|
||||
case nir_intrinsic_image_deref_atomic_imax:
|
||||
case nir_intrinsic_image_deref_atomic_umax:
|
||||
case nir_intrinsic_image_deref_atomic_and:
|
||||
case nir_intrinsic_image_deref_atomic_or:
|
||||
case nir_intrinsic_image_deref_atomic_xor:
|
||||
case nir_intrinsic_image_deref_atomic_exchange:
|
||||
case nir_intrinsic_image_deref_atomic_comp_swap:
|
||||
case nir_intrinsic_image_atomic_add:
|
||||
case nir_intrinsic_image_atomic_imin:
|
||||
case nir_intrinsic_image_atomic_umin:
|
||||
case nir_intrinsic_image_atomic_imax:
|
||||
case nir_intrinsic_image_atomic_umax:
|
||||
case nir_intrinsic_image_atomic_and:
|
||||
case nir_intrinsic_image_atomic_or:
|
||||
case nir_intrinsic_image_atomic_xor:
|
||||
case nir_intrinsic_image_atomic_exchange:
|
||||
case nir_intrinsic_image_atomic_comp_swap:
|
||||
if ((ctx->so->type == MESA_SHADER_FRAGMENT) &&
|
||||
!ctx->s->info.fs.early_fragment_tests)
|
||||
ctx->so->no_earlyz = true;
|
||||
|
|
|
|||
|
|
@ -73,49 +73,19 @@ ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image)
|
|||
return mapping->image_to_tex[image] + mapping->tex_base;
|
||||
}
|
||||
|
||||
/* Helper to parse the deref for an image to get image slot. This should be
|
||||
* mapped to tex or ibo idx using ir3_image_to_tex() or ir3_image_to_ibo().
|
||||
*/
|
||||
unsigned
|
||||
ir3_get_image_slot(nir_deref_instr *deref)
|
||||
{
|
||||
unsigned int loc = 0;
|
||||
unsigned inner_size = 1;
|
||||
|
||||
while (deref->deref_type != nir_deref_type_var) {
|
||||
assert(deref->deref_type == nir_deref_type_array);
|
||||
unsigned const_index = nir_src_as_uint(deref->arr.index);
|
||||
|
||||
/* Go to the next instruction */
|
||||
deref = nir_deref_instr_parent(deref);
|
||||
|
||||
assert(glsl_type_is_array(deref->type));
|
||||
const unsigned array_len = glsl_get_length(deref->type);
|
||||
loc += MIN2(const_index, array_len - 1) * inner_size;
|
||||
|
||||
/* Update the inner size */
|
||||
inner_size *= array_len;
|
||||
}
|
||||
|
||||
loc += deref->var->data.driver_location;
|
||||
|
||||
return loc;
|
||||
}
|
||||
|
||||
/* see tex_info() for equiv logic for texture instructions.. it would be
|
||||
* nice if this could be better unified..
|
||||
*/
|
||||
unsigned
|
||||
ir3_get_image_coords(const nir_variable *var, unsigned *flagsp)
|
||||
ir3_get_image_coords(const nir_intrinsic_instr *instr, unsigned *flagsp)
|
||||
{
|
||||
const struct glsl_type *type = glsl_without_array(var->type);
|
||||
unsigned coords = glsl_get_sampler_coordinate_components(type);
|
||||
unsigned coords = nir_image_intrinsic_coord_components(instr);
|
||||
unsigned flags = 0;
|
||||
|
||||
if (coords == 3)
|
||||
flags |= IR3_INSTR_3D;
|
||||
|
||||
if (glsl_sampler_type_is_array(type))
|
||||
if (nir_intrinsic_image_array(instr))
|
||||
flags |= IR3_INSTR_A;
|
||||
|
||||
if (flagsp)
|
||||
|
|
@ -125,25 +95,18 @@ ir3_get_image_coords(const nir_variable *var, unsigned *flagsp)
|
|||
}
|
||||
|
||||
type_t
|
||||
ir3_get_image_type(const nir_variable *var)
|
||||
ir3_get_type_for_image_intrinsic(const nir_intrinsic_instr *instr)
|
||||
{
|
||||
switch (glsl_get_sampler_result_type(glsl_without_array(var->type))) {
|
||||
case GLSL_TYPE_UINT:
|
||||
return TYPE_U32;
|
||||
case GLSL_TYPE_INT:
|
||||
return TYPE_S32;
|
||||
case GLSL_TYPE_FLOAT:
|
||||
return TYPE_F32;
|
||||
case GLSL_TYPE_UINT16:
|
||||
return TYPE_U16;
|
||||
case GLSL_TYPE_INT16:
|
||||
return TYPE_S16;
|
||||
case GLSL_TYPE_FLOAT16:
|
||||
return TYPE_F16;
|
||||
default:
|
||||
unreachable("bad sampler type.");
|
||||
return 0;
|
||||
}
|
||||
const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
|
||||
int bit_size = info->has_dest ? nir_dest_bit_size(instr->dest) : 32;
|
||||
enum pipe_format format = nir_intrinsic_format(instr);
|
||||
|
||||
if (util_format_is_pure_uint(format))
|
||||
return bit_size == 16 ? TYPE_U16 : TYPE_U32;
|
||||
else if (util_format_is_pure_sint(format))
|
||||
return bit_size == 16 ? TYPE_S16 : TYPE_S32;
|
||||
else
|
||||
return bit_size == 16 ? TYPE_F16 : TYPE_F32;
|
||||
}
|
||||
|
||||
/* Returns the number of components for the different image formats
|
||||
|
|
|
|||
|
|
@ -36,9 +36,8 @@ unsigned ir3_ssbo_to_tex(struct ir3_ibo_mapping *mapping, unsigned ssbo);
|
|||
unsigned ir3_image_to_ibo(struct ir3_shader *shader, unsigned image);
|
||||
unsigned ir3_image_to_tex(struct ir3_ibo_mapping *mapping, unsigned image);
|
||||
|
||||
unsigned ir3_get_image_slot(nir_deref_instr *deref);
|
||||
unsigned ir3_get_image_coords(const nir_variable *var, unsigned *flagsp);
|
||||
type_t ir3_get_image_type(const nir_variable *var);
|
||||
unsigned ir3_get_image_coords(const nir_intrinsic_instr *instr, unsigned *flagsp);
|
||||
type_t ir3_get_type_for_image_intrinsic(const nir_intrinsic_instr *instr);
|
||||
unsigned ir3_get_num_components_for_image_format(enum pipe_format);
|
||||
|
||||
#endif /* IR3_IMAGE_H_ */
|
||||
|
|
|
|||
|
|
@ -382,19 +382,19 @@ ir3_nir_scan_driver_consts(nir_shader *shader,
|
|||
layout->ssbo_size.count;
|
||||
layout->ssbo_size.count += 1; /* one const per */
|
||||
break;
|
||||
case nir_intrinsic_image_deref_atomic_add:
|
||||
case nir_intrinsic_image_deref_atomic_imin:
|
||||
case nir_intrinsic_image_deref_atomic_umin:
|
||||
case nir_intrinsic_image_deref_atomic_imax:
|
||||
case nir_intrinsic_image_deref_atomic_umax:
|
||||
case nir_intrinsic_image_deref_atomic_and:
|
||||
case nir_intrinsic_image_deref_atomic_or:
|
||||
case nir_intrinsic_image_deref_atomic_xor:
|
||||
case nir_intrinsic_image_deref_atomic_exchange:
|
||||
case nir_intrinsic_image_deref_atomic_comp_swap:
|
||||
case nir_intrinsic_image_deref_store:
|
||||
case nir_intrinsic_image_deref_size:
|
||||
idx = nir_intrinsic_get_var(intr, 0)->data.driver_location;
|
||||
case nir_intrinsic_image_atomic_add:
|
||||
case nir_intrinsic_image_atomic_imin:
|
||||
case nir_intrinsic_image_atomic_umin:
|
||||
case nir_intrinsic_image_atomic_imax:
|
||||
case nir_intrinsic_image_atomic_umax:
|
||||
case nir_intrinsic_image_atomic_and:
|
||||
case nir_intrinsic_image_atomic_or:
|
||||
case nir_intrinsic_image_atomic_xor:
|
||||
case nir_intrinsic_image_atomic_exchange:
|
||||
case nir_intrinsic_image_atomic_comp_swap:
|
||||
case nir_intrinsic_image_store:
|
||||
case nir_intrinsic_image_size:
|
||||
idx = nir_src_as_uint(intr->src[0]);
|
||||
if (layout->image_dims.mask & (1 << idx))
|
||||
break;
|
||||
layout->image_dims.mask |= (1 << idx);
|
||||
|
|
|
|||
|
|
@ -269,8 +269,9 @@ lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr,
|
|||
}
|
||||
|
||||
static void
|
||||
add_image_deref_mapping(nir_intrinsic_instr *instr, struct tu_shader *shader,
|
||||
const struct tu_pipeline_layout *layout)
|
||||
lower_image_deref(nir_builder *b,
|
||||
nir_intrinsic_instr *instr, struct tu_shader *shader,
|
||||
const struct tu_pipeline_layout *layout)
|
||||
{
|
||||
nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
|
||||
nir_variable *var = nir_deref_instr_get_variable(deref);
|
||||
|
|
@ -281,9 +282,15 @@ add_image_deref_mapping(nir_intrinsic_instr *instr, struct tu_shader *shader,
|
|||
struct tu_descriptor_set_binding_layout *binding_layout =
|
||||
&set_layout->binding[binding];
|
||||
|
||||
var->data.driver_location =
|
||||
map_add(&shader->image_map, set, binding, var->data.index,
|
||||
binding_layout->array_size);
|
||||
nir_ssa_def *index = nir_imm_int(b,
|
||||
map_add(&shader->image_map,
|
||||
set, binding, var->data.index,
|
||||
binding_layout->array_size));
|
||||
if (deref->deref_type != nir_deref_type_var) {
|
||||
assert(deref->deref_type == nir_deref_type_array);
|
||||
index = nir_iadd(b, index, nir_ssa_for_src(b, deref->arr.index, 1));
|
||||
}
|
||||
nir_rewrite_image_intrinsic(instr, index, false);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -324,7 +331,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
|
|||
case nir_intrinsic_image_deref_load_param_intel:
|
||||
case nir_intrinsic_image_deref_load_raw_intel:
|
||||
case nir_intrinsic_image_deref_store_raw_intel:
|
||||
add_image_deref_mapping(instr, shader, layout);
|
||||
lower_image_deref(b, instr, shader, layout);
|
||||
return true;
|
||||
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -342,6 +342,9 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
return 1;
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_NIR_IMAGES_AS_DEREF:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_MAX_VIEWPORTS:
|
||||
return 1;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue