nir: Drop "SSA" from NIR language

Everything is SSA now.

   sed -e 's/nir_ssa_def/nir_def/g' \
       -e 's/nir_ssa_undef/nir_undef/g' \
       -e 's/nir_ssa_scalar/nir_scalar/g' \
       -e 's/nir_src_rewrite_ssa/nir_src_rewrite/g' \
       -e 's/nir_gather_ssa_types/nir_gather_types/g' \
       -i $(git grep -l nir | grep -v relnotes)

   git mv src/compiler/nir/nir_gather_ssa_types.c \
          src/compiler/nir/nir_gather_types.c

   ninja -C build/ clang-format
   cd src/compiler/nir && find *.c *.h -type f -exec clang-format -i \{} \;

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Acked-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Acked-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24585>
Alyssa Rosenzweig 2023-08-12 16:17:15 -04:00
parent 777d336b1f
commit 09d31922de
492 changed files with 10408 additions and 10455 deletions
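For illustration only (not part of the commit): a minimal sketch of what the rename means at a typical call site. The helper name build_add_one is hypothetical; nir_builder, nir_imm_int and nir_iadd are the existing builder API, and only the spelling of the SSA-def type changes.

   /* Before the rename: */
   static nir_ssa_def *
   build_add_one(nir_builder *b, nir_ssa_def *x)
   {
      nir_ssa_def *one = nir_imm_int(b, 1); /* 32-bit constant 1 */
      return nir_iadd(b, x, one);           /* x + 1 */
   }

   /* After the rename: same code, new type name. */
   static nir_def *
   build_add_one(nir_builder *b, nir_def *x)
   {
      nir_def *one = nir_imm_int(b, 1);
      return nir_iadd(b, x, one);
   }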


@ -40,7 +40,7 @@ import nir_opcodes
OP_DESC_TEMPLATE = mako.template.Template("""
<%
def src_decl_list(num_srcs):
return ', '.join('nir_ssa_def *src' + str(i) for i in range(num_srcs))
return ', '.join('nir_def *src' + str(i) for i in range(num_srcs))
def to_yn(b):
return 'Y' if b else 'N'
@ -68,7 +68,7 @@ ${textwrap.indent(op.const_expr, ' ')}
**Builder function:**
.. c:function:: nir_ssa_def *nir_${op.name}(nir_builder *, ${src_decl_list(op.num_inputs)})
.. c:function:: nir_def *nir_${op.name}(nir_builder *, ${src_decl_list(op.num_inputs)})
""")
def parse_rst(state, parent, rst):


@ -10,7 +10,7 @@
#include "nir_xfb_info.h"
/* Load argument with index start from arg plus relative_index. */
nir_ssa_def *
nir_def *
ac_nir_load_arg_at_offset(nir_builder *b, const struct ac_shader_args *ac_args,
struct ac_arg arg, unsigned relative_index)
{
@ -25,7 +25,7 @@ ac_nir_load_arg_at_offset(nir_builder *b, const struct ac_shader_args *ac_args,
void
ac_nir_store_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
nir_ssa_def *val)
nir_def *val)
{
assert(nir_cursor_current_block(b->cursor)->cf_node.parent->type == nir_cf_node_function);
@ -35,11 +35,11 @@ ac_nir_store_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac
nir_store_vector_arg_amd(b, val, .base = arg.arg_index);
}
nir_ssa_def *
nir_def *
ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
unsigned rshift, unsigned bitwidth)
{
nir_ssa_def *value = ac_nir_load_arg(b, ac_args, arg);
nir_def *value = ac_nir_load_arg(b, ac_args, arg);
if (rshift == 0 && bitwidth == 32)
return value;
else if (rshift == 0)
@ -57,11 +57,11 @@ is_sin_cos(const nir_instr *instr, UNUSED const void *_)
nir_instr_as_alu(instr)->op == nir_op_fcos);
}
static nir_ssa_def *
static nir_def *
lower_sin_cos(struct nir_builder *b, nir_instr *instr, UNUSED void *_)
{
nir_alu_instr *sincos = nir_instr_as_alu(instr);
nir_ssa_def *src = nir_fmul_imm(b, nir_ssa_for_alu_src(b, sincos, 0), 0.15915493667125702);
nir_def *src = nir_fmul_imm(b, nir_ssa_for_alu_src(b, sincos, 0), 0.15915493667125702);
return sincos->op == nir_op_fsin ? nir_fsin_amd(b, src) : nir_fcos_amd(b, src);
}
@ -85,7 +85,7 @@ lower_intrinsic_to_arg(nir_builder *b, nir_instr *instr, void *state)
lower_intrinsics_to_args_state *s = (lower_intrinsics_to_args_state *)state;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
nir_ssa_def *replacement = NULL;
nir_def *replacement = NULL;
b->cursor = nir_after_instr(&intrin->instr);
switch (intrin->intrinsic) {
@ -134,7 +134,7 @@ lower_intrinsic_to_arg(nir_builder *b, nir_instr *instr, void *state)
}
assert(replacement);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, replacement);
nir_def_rewrite_uses(&intrin->dest.ssa, replacement);
nir_instr_remove(&intrin->instr);
return true;
}
@ -155,15 +155,15 @@ ac_nir_lower_intrinsics_to_args(nir_shader *shader, const enum amd_gfx_level gfx
}
void
ac_nir_store_var_components(nir_builder *b, nir_variable *var, nir_ssa_def *value,
ac_nir_store_var_components(nir_builder *b, nir_variable *var, nir_def *value,
unsigned component, unsigned writemask)
{
/* component store */
if (value->num_components != 4) {
nir_ssa_def *undef = nir_ssa_undef(b, 1, value->bit_size);
nir_def *undef = nir_undef(b, 1, value->bit_size);
/* add undef component before and after value to form a vec4 */
nir_ssa_def *comp[4];
nir_def *comp[4];
for (int i = 0; i < 4; i++) {
comp[i] = (i >= component && i < component + value->num_components) ?
nir_channel(b, value, i - component) : undef;
@ -180,7 +180,7 @@ ac_nir_store_var_components(nir_builder *b, nir_variable *var, nir_ssa_def *valu
}
void
ac_nir_export_primitive(nir_builder *b, nir_ssa_def *prim)
ac_nir_export_primitive(nir_builder *b, nir_def *prim)
{
unsigned write_mask = BITFIELD_MASK(prim->num_components);
@ -190,15 +190,15 @@ ac_nir_export_primitive(nir_builder *b, nir_ssa_def *prim)
.write_mask = write_mask);
}
static nir_ssa_def *
get_export_output(nir_builder *b, nir_ssa_def **output)
static nir_def *
get_export_output(nir_builder *b, nir_def **output)
{
nir_ssa_def *vec[4];
nir_def *vec[4];
for (int i = 0; i < 4; i++) {
if (output[i])
vec[i] = nir_u2uN(b, output[i], 32);
else
vec[i] = nir_ssa_undef(b, 1, 32);
vec[i] = nir_undef(b, 1, 32);
}
return nir_vec(b, vec, 4);
@ -211,17 +211,17 @@ ac_nir_export_position(nir_builder *b,
bool no_param_export,
bool force_vrs,
uint64_t outputs_written,
nir_ssa_def *(*outputs)[4])
nir_def *(*outputs)[4])
{
nir_intrinsic_instr *exp[4];
unsigned exp_num = 0;
nir_ssa_def *pos;
nir_def *pos;
if (outputs_written & VARYING_BIT_POS) {
pos = get_export_output(b, outputs[VARYING_SLOT_POS]);
} else {
nir_ssa_def *zero = nir_imm_float(b, 0);
nir_ssa_def *one = nir_imm_float(b, 1);
nir_def *zero = nir_imm_float(b, 0);
nir_def *one = nir_imm_float(b, 1);
pos = nir_vec4(b, zero, zero, zero, one);
}
@ -255,8 +255,8 @@ ac_nir_export_position(nir_builder *b,
outputs_written &= ~VARYING_BIT_VIEWPORT;
if ((outputs_written & mask) || force_vrs) {
nir_ssa_def *zero = nir_imm_float(b, 0);
nir_ssa_def *vec[4] = { zero, zero, zero, zero };
nir_def *zero = nir_imm_float(b, 0);
nir_def *vec[4] = { zero, zero, zero, zero };
unsigned flags = 0;
unsigned write_mask = 0;
@ -270,13 +270,13 @@ ac_nir_export_position(nir_builder *b,
write_mask |= BITFIELD_BIT(1);
}
nir_ssa_def *rates = NULL;
nir_def *rates = NULL;
if (outputs_written & VARYING_BIT_PRIMITIVE_SHADING_RATE) {
rates = outputs[VARYING_SLOT_PRIMITIVE_SHADING_RATE][0];
} else if (force_vrs) {
/* If Pos.W != 1 (typical for non-GUI elements), use coarse shading. */
nir_ssa_def *pos_w = nir_channel(b, pos, 3);
nir_ssa_def *cond = nir_fneu_imm(b, pos_w, 1);
nir_def *pos_w = nir_channel(b, pos, 3);
nir_def *cond = nir_fneu_imm(b, pos_w, 1);
rates = nir_bcsel(b, cond, nir_load_force_vrs_rates_amd(b), nir_imm_int(b, 0));
}
@ -293,7 +293,7 @@ ac_nir_export_position(nir_builder *b,
if (outputs_written & VARYING_BIT_VIEWPORT) {
if (gfx_level >= GFX9) {
/* GFX9 has the layer in [10:0] and the viewport index in [19:16]. */
nir_ssa_def *v = nir_ishl_imm(b, outputs[VARYING_SLOT_VIEWPORT][0], 16);
nir_def *v = nir_ishl_imm(b, outputs[VARYING_SLOT_VIEWPORT][0], 16);
vec[2] = nir_ior(b, vec[2], v);
write_mask |= BITFIELD_BIT(2);
} else {
@ -322,12 +322,12 @@ ac_nir_export_position(nir_builder *b,
}
if (outputs_written & VARYING_BIT_CLIP_VERTEX) {
nir_ssa_def *vtx = get_export_output(b, outputs[VARYING_SLOT_CLIP_VERTEX]);
nir_def *vtx = get_export_output(b, outputs[VARYING_SLOT_CLIP_VERTEX]);
/* Clip distance for clip vertex to each user clip plane. */
nir_ssa_def *clip_dist[8] = {0};
nir_def *clip_dist[8] = {0};
u_foreach_bit (i, clip_cull_mask) {
nir_ssa_def *ucp = nir_load_user_clip_plane(b, .ucp_id = i);
nir_def *ucp = nir_load_user_clip_plane(b, .ucp_id = i);
clip_dist[i] = nir_fdot4(b, vtx, ucp);
}
@ -365,9 +365,9 @@ ac_nir_export_parameters(nir_builder *b,
const uint8_t *param_offsets,
uint64_t outputs_written,
uint16_t outputs_written_16bit,
nir_ssa_def *(*outputs)[4],
nir_ssa_def *(*outputs_16bit_lo)[4],
nir_ssa_def *(*outputs_16bit_hi)[4])
nir_def *(*outputs)[4],
nir_def *(*outputs_16bit_lo)[4],
nir_def *(*outputs_16bit_hi)[4])
{
uint32_t exported_params = 0;
@ -422,11 +422,11 @@ ac_nir_export_parameters(nir_builder *b,
if (exported_params & BITFIELD_BIT(offset))
continue;
nir_ssa_def *vec[4];
nir_ssa_def *undef = nir_ssa_undef(b, 1, 16);
nir_def *vec[4];
nir_def *undef = nir_undef(b, 1, 16);
for (int i = 0; i < 4; i++) {
nir_ssa_def *lo = outputs_16bit_lo[slot][i] ? outputs_16bit_lo[slot][i] : undef;
nir_ssa_def *hi = outputs_16bit_hi[slot][i] ? outputs_16bit_hi[slot][i] : undef;
nir_def *lo = outputs_16bit_lo[slot][i] ? outputs_16bit_lo[slot][i] : undef;
nir_def *hi = outputs_16bit_hi[slot][i] ? outputs_16bit_hi[slot][i] : undef;
vec[i] = nir_pack_32_2x16_split(b, lo, hi);
}
@ -443,10 +443,10 @@ ac_nir_export_parameters(nir_builder *b,
* and emits a sequence that calculates the full offset of that instruction,
* including a stride to the base and component offsets.
*/
nir_ssa_def *
nir_def *
ac_nir_calc_io_offset(nir_builder *b,
nir_intrinsic_instr *intrin,
nir_ssa_def *base_stride,
nir_def *base_stride,
unsigned component_stride,
ac_nir_map_io_driver_location map_io)
{
@ -455,13 +455,13 @@ ac_nir_calc_io_offset(nir_builder *b,
unsigned mapped_driver_location = map_io ? map_io(semantic) : base;
/* base is the driver_location, which is in slots (1 slot = 4x4 bytes) */
nir_ssa_def *base_op = nir_imul_imm(b, base_stride, mapped_driver_location);
nir_def *base_op = nir_imul_imm(b, base_stride, mapped_driver_location);
/* offset should be interpreted in relation to the base,
* so the instruction effectively reads/writes another input/output
* when it has an offset
*/
nir_ssa_def *offset_op = nir_imul(b, base_stride, nir_ssa_for_src(b, *nir_get_io_offset_src(intrin), 1));
nir_def *offset_op = nir_imul(b, base_stride, nir_ssa_for_src(b, *nir_get_io_offset_src(intrin), 1));
/* component is in bytes */
unsigned const_op = nir_intrinsic_component(intrin) * component_stride;
@ -513,19 +513,19 @@ ac_nir_lower_indirect_derefs(nir_shader *shader,
}
struct shader_outputs {
nir_ssa_def *data[VARYING_SLOT_MAX][4];
nir_ssa_def *data_16bit_lo[16][4];
nir_ssa_def *data_16bit_hi[16][4];
nir_def *data[VARYING_SLOT_MAX][4];
nir_def *data_16bit_lo[16][4];
nir_def *data_16bit_hi[16][4];
nir_alu_type (*type_16bit_lo)[4];
nir_alu_type (*type_16bit_hi)[4];
};
static nir_ssa_def **
static nir_def **
get_output_and_type(struct shader_outputs *outputs, unsigned slot, bool high_16bits,
nir_alu_type **types)
{
nir_ssa_def **data;
nir_def **data;
nir_alu_type *type;
/* Only VARYING_SLOT_VARn_16BIT slots need output type to convert 16bit output
@ -554,38 +554,38 @@ static void
emit_streamout(nir_builder *b, unsigned stream, nir_xfb_info *info,
struct shader_outputs *outputs)
{
nir_ssa_def *so_vtx_count = nir_ubfe_imm(b, nir_load_streamout_config_amd(b), 16, 7);
nir_ssa_def *tid = nir_load_subgroup_invocation(b);
nir_def *so_vtx_count = nir_ubfe_imm(b, nir_load_streamout_config_amd(b), 16, 7);
nir_def *tid = nir_load_subgroup_invocation(b);
nir_push_if(b, nir_ilt(b, tid, so_vtx_count));
nir_ssa_def *so_write_index = nir_load_streamout_write_index_amd(b);
nir_def *so_write_index = nir_load_streamout_write_index_amd(b);
nir_ssa_def *so_buffers[NIR_MAX_XFB_BUFFERS];
nir_ssa_def *so_write_offset[NIR_MAX_XFB_BUFFERS];
nir_def *so_buffers[NIR_MAX_XFB_BUFFERS];
nir_def *so_write_offset[NIR_MAX_XFB_BUFFERS];
u_foreach_bit(i, info->buffers_written) {
so_buffers[i] = nir_load_streamout_buffer_amd(b, i);
unsigned stride = info->buffers[i].stride;
nir_ssa_def *offset = nir_load_streamout_offset_amd(b, i);
nir_def *offset = nir_load_streamout_offset_amd(b, i);
offset = nir_iadd(b, nir_imul_imm(b, nir_iadd(b, so_write_index, tid), stride),
nir_imul_imm(b, offset, 4));
so_write_offset[i] = offset;
}
nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
nir_def *undef = nir_undef(b, 1, 32);
for (unsigned i = 0; i < info->output_count; i++) {
const nir_xfb_output_info *output = info->outputs + i;
if (stream != info->buffer_to_stream[output->buffer])
continue;
nir_alu_type *output_type;
nir_ssa_def **output_data =
nir_def **output_data =
get_output_and_type(outputs, output->location, output->high_16bits, &output_type);
nir_ssa_def *vec[4] = {undef, undef, undef, undef};
nir_def *vec[4] = {undef, undef, undef, undef};
uint8_t mask = 0;
u_foreach_bit(j, output->component_mask) {
nir_ssa_def *data = output_data[j];
nir_def *data = output_data[j];
if (data) {
if (data->bit_size < 32) {
@ -606,8 +606,8 @@ emit_streamout(nir_builder *b, unsigned stream, nir_xfb_info *info,
continue;
unsigned buffer = output->buffer;
nir_ssa_def *data = nir_vec(b, vec, util_last_bit(mask));
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_def *data = nir_vec(b, vec, util_last_bit(mask));
nir_def *zero = nir_imm_int(b, 0);
nir_store_buffer_amd(b, data, so_buffers[buffer], so_write_offset[buffer], zero, zero,
.base = output->offset, .write_mask = mask,
.access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL);
@ -636,15 +636,15 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
b.shader->info.outputs_written = gs_nir->info.outputs_written;
b.shader->info.outputs_written_16bit = gs_nir->info.outputs_written_16bit;
nir_ssa_def *gsvs_ring = nir_load_ring_gsvs_amd(&b);
nir_def *gsvs_ring = nir_load_ring_gsvs_amd(&b);
nir_xfb_info *info = gs_nir->xfb_info;
nir_ssa_def *stream_id = NULL;
nir_def *stream_id = NULL;
if (!disable_streamout && info)
stream_id = nir_ubfe_imm(&b, nir_load_streamout_config_amd(&b), 24, 2);
nir_ssa_def *vtx_offset = nir_imul_imm(&b, nir_load_vertex_id_zero_base(&b), 4);
nir_ssa_def *zero = nir_imm_zero(&b, 1, 32);
nir_def *vtx_offset = nir_imul_imm(&b, nir_load_vertex_id_zero_base(&b), 4);
nir_def *zero = nir_imm_zero(&b, 1, 32);
for (unsigned stream = 0; stream < 4; stream++) {
if (stream > 0 && (!stream_id || !(info->streams_written & BITFIELD_BIT(stream))))
@ -672,8 +672,8 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
/* clamp legacy color output */
if (i == VARYING_SLOT_COL0 || i == VARYING_SLOT_COL1 ||
i == VARYING_SLOT_BFC0 || i == VARYING_SLOT_BFC1) {
nir_ssa_def *color = outputs.data[i][j];
nir_ssa_def *clamp = nir_load_clamp_vertex_color_amd(&b);
nir_def *color = outputs.data[i][j];
nir_def *clamp = nir_load_clamp_vertex_color_amd(&b);
outputs.data[i][j] = nir_bcsel(&b, clamp, nir_fsat(&b, color), color);
}
@ -690,7 +690,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
if (!has_lo_16bit && !has_hi_16bit)
continue;
nir_ssa_def *data =
nir_def *data =
nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero,
.base = offset,
.access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL);
@ -759,7 +759,7 @@ gather_outputs(nir_builder *b, nir_function_impl *impl, struct shader_outputs *o
nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
nir_alu_type *output_type;
nir_ssa_def **output_data =
nir_def **output_data =
get_output_and_type(outputs, sem.location, sem.high_16bits, &output_type);
u_foreach_bit (i, nir_intrinsic_write_mask(intrin)) {
@ -841,12 +841,12 @@ ac_nir_gs_shader_query(nir_builder *b,
bool has_pipeline_stats_query,
unsigned num_vertices_per_primitive,
unsigned wave_size,
nir_ssa_def *vertex_count[4],
nir_ssa_def *primitive_count[4])
nir_def *vertex_count[4],
nir_def *primitive_count[4])
{
nir_ssa_def *pipeline_query_enabled = NULL;
nir_ssa_def *prim_gen_query_enabled = NULL;
nir_ssa_def *shader_query_enabled = NULL;
nir_def *pipeline_query_enabled = NULL;
nir_def *prim_gen_query_enabled = NULL;
nir_def *shader_query_enabled = NULL;
if (has_gen_prim_query) {
prim_gen_query_enabled = nir_load_prim_gen_query_enabled_amd(b);
if (has_pipeline_stats_query) {
@ -865,31 +865,31 @@ ac_nir_gs_shader_query(nir_builder *b,
nir_if *if_shader_query = nir_push_if(b, shader_query_enabled);
nir_ssa_def *active_threads_mask = nir_ballot(b, 1, wave_size, nir_imm_true(b));
nir_ssa_def *num_active_threads = nir_bit_count(b, active_threads_mask);
nir_def *active_threads_mask = nir_ballot(b, 1, wave_size, nir_imm_true(b));
nir_def *num_active_threads = nir_bit_count(b, active_threads_mask);
/* Calculate the "real" number of emitted primitives from the emitted GS vertices and primitives.
* GS emits points, line strips or triangle strips.
* Real primitives are points, lines or triangles.
*/
nir_ssa_def *num_prims_in_wave[4] = {0};
nir_def *num_prims_in_wave[4] = {0};
u_foreach_bit (i, b->shader->info.gs.active_stream_mask) {
assert(vertex_count[i] && primitive_count[i]);
nir_ssa_scalar vtx_cnt = nir_get_ssa_scalar(vertex_count[i], 0);
nir_ssa_scalar prm_cnt = nir_get_ssa_scalar(primitive_count[i], 0);
nir_scalar vtx_cnt = nir_get_ssa_scalar(vertex_count[i], 0);
nir_scalar prm_cnt = nir_get_ssa_scalar(primitive_count[i], 0);
if (nir_ssa_scalar_is_const(vtx_cnt) && nir_ssa_scalar_is_const(prm_cnt)) {
unsigned gs_vtx_cnt = nir_ssa_scalar_as_uint(vtx_cnt);
unsigned gs_prm_cnt = nir_ssa_scalar_as_uint(prm_cnt);
if (nir_scalar_is_const(vtx_cnt) && nir_scalar_is_const(prm_cnt)) {
unsigned gs_vtx_cnt = nir_scalar_as_uint(vtx_cnt);
unsigned gs_prm_cnt = nir_scalar_as_uint(prm_cnt);
unsigned total_prm_cnt = gs_vtx_cnt - gs_prm_cnt * (num_vertices_per_primitive - 1u);
if (total_prm_cnt == 0)
continue;
num_prims_in_wave[i] = nir_imul_imm(b, num_active_threads, total_prm_cnt);
} else {
nir_ssa_def *gs_vtx_cnt = vtx_cnt.def;
nir_ssa_def *gs_prm_cnt = prm_cnt.def;
nir_def *gs_vtx_cnt = vtx_cnt.def;
nir_def *gs_prm_cnt = prm_cnt.def;
if (num_vertices_per_primitive > 1)
gs_prm_cnt = nir_iadd(b, nir_imul_imm(b, gs_prm_cnt, -1u * (num_vertices_per_primitive - 1)), gs_vtx_cnt);
num_prims_in_wave[i] = nir_reduce(b, gs_prm_cnt, .reduction_op = nir_op_iadd);
@ -902,7 +902,7 @@ ac_nir_gs_shader_query(nir_builder *b,
if (has_pipeline_stats_query) {
nir_if *if_pipeline_query = nir_push_if(b, pipeline_query_enabled);
{
nir_ssa_def *count = NULL;
nir_def *count = NULL;
/* Add all streams' number to the same counter. */
for (int i = 0; i < 4; i++) {
@ -941,14 +941,14 @@ ac_nir_gs_shader_query(nir_builder *b,
}
typedef struct {
nir_ssa_def *outputs[64][4];
nir_ssa_def *outputs_16bit_lo[16][4];
nir_ssa_def *outputs_16bit_hi[16][4];
nir_def *outputs[64][4];
nir_def *outputs_16bit_lo[16][4];
nir_def *outputs_16bit_hi[16][4];
ac_nir_gs_output_info *info;
nir_ssa_def *vertex_count[4];
nir_ssa_def *primitive_count[4];
nir_def *vertex_count[4];
nir_def *primitive_count[4];
} lower_legacy_gs_state;
static bool
@ -968,7 +968,7 @@ lower_legacy_gs_store_output(nir_builder *b, nir_intrinsic_instr *intrin,
unsigned write_mask = nir_intrinsic_write_mask(intrin);
nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
nir_ssa_def **outputs;
nir_def **outputs;
if (sem.location < VARYING_SLOT_VAR0_16BIT) {
outputs = s->outputs[sem.location];
} else {
@ -979,7 +979,7 @@ lower_legacy_gs_store_output(nir_builder *b, nir_intrinsic_instr *intrin,
outputs = s->outputs_16bit_lo[index];
}
nir_ssa_def *store_val = intrin->src[0].ssa;
nir_def *store_val = intrin->src[0].ssa;
/* 64bit output has been lowered to 32bit */
assert(store_val->bit_size <= 32);
@ -999,15 +999,15 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in
b->cursor = nir_before_instr(&intrin->instr);
unsigned stream = nir_intrinsic_stream_id(intrin);
nir_ssa_def *vtxidx = intrin->src[0].ssa;
nir_def *vtxidx = intrin->src[0].ssa;
nir_ssa_def *gsvs_ring = nir_load_ring_gsvs_amd(b, .stream_id = stream);
nir_ssa_def *soffset = nir_load_ring_gs2vs_offset_amd(b);
nir_def *gsvs_ring = nir_load_ring_gsvs_amd(b, .stream_id = stream);
nir_def *soffset = nir_load_ring_gs2vs_offset_amd(b);
unsigned offset = 0;
u_foreach_bit64 (i, b->shader->info.outputs_written) {
for (unsigned j = 0; j < 4; j++) {
nir_ssa_def *output = s->outputs[i][j];
nir_def *output = s->outputs[i][j];
/* Next vertex emit need a new value, reset all outputs. */
s->outputs[i][j] = NULL;
@ -1022,10 +1022,10 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in
if (!output)
continue;
nir_ssa_def *voffset = nir_ishl_imm(b, vtxidx, 2);
nir_def *voffset = nir_ishl_imm(b, vtxidx, 2);
/* extend 8/16 bit to 32 bit, 64 bit has been lowered */
nir_ssa_def *data = nir_u2uN(b, output, 32);
nir_def *data = nir_u2uN(b, output, 32);
nir_store_buffer_amd(b, data, gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
.access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL |
@ -1038,8 +1038,8 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in
u_foreach_bit (i, b->shader->info.outputs_written_16bit) {
for (unsigned j = 0; j < 4; j++) {
nir_ssa_def *output_lo = s->outputs_16bit_lo[i][j];
nir_ssa_def *output_hi = s->outputs_16bit_hi[i][j];
nir_def *output_lo = s->outputs_16bit_lo[i][j];
nir_def *output_hi = s->outputs_16bit_hi[i][j];
/* Next vertex emit need a new value, reset all outputs. */
s->outputs_16bit_lo[i][j] = NULL;
s->outputs_16bit_hi[i][j] = NULL;
@ -1062,12 +1062,12 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in
continue;
if (!has_lo_16bit_out)
output_lo = nir_ssa_undef(b, 1, 16);
output_lo = nir_undef(b, 1, 16);
if (!has_hi_16bit_out)
output_hi = nir_ssa_undef(b, 1, 16);
output_hi = nir_undef(b, 1, 16);
nir_ssa_def *voffset = nir_iadd_imm(b, vtxidx, base);
nir_def *voffset = nir_iadd_imm(b, vtxidx, base);
voffset = nir_ishl_imm(b, voffset, 2);
nir_store_buffer_amd(b, nir_pack_32_2x16_split(b, output_lo, output_hi),


@ -47,20 +47,20 @@ typedef struct nir_builder nir_builder;
/* Executed by ac_nir_cull when the current primitive is accepted. */
typedef void (*ac_nir_cull_accepted)(nir_builder *b, void *state);
nir_ssa_def *
nir_def *
ac_nir_load_arg_at_offset(nir_builder *b, const struct ac_shader_args *ac_args,
struct ac_arg arg, unsigned relative_index);
static inline nir_ssa_def *
static inline nir_def *
ac_nir_load_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg)
{
return ac_nir_load_arg_at_offset(b, ac_args, arg, 0);
}
void ac_nir_store_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
nir_ssa_def *val);
nir_def *val);
nir_ssa_def *
nir_def *
ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
unsigned rshift, unsigned bitwidth);
@ -71,11 +71,11 @@ bool ac_nir_lower_intrinsics_to_args(nir_shader *shader, const enum amd_gfx_leve
const struct ac_shader_args *ac_args);
void
ac_nir_store_var_components(nir_builder *b, nir_variable *var, nir_ssa_def *value,
ac_nir_store_var_components(nir_builder *b, nir_variable *var, nir_def *value,
unsigned component, unsigned writemask);
void
ac_nir_export_primitive(nir_builder *b, nir_ssa_def *prim);
ac_nir_export_primitive(nir_builder *b, nir_def *prim);
void
ac_nir_export_position(nir_builder *b,
@ -84,21 +84,21 @@ ac_nir_export_position(nir_builder *b,
bool no_param_export,
bool force_vrs,
uint64_t outputs_written,
nir_ssa_def *(*outputs)[4]);
nir_def *(*outputs)[4]);
void
ac_nir_export_parameters(nir_builder *b,
const uint8_t *param_offsets,
uint64_t outputs_written,
uint16_t outputs_written_16bit,
nir_ssa_def *(*outputs)[4],
nir_ssa_def *(*outputs_16bit_lo)[4],
nir_ssa_def *(*outputs_16bit_hi)[4]);
nir_def *(*outputs)[4],
nir_def *(*outputs_16bit_lo)[4],
nir_def *(*outputs_16bit_hi)[4]);
nir_ssa_def *
nir_def *
ac_nir_calc_io_offset(nir_builder *b,
nir_intrinsic_instr *intrin,
nir_ssa_def *base_stride,
nir_def *base_stride,
unsigned component_stride,
ac_nir_map_io_driver_location map_io);
@ -206,10 +206,10 @@ ac_nir_lower_mesh_inputs_to_mem(nir_shader *shader,
unsigned task_payload_entry_bytes,
unsigned task_num_entries);
nir_ssa_def *
nir_def *
ac_nir_cull_primitive(nir_builder *b,
nir_ssa_def *initially_accepted,
nir_ssa_def *pos[3][4],
nir_def *initially_accepted,
nir_def *pos[3][4],
unsigned num_vertices,
ac_nir_cull_accepted accept_func,
void *state);
@ -262,8 +262,8 @@ ac_nir_gs_shader_query(nir_builder *b,
bool has_pipeline_stats_query,
unsigned num_vertices_per_primitive,
unsigned wave_size,
nir_ssa_def *vertex_count[4],
nir_ssa_def *primitive_count[4]);
nir_def *vertex_count[4],
nir_def *primitive_count[4]);
void
ac_nir_lower_legacy_gs(nir_shader *nir,


@ -12,13 +12,13 @@
typedef struct
{
nir_ssa_def *w_reflection;
nir_ssa_def *all_w_negative;
nir_ssa_def *any_w_negative;
nir_def *w_reflection;
nir_def *all_w_negative;
nir_def *any_w_negative;
} position_w_info;
static void
analyze_position_w(nir_builder *b, nir_ssa_def *pos[][4], unsigned num_vertices,
analyze_position_w(nir_builder *b, nir_def *pos[][4], unsigned num_vertices,
position_w_info *w_info)
{
w_info->all_w_negative = nir_imm_true(b);
@ -26,34 +26,34 @@ analyze_position_w(nir_builder *b, nir_ssa_def *pos[][4], unsigned num_vertices,
w_info->any_w_negative = nir_imm_false(b);
for (unsigned i = 0; i < num_vertices; ++i) {
nir_ssa_def *neg_w = nir_flt_imm(b, pos[i][3], 0.0f);
nir_def *neg_w = nir_flt_imm(b, pos[i][3], 0.0f);
w_info->w_reflection = nir_ixor(b, neg_w, w_info->w_reflection);
w_info->any_w_negative = nir_ior(b, neg_w, w_info->any_w_negative);
w_info->all_w_negative = nir_iand(b, neg_w, w_info->all_w_negative);
}
}
static nir_ssa_def *
cull_face_triangle(nir_builder *b, nir_ssa_def *pos[3][4], const position_w_info *w_info)
static nir_def *
cull_face_triangle(nir_builder *b, nir_def *pos[3][4], const position_w_info *w_info)
{
nir_ssa_def *det_t0 = nir_fsub(b, pos[2][0], pos[0][0]);
nir_ssa_def *det_t1 = nir_fsub(b, pos[1][1], pos[0][1]);
nir_ssa_def *det_t2 = nir_fsub(b, pos[0][0], pos[1][0]);
nir_ssa_def *det_t3 = nir_fsub(b, pos[0][1], pos[2][1]);
nir_ssa_def *det_p0 = nir_fmul(b, det_t0, det_t1);
nir_ssa_def *det_p1 = nir_fmul(b, det_t2, det_t3);
nir_ssa_def *det = nir_fsub(b, det_p0, det_p1);
nir_def *det_t0 = nir_fsub(b, pos[2][0], pos[0][0]);
nir_def *det_t1 = nir_fsub(b, pos[1][1], pos[0][1]);
nir_def *det_t2 = nir_fsub(b, pos[0][0], pos[1][0]);
nir_def *det_t3 = nir_fsub(b, pos[0][1], pos[2][1]);
nir_def *det_p0 = nir_fmul(b, det_t0, det_t1);
nir_def *det_p1 = nir_fmul(b, det_t2, det_t3);
nir_def *det = nir_fsub(b, det_p0, det_p1);
det = nir_bcsel(b, w_info->w_reflection, nir_fneg(b, det), det);
nir_ssa_def *front_facing_ccw = nir_fgt_imm(b, det, 0.0f);
nir_ssa_def *zero_area = nir_feq_imm(b, det, 0.0f);
nir_ssa_def *ccw = nir_load_cull_ccw_amd(b);
nir_ssa_def *front_facing = nir_ieq(b, front_facing_ccw, ccw);
nir_ssa_def *cull_front = nir_load_cull_front_face_enabled_amd(b);
nir_ssa_def *cull_back = nir_load_cull_back_face_enabled_amd(b);
nir_def *front_facing_ccw = nir_fgt_imm(b, det, 0.0f);
nir_def *zero_area = nir_feq_imm(b, det, 0.0f);
nir_def *ccw = nir_load_cull_ccw_amd(b);
nir_def *front_facing = nir_ieq(b, front_facing_ccw, ccw);
nir_def *cull_front = nir_load_cull_front_face_enabled_amd(b);
nir_def *cull_back = nir_load_cull_back_face_enabled_amd(b);
nir_ssa_def *face_culled = nir_bcsel(b, front_facing, cull_front, cull_back);
nir_def *face_culled = nir_bcsel(b, front_facing, cull_front, cull_back);
face_culled = nir_ior(b, face_culled, zero_area);
/* Don't reject NaN and +/-infinity, these are tricky.
@ -63,7 +63,7 @@ cull_face_triangle(nir_builder *b, nir_ssa_def *pos[3][4], const position_w_info
}
static void
calc_bbox_triangle(nir_builder *b, nir_ssa_def *pos[3][4], nir_ssa_def *bbox_min[2], nir_ssa_def *bbox_max[2])
calc_bbox_triangle(nir_builder *b, nir_def *pos[3][4], nir_def *bbox_min[2], nir_def *bbox_max[2])
{
for (unsigned chan = 0; chan < 2; ++chan) {
bbox_min[chan] = nir_fmin(b, pos[0][chan], nir_fmin(b, pos[1][chan], pos[2][chan]));
@ -71,10 +71,10 @@ calc_bbox_triangle(nir_builder *b, nir_ssa_def *pos[3][4], nir_ssa_def *bbox_min
}
}
static nir_ssa_def *
cull_frustrum(nir_builder *b, nir_ssa_def *bbox_min[2], nir_ssa_def *bbox_max[2])
static nir_def *
cull_frustrum(nir_builder *b, nir_def *bbox_min[2], nir_def *bbox_max[2])
{
nir_ssa_def *prim_outside_view = nir_imm_false(b);
nir_def *prim_outside_view = nir_imm_false(b);
for (unsigned chan = 0; chan < 2; ++chan) {
prim_outside_view = nir_ior(b, prim_outside_view, nir_flt_imm(b, bbox_max[chan], -1.0f));
@ -84,25 +84,25 @@ cull_frustrum(nir_builder *b, nir_ssa_def *bbox_min[2], nir_ssa_def *bbox_max[2]
return prim_outside_view;
}
static nir_ssa_def *
cull_small_primitive_triangle(nir_builder *b, nir_ssa_def *bbox_min[2], nir_ssa_def *bbox_max[2],
nir_ssa_def *prim_is_small_else)
static nir_def *
cull_small_primitive_triangle(nir_builder *b, nir_def *bbox_min[2], nir_def *bbox_max[2],
nir_def *prim_is_small_else)
{
nir_ssa_def *prim_is_small = NULL;
nir_def *prim_is_small = NULL;
nir_if *if_cull_small_prims = nir_push_if(b, nir_load_cull_small_primitives_enabled_amd(b));
{
nir_ssa_def *vp = nir_load_viewport_xy_scale_and_offset(b);
nir_ssa_def *small_prim_precision = nir_load_cull_small_prim_precision_amd(b);
nir_def *vp = nir_load_viewport_xy_scale_and_offset(b);
nir_def *small_prim_precision = nir_load_cull_small_prim_precision_amd(b);
prim_is_small = prim_is_small_else;
for (unsigned chan = 0; chan < 2; ++chan) {
nir_ssa_def *vp_scale = nir_channel(b, vp, chan);
nir_ssa_def *vp_translate = nir_channel(b, vp, 2 + chan);
nir_def *vp_scale = nir_channel(b, vp, chan);
nir_def *vp_translate = nir_channel(b, vp, 2 + chan);
/* Convert the position to screen-space coordinates. */
nir_ssa_def *min = nir_ffma(b, bbox_min[chan], vp_scale, vp_translate);
nir_ssa_def *max = nir_ffma(b, bbox_max[chan], vp_scale, vp_translate);
nir_def *min = nir_ffma(b, bbox_min[chan], vp_scale, vp_translate);
nir_def *max = nir_ffma(b, bbox_max[chan], vp_scale, vp_translate);
/* Scale the bounding box according to precision. */
min = nir_fsub(b, min, small_prim_precision);
@ -112,7 +112,7 @@ cull_small_primitive_triangle(nir_builder *b, nir_ssa_def *bbox_min[2], nir_ssa_
min = nir_fround_even(b, min);
max = nir_fround_even(b, max);
nir_ssa_def *rounded_to_eq = nir_feq(b, min, max);
nir_def *rounded_to_eq = nir_feq(b, min, max);
prim_is_small = nir_ior(b, prim_is_small, rounded_to_eq);
}
}
@ -121,27 +121,27 @@ cull_small_primitive_triangle(nir_builder *b, nir_ssa_def *bbox_min[2], nir_ssa_
return nir_if_phi(b, prim_is_small, prim_is_small_else);
}
static nir_ssa_def *
static nir_def *
ac_nir_cull_triangle(nir_builder *b,
nir_ssa_def *initially_accepted,
nir_ssa_def *pos[3][4],
nir_def *initially_accepted,
nir_def *pos[3][4],
position_w_info *w_info,
ac_nir_cull_accepted accept_func,
void *state)
{
nir_ssa_def *accepted = initially_accepted;
nir_def *accepted = initially_accepted;
accepted = nir_iand(b, accepted, nir_inot(b, w_info->all_w_negative));
accepted = nir_iand(b, accepted, nir_inot(b, cull_face_triangle(b, pos, w_info)));
nir_ssa_def *bbox_accepted = NULL;
nir_def *bbox_accepted = NULL;
nir_if *if_accepted = nir_push_if(b, accepted);
{
nir_ssa_def *bbox_min[2] = {0}, *bbox_max[2] = {0};
nir_def *bbox_min[2] = {0}, *bbox_max[2] = {0};
calc_bbox_triangle(b, pos, bbox_min, bbox_max);
nir_ssa_def *prim_outside_view = cull_frustrum(b, bbox_min, bbox_max);
nir_ssa_def *prim_invisible =
nir_def *prim_outside_view = cull_frustrum(b, bbox_min, bbox_max);
nir_def *prim_invisible =
cull_small_primitive_triangle(b, bbox_min, bbox_max, prim_outside_view);
bbox_accepted = nir_ior(b, nir_inot(b, prim_invisible), w_info->any_w_negative);
@ -162,18 +162,18 @@ ac_nir_cull_triangle(nir_builder *b,
}
static void
rotate_45degrees(nir_builder *b, nir_ssa_def *v[2])
rotate_45degrees(nir_builder *b, nir_def *v[2])
{
/* sin(45) == cos(45) */
nir_ssa_def *sincos45 = nir_imm_float(b, 0.707106781);
nir_def *sincos45 = nir_imm_float(b, 0.707106781);
/* x2 = x*cos45 - y*sin45 = x*sincos45 - y*sincos45
* y2 = x*sin45 + y*cos45 = x*sincos45 + y*sincos45
*/
nir_ssa_def *first = nir_fmul(b, v[0], sincos45);
nir_def *first = nir_fmul(b, v[0], sincos45);
/* Doing 2x ffma while duplicating the multiplication is 33% faster than fmul+fadd+fadd. */
nir_ssa_def *result[2] = {
nir_def *result[2] = {
nir_ffma(b, nir_fneg(b, v[1]), sincos45, first),
nir_ffma(b, v[1], sincos45, first),
};
@ -182,26 +182,26 @@ rotate_45degrees(nir_builder *b, nir_ssa_def *v[2])
}
static void
calc_bbox_line(nir_builder *b, nir_ssa_def *pos[3][4], nir_ssa_def *bbox_min[2], nir_ssa_def *bbox_max[2])
calc_bbox_line(nir_builder *b, nir_def *pos[3][4], nir_def *bbox_min[2], nir_def *bbox_max[2])
{
nir_ssa_def *clip_half_line_width = nir_load_clip_half_line_width_amd(b);
nir_def *clip_half_line_width = nir_load_clip_half_line_width_amd(b);
for (unsigned chan = 0; chan < 2; ++chan) {
bbox_min[chan] = nir_fmin(b, pos[0][chan], pos[1][chan]);
bbox_max[chan] = nir_fmax(b, pos[0][chan], pos[1][chan]);
nir_ssa_def *width = nir_channel(b, clip_half_line_width, chan);
nir_def *width = nir_channel(b, clip_half_line_width, chan);
bbox_min[chan] = nir_fsub(b, bbox_min[chan], width);
bbox_max[chan] = nir_fadd(b, bbox_max[chan], width);
}
}
static nir_ssa_def *
cull_small_primitive_line(nir_builder *b, nir_ssa_def *pos[3][4],
nir_ssa_def *bbox_min[2], nir_ssa_def *bbox_max[2],
nir_ssa_def *prim_is_small_else)
static nir_def *
cull_small_primitive_line(nir_builder *b, nir_def *pos[3][4],
nir_def *bbox_min[2], nir_def *bbox_max[2],
nir_def *prim_is_small_else)
{
nir_ssa_def *prim_is_small = NULL;
nir_def *prim_is_small = NULL;
/* Small primitive filter - eliminate lines that are too small to affect a sample. */
nir_if *if_cull_small_prims = nir_push_if(b, nir_load_cull_small_primitives_enabled_amd(b));
@ -234,13 +234,13 @@ cull_small_primitive_line(nir_builder *b, nir_ssa_def *pos[3][4],
* A good test is piglit/lineloop because it draws 10k subpixel lines in a circle.
* It should contain no holes if this matches hw behavior.
*/
nir_ssa_def *v0[2], *v1[2];
nir_ssa_def *vp = nir_load_viewport_xy_scale_and_offset(b);
nir_def *v0[2], *v1[2];
nir_def *vp = nir_load_viewport_xy_scale_and_offset(b);
/* Get vertex positions in pixels. */
for (unsigned chan = 0; chan < 2; chan++) {
nir_ssa_def *vp_scale = nir_channel(b, vp, chan);
nir_ssa_def *vp_translate = nir_channel(b, vp, 2 + chan);
nir_def *vp_scale = nir_channel(b, vp, chan);
nir_def *vp_translate = nir_channel(b, vp, 2 + chan);
v0[chan] = nir_ffma(b, pos[0][chan], vp_scale, vp_translate);
v1[chan] = nir_ffma(b, pos[1][chan], vp_scale, vp_translate);
@ -250,9 +250,9 @@ cull_small_primitive_line(nir_builder *b, nir_ssa_def *pos[3][4],
rotate_45degrees(b, v0);
rotate_45degrees(b, v1);
nir_ssa_def *small_prim_precision = nir_load_cull_small_prim_precision_amd(b);
nir_def *small_prim_precision = nir_load_cull_small_prim_precision_amd(b);
nir_ssa_def *rounded_to_eq[2];
nir_def *rounded_to_eq[2];
for (unsigned chan = 0; chan < 2; chan++) {
/* The width of each square is sqrt(0.5), so scale it to 1 because we want
* round() to give us the position of the closest center of a square (diamond).
@ -263,8 +263,8 @@ cull_small_primitive_line(nir_builder *b, nir_ssa_def *pos[3][4],
/* Compute the bounding box around both vertices. We do this because we must
* enlarge the line area by the precision of the rasterizer.
*/
nir_ssa_def *min = nir_fmin(b, v0[chan], v1[chan]);
nir_ssa_def *max = nir_fmax(b, v0[chan], v1[chan]);
nir_def *min = nir_fmin(b, v0[chan], v1[chan]);
nir_def *max = nir_fmax(b, v0[chan], v1[chan]);
/* Enlarge the bounding box by the precision of the rasterizer. */
min = nir_fsub(b, min, small_prim_precision);
@ -287,27 +287,27 @@ cull_small_primitive_line(nir_builder *b, nir_ssa_def *pos[3][4],
return nir_if_phi(b, prim_is_small, prim_is_small_else);
}
static nir_ssa_def *
static nir_def *
ac_nir_cull_line(nir_builder *b,
nir_ssa_def *initially_accepted,
nir_ssa_def *pos[3][4],
nir_def *initially_accepted,
nir_def *pos[3][4],
position_w_info *w_info,
ac_nir_cull_accepted accept_func,
void *state)
{
nir_ssa_def *accepted = initially_accepted;
nir_def *accepted = initially_accepted;
accepted = nir_iand(b, accepted, nir_inot(b, w_info->all_w_negative));
nir_ssa_def *bbox_accepted = NULL;
nir_def *bbox_accepted = NULL;
nir_if *if_accepted = nir_push_if(b, accepted);
{
nir_ssa_def *bbox_min[2] = {0}, *bbox_max[2] = {0};
nir_def *bbox_min[2] = {0}, *bbox_max[2] = {0};
calc_bbox_line(b, pos, bbox_min, bbox_max);
/* Frustrum culling - eliminate lines that are fully outside the view. */
nir_ssa_def *prim_outside_view = cull_frustrum(b, bbox_min, bbox_max);
nir_ssa_def *prim_invisible =
nir_def *prim_outside_view = cull_frustrum(b, bbox_min, bbox_max);
nir_def *prim_invisible =
cull_small_primitive_line(b, pos, bbox_min, bbox_max, prim_outside_view);
bbox_accepted = nir_ior(b, nir_inot(b, prim_invisible), w_info->any_w_negative);
@ -326,10 +326,10 @@ ac_nir_cull_line(nir_builder *b,
return nir_if_phi(b, bbox_accepted, accepted);
}
nir_ssa_def *
nir_def *
ac_nir_cull_primitive(nir_builder *b,
nir_ssa_def *initially_accepted,
nir_ssa_def *pos[3][4],
nir_def *initially_accepted,
nir_def *pos[3][4],
unsigned num_vertices,
ac_nir_cull_accepted accept_func,
void *state)


@ -36,8 +36,8 @@ typedef struct {
bool gs_triangle_strip_adjacency_fix;
} lower_esgs_io_state;
static nir_ssa_def *
emit_split_buffer_load(nir_builder *b, nir_ssa_def *desc, nir_ssa_def *v_off, nir_ssa_def *s_off,
static nir_def *
emit_split_buffer_load(nir_builder *b, nir_def *desc, nir_def *v_off, nir_def *s_off,
unsigned component_stride, unsigned num_components, unsigned bit_size)
{
unsigned total_bytes = num_components * bit_size / 8u;
@ -45,7 +45,7 @@ emit_split_buffer_load(nir_builder *b, nir_ssa_def *desc, nir_ssa_def *v_off, ni
unsigned remaining_bytes = total_bytes - full_dwords * 4u;
/* Accommodate max number of split 64-bit loads */
nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS * 2u];
nir_def *comps[NIR_MAX_VEC_COMPONENTS * 2u];
/* Assume that 1x32-bit load is better than 1x16-bit + 1x8-bit */
if (remaining_bytes == 3) {
@ -53,7 +53,7 @@ emit_split_buffer_load(nir_builder *b, nir_ssa_def *desc, nir_ssa_def *v_off, ni
full_dwords++;
}
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_def *zero = nir_imm_int(b, 0);
for (unsigned i = 0; i < full_dwords; ++i)
comps[i] = nir_load_buffer_amd(b, 1, 32, desc, v_off, s_off, zero,
@ -70,11 +70,11 @@ emit_split_buffer_load(nir_builder *b, nir_ssa_def *desc, nir_ssa_def *v_off, ni
}
static void
emit_split_buffer_store(nir_builder *b, nir_ssa_def *d, nir_ssa_def *desc, nir_ssa_def *v_off, nir_ssa_def *s_off,
emit_split_buffer_store(nir_builder *b, nir_def *d, nir_def *desc, nir_def *v_off, nir_def *s_off,
unsigned component_stride, unsigned num_components, unsigned bit_size,
unsigned writemask, bool swizzled, bool slc)
{
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_def *zero = nir_imm_int(b, 0);
while (writemask) {
int start, count;
@ -91,7 +91,7 @@ emit_split_buffer_store(nir_builder *b, nir_ssa_def *d, nir_ssa_def *desc, nir_s
else if ((start_byte % 4) == 2)
store_bytes = MIN2(store_bytes, 2);
nir_ssa_def *store_val = nir_extract_bits(b, &d, 1, start_byte * 8u, 1, store_bytes * 8u);
nir_def *store_val = nir_extract_bits(b, &d, 1, start_byte * 8u, 1, store_bytes * 8u);
nir_store_buffer_amd(b, store_val, desc, v_off, s_off, zero,
.base = start_byte, .memory_modes = nir_var_shader_out,
.access = ACCESS_COHERENT |
@ -153,19 +153,19 @@ lower_es_output_store(nir_builder *b,
unsigned write_mask = nir_intrinsic_write_mask(intrin);
b->cursor = nir_before_instr(instr);
nir_ssa_def *io_off = ac_nir_calc_io_offset(b, intrin, nir_imm_int(b, 16u), 4u, st->map_io);
nir_def *io_off = ac_nir_calc_io_offset(b, intrin, nir_imm_int(b, 16u), 4u, st->map_io);
if (st->gfx_level <= GFX8) {
/* GFX6-8: ES is a separate HW stage, data is passed from ES to GS in VRAM. */
nir_ssa_def *ring = nir_load_ring_esgs_amd(b);
nir_ssa_def *es2gs_off = nir_load_ring_es2gs_offset_amd(b);
nir_def *ring = nir_load_ring_esgs_amd(b);
nir_def *es2gs_off = nir_load_ring_es2gs_offset_amd(b);
emit_split_buffer_store(b, intrin->src[0].ssa, ring, io_off, es2gs_off, 4u,
intrin->src[0].ssa->num_components, intrin->src[0].ssa->bit_size,
write_mask, true, true);
} else {
/* GFX9+: ES is merged into GS, data is passed through LDS. */
nir_ssa_def *vertex_idx = nir_load_local_invocation_index(b);
nir_ssa_def *off = nir_iadd(b, nir_imul_imm(b, vertex_idx, st->esgs_itemsize), io_off);
nir_def *vertex_idx = nir_load_local_invocation_index(b);
nir_def *off = nir_iadd(b, nir_imul_imm(b, vertex_idx, st->esgs_itemsize), io_off);
nir_store_shared(b, intrin->src[0].ssa, off, .write_mask = write_mask);
}
@ -173,10 +173,10 @@ lower_es_output_store(nir_builder *b,
return true;
}
static nir_ssa_def *
static nir_def *
gs_get_vertex_offset(nir_builder *b, lower_esgs_io_state *st, unsigned vertex_index)
{
nir_ssa_def *origin = nir_load_gs_vertex_offset_amd(b, .base = vertex_index);
nir_def *origin = nir_load_gs_vertex_offset_amd(b, .base = vertex_index);
if (!st->gs_triangle_strip_adjacency_fix)
return origin;
@ -190,33 +190,33 @@ gs_get_vertex_offset(nir_builder *b, lower_esgs_io_state *st, unsigned vertex_in
/* 6 vertex offset are packed to 3 vgprs for GFX9+ */
fixed_index = (vertex_index + 2) % 3;
}
nir_ssa_def *fixed = nir_load_gs_vertex_offset_amd(b, .base = fixed_index);
nir_def *fixed = nir_load_gs_vertex_offset_amd(b, .base = fixed_index);
nir_ssa_def *prim_id = nir_load_primitive_id(b);
nir_def *prim_id = nir_load_primitive_id(b);
/* odd primitive id use fixed offset */
nir_ssa_def *cond = nir_i2b(b, nir_iand_imm(b, prim_id, 1));
nir_def *cond = nir_i2b(b, nir_iand_imm(b, prim_id, 1));
return nir_bcsel(b, cond, fixed, origin);
}
static nir_ssa_def *
static nir_def *
gs_per_vertex_input_vertex_offset_gfx6(nir_builder *b, lower_esgs_io_state *st,
nir_src *vertex_src)
{
if (nir_src_is_const(*vertex_src))
return gs_get_vertex_offset(b, st, nir_src_as_uint(*vertex_src));
nir_ssa_def *vertex_offset = gs_get_vertex_offset(b, st, 0);
nir_def *vertex_offset = gs_get_vertex_offset(b, st, 0);
for (unsigned i = 1; i < b->shader->info.gs.vertices_in; ++i) {
nir_ssa_def *cond = nir_ieq_imm(b, vertex_src->ssa, i);
nir_ssa_def *elem = gs_get_vertex_offset(b, st, i);
nir_def *cond = nir_ieq_imm(b, vertex_src->ssa, i);
nir_def *elem = gs_get_vertex_offset(b, st, i);
vertex_offset = nir_bcsel(b, cond, elem, vertex_offset);
}
return vertex_offset;
}
static nir_ssa_def *
static nir_def *
gs_per_vertex_input_vertex_offset_gfx9(nir_builder *b, lower_esgs_io_state *st,
nir_src *vertex_src)
{
@ -226,11 +226,11 @@ gs_per_vertex_input_vertex_offset_gfx9(nir_builder *b, lower_esgs_io_state *st,
(vertex & 1u) * 16u, 16u);
}
nir_ssa_def *vertex_offset = gs_get_vertex_offset(b, st, 0);
nir_def *vertex_offset = gs_get_vertex_offset(b, st, 0);
for (unsigned i = 1; i < b->shader->info.gs.vertices_in; i++) {
nir_ssa_def *cond = nir_ieq_imm(b, vertex_src->ssa, i);
nir_ssa_def *elem = gs_get_vertex_offset(b, st, i / 2u * 2u);
nir_def *cond = nir_ieq_imm(b, vertex_src->ssa, i);
nir_def *elem = gs_get_vertex_offset(b, st, i / 2u * 2u);
if (i % 2u)
elem = nir_ishr_imm(b, elem, 16u);
@ -240,13 +240,13 @@ gs_per_vertex_input_vertex_offset_gfx9(nir_builder *b, lower_esgs_io_state *st,
return nir_iand_imm(b, vertex_offset, 0xffffu);
}
static nir_ssa_def *
static nir_def *
gs_per_vertex_input_offset(nir_builder *b,
lower_esgs_io_state *st,
nir_intrinsic_instr *instr)
{
nir_src *vertex_src = nir_get_io_arrayed_index_src(instr);
nir_ssa_def *vertex_offset = st->gfx_level >= GFX9
nir_def *vertex_offset = st->gfx_level >= GFX9
? gs_per_vertex_input_vertex_offset_gfx9(b, st, vertex_src)
: gs_per_vertex_input_vertex_offset_gfx6(b, st, vertex_src);
@ -257,25 +257,25 @@ gs_per_vertex_input_offset(nir_builder *b,
vertex_offset = nir_imul(b, vertex_offset, nir_load_esgs_vertex_stride_amd(b));
unsigned base_stride = st->gfx_level >= GFX9 ? 1 : 64 /* Wave size on GFX6-8 */;
nir_ssa_def *io_off = ac_nir_calc_io_offset(b, instr, nir_imm_int(b, base_stride * 4u), base_stride, st->map_io);
nir_ssa_def *off = nir_iadd(b, io_off, vertex_offset);
nir_def *io_off = ac_nir_calc_io_offset(b, instr, nir_imm_int(b, base_stride * 4u), base_stride, st->map_io);
nir_def *off = nir_iadd(b, io_off, vertex_offset);
return nir_imul_imm(b, off, 4u);
}
static nir_ssa_def *
static nir_def *
lower_gs_per_vertex_input_load(nir_builder *b,
nir_instr *instr,
void *state)
{
lower_esgs_io_state *st = (lower_esgs_io_state *) state;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
nir_ssa_def *off = gs_per_vertex_input_offset(b, st, intrin);
nir_def *off = gs_per_vertex_input_offset(b, st, intrin);
if (st->gfx_level >= GFX9)
return nir_load_shared(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, off);
unsigned wave_size = 64u; /* GFX6-8 only support wave64 */
nir_ssa_def *ring = nir_load_ring_esgs_amd(b);
nir_def *ring = nir_load_ring_esgs_amd(b);
return emit_split_buffer_load(b, ring, off, nir_imm_zero(b, 1, 32), 4u * wave_size,
intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size);
}


@ -8,24 +8,24 @@
#include "nir.h"
#include "nir_builder.h"
static nir_ssa_def *
try_extract_additions(nir_builder *b, nir_ssa_scalar scalar, uint64_t *out_const,
nir_ssa_def **out_offset)
static nir_def *
try_extract_additions(nir_builder *b, nir_scalar scalar, uint64_t *out_const,
nir_def **out_offset)
{
if (!nir_ssa_scalar_is_alu(scalar) || nir_ssa_scalar_alu_op(scalar) != nir_op_iadd)
if (!nir_scalar_is_alu(scalar) || nir_scalar_alu_op(scalar) != nir_op_iadd)
return NULL;
nir_alu_instr *alu = nir_instr_as_alu(scalar.def->parent_instr);
nir_ssa_scalar src0 = nir_ssa_scalar_chase_alu_src(scalar, 0);
nir_ssa_scalar src1 = nir_ssa_scalar_chase_alu_src(scalar, 1);
nir_scalar src0 = nir_scalar_chase_alu_src(scalar, 0);
nir_scalar src1 = nir_scalar_chase_alu_src(scalar, 1);
for (unsigned i = 0; i < 2; ++i) {
nir_ssa_scalar src = i ? src1 : src0;
if (nir_ssa_scalar_is_const(src)) {
*out_const += nir_ssa_scalar_as_uint(src);
} else if (nir_ssa_scalar_is_alu(src) && nir_ssa_scalar_alu_op(src) == nir_op_u2u64) {
nir_ssa_scalar offset_scalar = nir_ssa_scalar_chase_alu_src(src, 0);
nir_ssa_def *offset = nir_channel(b, offset_scalar.def, offset_scalar.comp);
nir_scalar src = i ? src1 : src0;
if (nir_scalar_is_const(src)) {
*out_const += nir_scalar_as_uint(src);
} else if (nir_scalar_is_alu(src) && nir_scalar_alu_op(src) == nir_op_u2u64) {
nir_scalar offset_scalar = nir_scalar_chase_alu_src(src, 0);
nir_def *offset = nir_channel(b, offset_scalar.def, offset_scalar.comp);
if (*out_offset)
*out_offset = nir_iadd(b, *out_offset, offset);
else
@ -34,13 +34,13 @@ try_extract_additions(nir_builder *b, nir_ssa_scalar scalar, uint64_t *out_const
continue;
}
nir_ssa_def *replace_src =
nir_def *replace_src =
try_extract_additions(b, i == 1 ? src0 : src1, out_const, out_offset);
return replace_src ? replace_src : nir_ssa_for_alu_src(b, alu, 1 - i);
}
nir_ssa_def *replace_src0 = try_extract_additions(b, src0, out_const, out_offset);
nir_ssa_def *replace_src1 = try_extract_additions(b, src1, out_const, out_offset);
nir_def *replace_src0 = try_extract_additions(b, src0, out_const, out_offset);
nir_def *replace_src1 = try_extract_additions(b, src1, out_const, out_offset);
if (!replace_src0 && !replace_src1)
return NULL;
@ -80,10 +80,10 @@ process_instr(nir_builder *b, nir_instr *instr, void *_)
nir_src *addr_src = &intrin->src[addr_src_idx];
uint64_t off_const = 0;
nir_ssa_def *offset = NULL;
nir_ssa_scalar src = {addr_src->ssa, 0};
nir_def *offset = NULL;
nir_scalar src = {addr_src->ssa, 0};
b->cursor = nir_after_instr(addr_src->ssa->parent_instr);
nir_ssa_def *addr = try_extract_additions(b, src, &off_const, &offset);
nir_def *addr = try_extract_additions(b, src, &off_const, &offset);
addr = addr ? addr : addr_src->ssa;
b->cursor = nir_before_instr(&intrin->instr);
@ -122,7 +122,7 @@ process_instr(nir_builder *b, nir_instr *instr, void *_)
nir_builder_instr_insert(b, &new_intrin->instr);
if (op != nir_intrinsic_store_global_amd)
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, &new_intrin->dest.ssa);
nir_def_rewrite_uses(&intrin->dest.ssa, &new_intrin->dest.ssa);
nir_instr_remove(&intrin->instr);
return true;


@ -23,7 +23,7 @@
#include "nir_builder.h"
#include "amdgfxregs.h"
static nir_ssa_def *get_field(nir_builder *b, nir_ssa_def *desc, unsigned index, unsigned mask)
static nir_def *get_field(nir_builder *b, nir_def *desc, unsigned index, unsigned mask)
{
return nir_ubfe_imm(b, nir_channel(b, desc, index), ffs(mask) - 1, util_bitcount(mask));
}
@ -46,17 +46,17 @@ static unsigned get_coord_components(enum glsl_sampler_dim dim, bool is_array)
/* Lower image coordinates to a buffer element index. Return UINT_MAX if the image coordinates
* are out of bounds.
*/
static nir_ssa_def *lower_image_coords(nir_builder *b, nir_ssa_def *desc, nir_ssa_def *coord,
static nir_def *lower_image_coords(nir_builder *b, nir_def *desc, nir_def *coord,
enum glsl_sampler_dim dim, bool is_array,
bool handle_out_of_bounds)
{
unsigned num_coord_components = get_coord_components(dim, is_array);
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_def *zero = nir_imm_int(b, 0);
/* Get coordinates. */
nir_ssa_def *x = nir_channel(b, coord, 0);
nir_ssa_def *y = num_coord_components >= 2 ? nir_channel(b, coord, 1) : NULL;
nir_ssa_def *z = num_coord_components >= 3 ? nir_channel(b, coord, 2) : NULL;
nir_def *x = nir_channel(b, coord, 0);
nir_def *y = num_coord_components >= 2 ? nir_channel(b, coord, 1) : NULL;
nir_def *z = num_coord_components >= 3 ? nir_channel(b, coord, 2) : NULL;
if (dim == GLSL_SAMPLER_DIM_1D && is_array) {
z = y;
@ -64,35 +64,35 @@ static nir_ssa_def *lower_image_coords(nir_builder *b, nir_ssa_def *desc, nir_ss
}
if (is_array) {
nir_ssa_def *first_layer = get_field(b, desc, 5, 0xffff0000);
nir_def *first_layer = get_field(b, desc, 5, 0xffff0000);
z = nir_iadd(b, z, first_layer);
}
/* Compute the buffer element index. */
nir_ssa_def *index = x;
nir_def *index = x;
if (y) {
nir_ssa_def *pitch = nir_channel(b, desc, 6);
nir_def *pitch = nir_channel(b, desc, 6);
index = nir_iadd(b, index, nir_imul(b, pitch, y));
}
if (z) {
nir_ssa_def *slice_elements = nir_channel(b, desc, 7);
nir_def *slice_elements = nir_channel(b, desc, 7);
index = nir_iadd(b, index, nir_imul(b, slice_elements, z));
}
/* Determine whether the coordinates are out of bounds. */
nir_ssa_def *out_of_bounds = NULL;
nir_def *out_of_bounds = NULL;
if (handle_out_of_bounds) {
nir_ssa_def *width = get_field(b, desc, 4, 0xffff);
nir_def *width = get_field(b, desc, 4, 0xffff);
out_of_bounds = nir_ior(b, nir_ilt(b, x, zero), nir_ige(b, x, width));
if (y) {
nir_ssa_def *height = get_field(b, desc, 4, 0xffff0000);
nir_def *height = get_field(b, desc, 4, 0xffff0000);
out_of_bounds = nir_ior(b, out_of_bounds,
nir_ior(b, nir_ilt(b, y, zero), nir_ige(b, y, height)));
}
if (z) {
nir_ssa_def *depth = get_field(b, desc, 5, 0xffff);
nir_def *depth = get_field(b, desc, 5, 0xffff);
out_of_bounds = nir_ior(b, out_of_bounds,
nir_ior(b, nir_ilt(b, z, zero), nir_ige(b, z, depth)));
}
@ -104,12 +104,12 @@ static nir_ssa_def *lower_image_coords(nir_builder *b, nir_ssa_def *desc, nir_ss
return index;
}
static nir_ssa_def *emulated_image_load(nir_builder *b, unsigned num_components, unsigned bit_size,
nir_ssa_def *desc, nir_ssa_def *coord,
static nir_def *emulated_image_load(nir_builder *b, unsigned num_components, unsigned bit_size,
nir_def *desc, nir_def *coord,
enum gl_access_qualifier access, enum glsl_sampler_dim dim,
bool is_array, bool handle_out_of_bounds)
{
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_def *zero = nir_imm_int(b, 0);
return nir_load_buffer_amd(b, num_components, bit_size, nir_channels(b, desc, 0xf),
zero, zero,
@ -120,11 +120,11 @@ static nir_ssa_def *emulated_image_load(nir_builder *b, unsigned num_components,
.access = access | ACCESS_USES_FORMAT_AMD);
}
static void emulated_image_store(nir_builder *b, nir_ssa_def *desc, nir_ssa_def *coord,
nir_ssa_def *data, enum gl_access_qualifier access,
static void emulated_image_store(nir_builder *b, nir_def *desc, nir_def *coord,
nir_def *data, enum gl_access_qualifier access,
enum glsl_sampler_dim dim, bool is_array)
{
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_def *zero = nir_imm_int(b, 0);
nir_store_buffer_amd(b, data, nir_channels(b, desc, 0xf), zero, zero,
lower_image_coords(b, desc, coord, dim, is_array, true),
@ -134,7 +134,7 @@ static void emulated_image_store(nir_builder *b, nir_ssa_def *desc, nir_ssa_def
}
/* Return the width, height, or depth for dim=0,1,2. */
static nir_ssa_def *get_dim(nir_builder *b, nir_ssa_def *desc, unsigned dim)
static nir_def *get_dim(nir_builder *b, nir_def *desc, unsigned dim)
{
return get_field(b, desc, 4 + dim / 2, 0xffff << (16 * (dim % 2)));
}
@ -142,9 +142,9 @@ static nir_ssa_def *get_dim(nir_builder *b, nir_ssa_def *desc, unsigned dim)
/* Lower txl with lod=0 to typed buffer loads. This is based on the equations in the GL spec.
* This basically converts the tex opcode into 1 or more image_load opcodes.
*/
static nir_ssa_def *emulated_tex_level_zero(nir_builder *b, unsigned num_components,
unsigned bit_size, nir_ssa_def *desc,
nir_ssa_def *sampler_desc, nir_ssa_def *coord_vec,
static nir_def *emulated_tex_level_zero(nir_builder *b, unsigned num_components,
unsigned bit_size, nir_def *desc,
nir_def *sampler_desc, nir_def *coord_vec,
enum glsl_sampler_dim sampler_dim, bool is_array)
{
const enum gl_access_qualifier access =
@ -153,9 +153,9 @@ static nir_ssa_def *emulated_tex_level_zero(nir_builder *b, unsigned num_compone
const unsigned num_dim_coords = num_coord_components - is_array;
const unsigned array_comp = num_coord_components - 1;
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_ssa_def *fp_one = nir_imm_floatN_t(b, 1, bit_size);
nir_ssa_def *coord[3] = {0};
nir_def *zero = nir_imm_int(b, 0);
nir_def *fp_one = nir_imm_floatN_t(b, 1, bit_size);
nir_def *coord[3] = {0};
assert(num_coord_components <= 3);
for (unsigned i = 0; i < num_coord_components; i++)
@ -179,14 +179,14 @@ static nir_ssa_def *emulated_tex_level_zero(nir_builder *b, unsigned num_compone
*
* We assume that XY_MIN_FILTER and Z_FILTER are identical.
*/
nir_ssa_def *is_nearest =
nir_def *is_nearest =
nir_ieq_imm(b, nir_iand_imm(b, nir_channel(b, sampler_desc, 2), 1 << 20), 0);
nir_ssa_def *result_nearest, *result_linear;
nir_def *result_nearest, *result_linear;
nir_if *if_nearest = nir_push_if(b, is_nearest);
{
/* Nearest filter. */
nir_ssa_def *coord0[3] = {0};
nir_def *coord0[3] = {0};
memcpy(coord0, coord, sizeof(coord));
for (unsigned dim = 0; dim < num_dim_coords; dim++) {
@ -205,9 +205,9 @@ static nir_ssa_def *emulated_tex_level_zero(nir_builder *b, unsigned num_compone
nir_push_else(b, if_nearest);
{
/* Linear filter. */
nir_ssa_def *coord0[3] = {0};
nir_ssa_def *coord1[3] = {0};
nir_ssa_def *weight[3] = {0};
nir_def *coord0[3] = {0};
nir_def *coord1[3] = {0};
nir_def *weight[3] = {0};
memcpy(coord0, coord, sizeof(coord));
@ -231,10 +231,10 @@ static nir_ssa_def *emulated_tex_level_zero(nir_builder *b, unsigned num_compone
/* Load all texels for the linear filter.
* This is 2 texels for 1D, 4 texels for 2D, and 8 texels for 3D.
*/
nir_ssa_def *texel[8];
nir_def *texel[8];
for (unsigned i = 0; i < (1 << num_dim_coords); i++) {
nir_ssa_def *texel_coord[3];
nir_def *texel_coord[3];
/* Determine whether the current texel should use channels from coord0
* or coord1. The i-th bit of the texel index determines that.
@ -247,7 +247,7 @@ static nir_ssa_def *emulated_tex_level_zero(nir_builder *b, unsigned num_compone
texel_coord[array_comp] = coord0[array_comp];
/* Compute how much the texel contributes to the final result. */
nir_ssa_def *texel_weight = fp_one;
nir_def *texel_weight = fp_one;
for (unsigned dim = 0; dim < num_dim_coords; dim++) {
/* Let's see what "i" represents:
* Texel i=0 = 000
@ -296,10 +296,10 @@ static bool lower_image_opcodes(nir_builder *b, nir_instr *instr, void *data)
enum gl_access_qualifier access;
enum glsl_sampler_dim dim;
bool is_array;
nir_ssa_def *desc = NULL, *result = NULL;
nir_def *desc = NULL, *result = NULL;
ASSERTED const char *intr_name;
nir_ssa_def *dst = &intr->dest.ssa;
nir_def *dst = &intr->dest.ssa;
b->cursor = nir_before_instr(instr);
switch (intr->intrinsic) {
@ -359,7 +359,7 @@ static bool lower_image_opcodes(nir_builder *b, nir_instr *instr, void *data)
case nir_intrinsic_bindless_image_load:
result = emulated_image_load(b, intr->dest.ssa.num_components, intr->dest.ssa.bit_size,
desc, intr->src[1].ssa, access, dim, is_array, true);
nir_ssa_def_rewrite_uses_after(dst, result, instr);
nir_def_rewrite_uses_after(dst, result, instr);
nir_instr_remove(instr);
return true;
@ -376,9 +376,9 @@ static bool lower_image_opcodes(nir_builder *b, nir_instr *instr, void *data)
} else if (instr->type == nir_instr_type_tex) {
nir_tex_instr *tex = nir_instr_as_tex(instr);
nir_tex_instr *new_tex;
nir_ssa_def *coord = NULL, *desc = NULL, *sampler_desc = NULL, *result = NULL;
nir_def *coord = NULL, *desc = NULL, *sampler_desc = NULL, *result = NULL;
nir_ssa_def *dst = &tex->dest.ssa;
nir_def *dst = &tex->dest.ssa;
b->cursor = nir_before_instr(instr);
switch (tex->op) {
@ -447,7 +447,7 @@ static bool lower_image_opcodes(nir_builder *b, nir_instr *instr, void *data)
desc, coord,
ACCESS_RESTRICT | ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER,
tex->sampler_dim, tex->is_array, true);
nir_ssa_def_rewrite_uses_after(dst, result, instr);
nir_def_rewrite_uses_after(dst, result, instr);
nir_instr_remove(instr);
return true;
@ -455,7 +455,7 @@ static bool lower_image_opcodes(nir_builder *b, nir_instr *instr, void *data)
case nir_texop_txl:
result = emulated_tex_level_zero(b, tex->dest.ssa.num_components, tex->dest.ssa.bit_size,
desc, sampler_desc, coord, tex->sampler_dim, tex->is_array);
nir_ssa_def_rewrite_uses_after(dst, result, instr);
nir_def_rewrite_uses_after(dst, result, instr);
nir_instr_remove(instr);
return true;
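
An illustrative plain-C sketch (not part of this change, names invented) of the bilinear weighting that emulated_tex_level_zero() above appears to build per texel: bit "dim" of the texel index i picks coord1 or coord0 for that axis, and the per-axis blend factors multiply together. For 2D the four weights always sum to 1.

   #include <stdio.h>

   /* weight[dim] plays the role of the per-axis blend factor the pass computes
    * earlier (elided from the hunks above). */
   static float texel_weight(unsigned i, unsigned num_dims, const float weight[3])
   {
      float w = 1.0f;
      for (unsigned dim = 0; dim < num_dims; dim++)
         w *= (i & (1u << dim)) ? weight[dim] : 1.0f - weight[dim];
      return w;
   }

   int main(void)
   {
      const float weight[3] = {0.25f, 0.75f, 0.0f};
      float sum = 0.0f;
      for (unsigned i = 0; i < 4; i++) {   /* 4 texels for 2D bilinear */
         float w = texel_weight(i, 2, weight);
         printf("texel %u: weight %.4f\n", i, w);
         sum += w;
      }
      printf("sum = %.4f\n", sum);         /* always 1.0 */
      return 0;
   }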

File diff suppressed because it is too large

View file

@ -21,7 +21,7 @@ typedef struct {
bool lower_load_barycentric;
/* Add one for dual source blend second output. */
nir_ssa_def *outputs[FRAG_RESULT_MAX + 1][4];
nir_def *outputs[FRAG_RESULT_MAX + 1][4];
nir_alu_type output_types[FRAG_RESULT_MAX + 1];
/* MAX_DRAW_BUFFERS for MRT export, 1 for MRTZ export */
@ -85,52 +85,52 @@ init_interp_param(nir_builder *b, lower_ps_state *s)
* contains fully-covered quads.
*/
if (s->options->bc_optimize_for_persp || s->options->bc_optimize_for_linear) {
nir_ssa_def *bc_optimize = nir_load_barycentric_optimize_amd(b);
nir_def *bc_optimize = nir_load_barycentric_optimize_amd(b);
if (s->options->bc_optimize_for_persp) {
nir_ssa_def *center =
nir_def *center =
nir_load_barycentric_pixel(b, 32, .interp_mode = INTERP_MODE_SMOOTH);
nir_ssa_def *centroid =
nir_def *centroid =
nir_load_barycentric_centroid(b, 32, .interp_mode = INTERP_MODE_SMOOTH);
nir_ssa_def *value = nir_bcsel(b, bc_optimize, center, centroid);
nir_def *value = nir_bcsel(b, bc_optimize, center, centroid);
nir_store_var(b, s->persp_centroid, value, 0x3);
}
if (s->options->bc_optimize_for_linear) {
nir_ssa_def *center =
nir_def *center =
nir_load_barycentric_pixel(b, 32, .interp_mode = INTERP_MODE_NOPERSPECTIVE);
nir_ssa_def *centroid =
nir_def *centroid =
nir_load_barycentric_centroid(b, 32, .interp_mode = INTERP_MODE_NOPERSPECTIVE);
nir_ssa_def *value = nir_bcsel(b, bc_optimize, center, centroid);
nir_def *value = nir_bcsel(b, bc_optimize, center, centroid);
nir_store_var(b, s->linear_centroid, value, 0x3);
}
}
if (s->options->force_persp_sample_interp) {
nir_ssa_def *sample =
nir_def *sample =
nir_load_barycentric_sample(b, 32, .interp_mode = INTERP_MODE_SMOOTH);
nir_store_var(b, s->persp_center, sample, 0x3);
nir_store_var(b, s->persp_centroid, sample, 0x3);
}
if (s->options->force_linear_sample_interp) {
nir_ssa_def *sample =
nir_def *sample =
nir_load_barycentric_sample(b, 32, .interp_mode = INTERP_MODE_NOPERSPECTIVE);
nir_store_var(b, s->linear_center, sample, 0x3);
nir_store_var(b, s->linear_centroid, sample, 0x3);
}
if (s->options->force_persp_center_interp) {
nir_ssa_def *center =
nir_def *center =
nir_load_barycentric_pixel(b, 32, .interp_mode = INTERP_MODE_SMOOTH);
nir_store_var(b, s->persp_sample, center, 0x3);
nir_store_var(b, s->persp_centroid, center, 0x3);
}
if (s->options->force_linear_center_interp) {
nir_ssa_def *center =
nir_def *center =
nir_load_barycentric_pixel(b, 32, .interp_mode = INTERP_MODE_NOPERSPECTIVE);
nir_store_var(b, s->linear_sample, center, 0x3);
nir_store_var(b, s->linear_centroid, center, 0x3);
@ -186,8 +186,8 @@ lower_ps_load_barycentric(nir_builder *b, nir_intrinsic_instr *intrin, lower_ps_
b->cursor = nir_before_instr(&intrin->instr);
nir_ssa_def *replacement = nir_load_var(b, var);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, replacement);
nir_def *replacement = nir_load_var(b, var);
nir_def_rewrite_uses(&intrin->dest.ssa, replacement);
nir_instr_remove(&intrin->instr);
return true;
@ -200,7 +200,7 @@ gather_ps_store_output(nir_builder *b, nir_intrinsic_instr *intrin, lower_ps_sta
unsigned write_mask = nir_intrinsic_write_mask(intrin);
unsigned component = nir_intrinsic_component(intrin);
nir_alu_type type = nir_intrinsic_src_type(intrin);
nir_ssa_def *store_val = intrin->src[0].ssa;
nir_def *store_val = intrin->src[0].ssa;
b->cursor = nir_before_instr(&intrin->instr);
@ -249,13 +249,13 @@ lower_ps_load_sample_mask_in(nir_builder *b, nir_intrinsic_instr *intrin, lower_
b->cursor = nir_before_instr(&intrin->instr);
uint32_t ps_iter_mask = ac_get_ps_iter_mask(s->options->ps_iter_samples);
nir_ssa_def *sampleid = nir_load_sample_id(b);
nir_ssa_def *submask = nir_ishl(b, nir_imm_int(b, ps_iter_mask), sampleid);
nir_def *sampleid = nir_load_sample_id(b);
nir_def *submask = nir_ishl(b, nir_imm_int(b, ps_iter_mask), sampleid);
nir_ssa_def *sample_mask = nir_load_sample_mask_in(b);
nir_ssa_def *replacement = nir_iand(b, sample_mask, submask);
nir_def *sample_mask = nir_load_sample_mask_in(b);
nir_def *replacement = nir_iand(b, sample_mask, submask);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, replacement);
nir_def_rewrite_uses(&intrin->dest.ssa, replacement);
nir_instr_remove(&intrin->instr);
return true;
@ -327,8 +327,8 @@ emit_ps_color_clamp_and_alpha_test(nir_builder *b, lower_ps_state *s)
} else if (s->options->alpha_func == COMPARE_FUNC_NEVER) {
nir_discard(b);
} else if (s->outputs[slot][3]) {
nir_ssa_def *ref = nir_load_alpha_reference_amd(b);
nir_ssa_def *cond =
nir_def *ref = nir_load_alpha_reference_amd(b);
nir_def *cond =
nir_compare_func(b, s->options->alpha_func, s->outputs[slot][3], ref);
nir_discard_if(b, nir_inot(b, cond));
}
@ -341,16 +341,16 @@ emit_ps_mrtz_export(nir_builder *b, lower_ps_state *s)
{
uint64_t outputs_written = b->shader->info.outputs_written;
nir_ssa_def *mrtz_alpha = NULL;
nir_def *mrtz_alpha = NULL;
if (s->options->alpha_to_coverage_via_mrtz) {
mrtz_alpha = s->outputs[FRAG_RESULT_COLOR][3] ?
s->outputs[FRAG_RESULT_COLOR][3] :
s->outputs[FRAG_RESULT_DATA0][3];
}
nir_ssa_def *depth = s->outputs[FRAG_RESULT_DEPTH][0];
nir_ssa_def *stencil = s->outputs[FRAG_RESULT_STENCIL][0];
nir_ssa_def *sample_mask = s->outputs[FRAG_RESULT_SAMPLE_MASK][0];
nir_def *depth = s->outputs[FRAG_RESULT_DEPTH][0];
nir_def *stencil = s->outputs[FRAG_RESULT_STENCIL][0];
nir_def *sample_mask = s->outputs[FRAG_RESULT_SAMPLE_MASK][0];
if (s->options->kill_samplemask) {
sample_mask = NULL;
@ -371,8 +371,8 @@ emit_ps_mrtz_export(nir_builder *b, lower_ps_state *s)
outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK),
s->options->alpha_to_coverage_via_mrtz);
nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
nir_ssa_def *outputs[4] = {undef, undef, undef, undef};
nir_def *undef = nir_undef(b, 1, 32);
nir_def *outputs[4] = {undef, undef, undef, undef};
unsigned write_mask = 0;
unsigned flags = 0;
@ -465,22 +465,22 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, gl_frag_result slot, uns
bool enable_mrt_output_nan_fixup =
s->options->enable_mrt_output_nan_fixup & BITFIELD_BIT(cbuf);
nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
nir_ssa_def *outputs[4] = {undef, undef, undef, undef};
nir_def *undef = nir_undef(b, 1, 32);
nir_def *outputs[4] = {undef, undef, undef, undef};
unsigned write_mask = 0;
unsigned flags = 0;
nir_alu_type base_type = nir_alu_type_get_base_type(type);
unsigned type_size = nir_alu_type_get_type_size(type);
nir_ssa_def *data[4];
nir_def *data[4];
memcpy(data, s->outputs[slot], sizeof(data));
/* Replace NaN by zero (for 32-bit float formats) to fix game bugs if requested. */
if (enable_mrt_output_nan_fixup && type == nir_type_float32) {
for (int i = 0; i < 4; i++) {
if (data[i]) {
nir_ssa_def *isnan = nir_fisnan(b, data[i]);
nir_def *isnan = nir_fisnan(b, data[i]);
data[i] = nir_bcsel(b, isnan, nir_imm_float(b, 0), data[i]);
}
}
@ -593,14 +593,14 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, gl_frag_result slot, uns
}
for (int i = 0; i < 2; i++) {
nir_ssa_def *lo = data[i * 2];
nir_ssa_def *hi = data[i * 2 + 1];
nir_def *lo = data[i * 2];
nir_def *hi = data[i * 2 + 1];
if (!lo && !hi)
continue;
lo = lo ? lo : nir_ssa_undef(b, 1, type_size);
hi = hi ? hi : nir_ssa_undef(b, 1, type_size);
nir_ssa_def *vec = nir_vec2(b, lo, hi);
lo = lo ? lo : nir_undef(b, 1, type_size);
hi = hi ? hi : nir_undef(b, 1, type_size);
nir_def *vec = nir_vec2(b, lo, hi);
outputs[i] = nir_build_alu1(b, pack_op, vec);
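
When a 16-bit export format is in use, the vec2(lo, hi) above is collapsed by pack_op into a single dword. Assuming the plain bit-concatenating case (e.g. nir_op_pack_32_2x16_split; the float16 ops convert first), the scalar equivalent is just:

   #include <stdint.h>

   /* Illustrative only: two 16-bit channel values packed into one 32-bit dword. */
   static uint32_t pack_2x16(uint16_t lo, uint16_t hi)
   {
      return (uint32_t)lo | ((uint32_t)hi << 16);
   }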
@ -657,8 +657,8 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first
uint32_t mrt1_write_mask = nir_intrinsic_write_mask(mrt1_exp);
uint32_t write_mask = mrt0_write_mask | mrt1_write_mask;
nir_ssa_def *mrt0_arg = mrt0_exp->src[0].ssa;
nir_ssa_def *mrt1_arg = mrt1_exp->src[0].ssa;
nir_def *mrt0_arg = mrt0_exp->src[0].ssa;
nir_def *mrt1_arg = mrt1_exp->src[0].ssa;
/* Swizzle code is right before mrt0_exp. */
b->cursor = nir_before_instr(&mrt0_exp->instr);
@ -671,9 +671,9 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first
return;
}
nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
nir_ssa_def *arg0_vec[4] = {undef, undef, undef, undef};
nir_ssa_def *arg1_vec[4] = {undef, undef, undef, undef};
nir_def *undef = nir_undef(b, 1, 32);
nir_def *arg0_vec[4] = {undef, undef, undef, undef};
nir_def *arg1_vec[4] = {undef, undef, undef, undef};
/* For illustration, originally
* lane0 export arg00 and arg01
@ -684,17 +684,17 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first
* lane1 export arg01 and arg11.
*/
u_foreach_bit (i, write_mask) {
nir_ssa_def *arg0 = nir_channel(b, mrt0_arg, i);
nir_ssa_def *arg1 = nir_channel(b, mrt1_arg, i);
nir_def *arg0 = nir_channel(b, mrt0_arg, i);
nir_def *arg1 = nir_channel(b, mrt1_arg, i);
/* swap odd,even lanes of arg0 */
arg0 = nir_quad_swizzle_amd(b, arg0, .swizzle_mask = 0b10110001);
/* swap even lanes between arg0 and arg1 */
nir_ssa_def *tid = nir_load_subgroup_invocation(b);
nir_ssa_def *is_even = nir_ieq_imm(b, nir_iand_imm(b, tid, 1), 0);
nir_def *tid = nir_load_subgroup_invocation(b);
nir_def *is_even = nir_ieq_imm(b, nir_iand_imm(b, tid, 1), 0);
nir_ssa_def *tmp = arg0;
nir_def *tmp = arg0;
arg0 = nir_bcsel(b, is_even, arg1, arg0);
arg1 = nir_bcsel(b, is_even, tmp, arg1);
@ -741,7 +741,7 @@ emit_ps_null_export(nir_builder *b, lower_ps_state *s)
V_008DFC_SQ_EXP_MRT : V_008DFC_SQ_EXP_NULL;
nir_intrinsic_instr *intrin =
nir_export_amd(b, nir_ssa_undef(b, 4, 32),
nir_export_amd(b, nir_undef(b, 4, 32),
.base = target,
.flags = AC_EXP_FLAG_VALID_MASK | AC_EXP_FLAG_DONE);
/* Set the write mask explicitly so the builder doesn't default it to 0xf. */
@ -798,7 +798,7 @@ export_ps_outputs(nir_builder *b, lower_ps_state *s)
unsigned target = get_ps_color_export_target(s);
s->exp[s->exp_num++] =
nir_export_amd(b, nir_ssa_undef(b, 4, 32), .base = target);
nir_export_amd(b, nir_undef(b, 4, 32), .base = target);
}
} else {
if (s->output_types[FRAG_RESULT_COLOR] != nir_type_invalid) {

View file

@ -12,20 +12,20 @@
#include "nir_builder.h"
#include "amdgfxregs.h"
static nir_ssa_def *get_field(nir_builder *b, nir_ssa_def *desc, unsigned index, unsigned mask)
static nir_def *get_field(nir_builder *b, nir_def *desc, unsigned index, unsigned mask)
{
return nir_ubfe_imm(b, nir_channel(b, desc, index), ffs(mask) - 1, util_bitcount(mask));
}
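
An illustrative plain-C sketch (not part of this change) of what get_field() above does with the ~C_* register masks: shift the dword down to the field's lowest bit and mask by the field width. It assumes a contiguous mask narrower than 32 bits, which is what the callers pass.

   #include <strings.h>   /* ffs() */

   static unsigned get_field_c(unsigned dword, unsigned mask)
   {
      unsigned shift = ffs(mask) - 1;              /* lowest set bit of the mask */
      unsigned width = __builtin_popcount(mask);   /* like util_bitcount() */
      return (dword >> shift) & ((1u << width) - 1);
   }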
static nir_ssa_def *handle_null_desc(nir_builder *b, nir_ssa_def *desc, nir_ssa_def *value)
static nir_def *handle_null_desc(nir_builder *b, nir_def *desc, nir_def *value)
{
nir_ssa_def *is_null = nir_ieq_imm(b, nir_channel(b, desc, 1), 0);
nir_def *is_null = nir_ieq_imm(b, nir_channel(b, desc, 1), 0);
return nir_bcsel(b, is_null, nir_imm_int(b, 0), value);
}
static nir_ssa_def *query_samples(nir_builder *b, nir_ssa_def *desc, enum glsl_sampler_dim dim)
static nir_def *query_samples(nir_builder *b, nir_def *desc, enum glsl_sampler_dim dim)
{
nir_ssa_def *samples;
nir_def *samples;
if (dim == GLSL_SAMPLER_DIM_MS) {
/* LAST_LEVEL contains log2(num_samples). */
@ -38,22 +38,22 @@ static nir_ssa_def *query_samples(nir_builder *b, nir_ssa_def *desc, enum glsl_s
return handle_null_desc(b, desc, samples);
}
static nir_ssa_def *query_levels(nir_builder *b, nir_ssa_def *desc)
static nir_def *query_levels(nir_builder *b, nir_def *desc)
{
nir_ssa_def *base_level = get_field(b, desc, 3, ~C_00A00C_BASE_LEVEL);
nir_ssa_def *last_level = get_field(b, desc, 3, ~C_00A00C_LAST_LEVEL);
nir_def *base_level = get_field(b, desc, 3, ~C_00A00C_BASE_LEVEL);
nir_def *last_level = get_field(b, desc, 3, ~C_00A00C_LAST_LEVEL);
nir_ssa_def *levels = nir_iadd_imm(b, nir_isub(b, last_level, base_level), 1);
nir_def *levels = nir_iadd_imm(b, nir_isub(b, last_level, base_level), 1);
return handle_null_desc(b, desc, levels);
}
static nir_ssa_def *
lower_query_size(nir_builder *b, nir_ssa_def *desc, nir_src *lod,
static nir_def *
lower_query_size(nir_builder *b, nir_def *desc, nir_src *lod,
enum glsl_sampler_dim dim, bool is_array, enum amd_gfx_level gfx_level)
{
if (dim == GLSL_SAMPLER_DIM_BUF) {
nir_ssa_def *size = nir_channel(b, desc, 2);
nir_def *size = nir_channel(b, desc, 2);
if (gfx_level == GFX8) {
/* On GFX8, the descriptor contains the size in bytes,
@ -72,14 +72,14 @@ lower_query_size(nir_builder *b, nir_ssa_def *desc, nir_src *lod,
bool has_width = dim != GLSL_SAMPLER_DIM_CUBE;
bool has_height = dim != GLSL_SAMPLER_DIM_1D;
bool has_depth = dim == GLSL_SAMPLER_DIM_3D;
nir_ssa_def *width = NULL, *height = NULL, *layers = NULL, *base_array = NULL;
nir_ssa_def *last_array = NULL, *depth = NULL;
nir_def *width = NULL, *height = NULL, *layers = NULL, *base_array = NULL;
nir_def *last_array = NULL, *depth = NULL;
/* Get the width, height, depth, layers. */
if (gfx_level >= GFX10) {
if (has_width) {
nir_ssa_def *width_lo = get_field(b, desc, 1, ~C_00A004_WIDTH_LO);
nir_ssa_def *width_hi = get_field(b, desc, 2, ~C_00A008_WIDTH_HI);
nir_def *width_lo = get_field(b, desc, 1, ~C_00A004_WIDTH_LO);
nir_def *width_hi = get_field(b, desc, 2, ~C_00A008_WIDTH_HI);
/* Use iadd to get s_lshl2_add_u32 in the end. */
width = nir_iadd(b, width_lo, nir_ishl_imm(b, width_hi, 2));
}
@ -115,8 +115,8 @@ lower_query_size(nir_builder *b, nir_ssa_def *desc, nir_src *lod,
* the pitch for 2D. We need to set depth and last_array to 0 in that case.
*/
if (gfx_level >= GFX10_3 && (has_depth || is_array)) {
nir_ssa_def *type = get_field(b, desc, 3, ~C_00A00C_TYPE);
nir_ssa_def *is_2d = nir_ieq_imm(b, type, V_008F1C_SQ_RSRC_IMG_2D);
nir_def *type = get_field(b, desc, 3, ~C_00A00C_TYPE);
nir_def *is_2d = nir_ieq_imm(b, type, V_008F1C_SQ_RSRC_IMG_2D);
if (has_depth)
depth = nir_bcsel(b, is_2d, nir_imm_int(b, 0), depth);
@ -139,8 +139,8 @@ lower_query_size(nir_builder *b, nir_ssa_def *desc, nir_src *lod,
/* Minify the dimensions according to base_level + lod. */
if (dim != GLSL_SAMPLER_DIM_MS && dim != GLSL_SAMPLER_DIM_RECT) {
nir_ssa_def *base_level = get_field(b, desc, 3, ~C_00A00C_BASE_LEVEL);
nir_ssa_def *level = lod ? nir_iadd(b, base_level, lod->ssa) : base_level;
nir_def *base_level = get_field(b, desc, 3, ~C_00A00C_BASE_LEVEL);
nir_def *level = lod ? nir_iadd(b, base_level, lod->ssa) : base_level;
if (has_width)
width = nir_ushr(b, width, level);
@ -165,16 +165,16 @@ lower_query_size(nir_builder *b, nir_ssa_def *desc, nir_src *lod,
/* Special case for sliced storage 3D views which shouldn't be minified. */
if (gfx_level >= GFX10 && has_depth) {
nir_ssa_def *uav3d =
nir_def *uav3d =
nir_ieq_imm(b, get_field(b, desc, 5, ~C_00A014_ARRAY_PITCH), 1);
nir_ssa_def *layers_3d =
nir_def *layers_3d =
nir_isub(b, get_field(b, desc, 4, ~C_00A010_DEPTH),
get_field(b, desc, 4, ~C_00A010_BASE_ARRAY));
layers_3d = nir_iadd_imm(b, layers_3d, 1);
depth = nir_bcsel(b, uav3d, layers_3d, depth);
}
nir_ssa_def *result = NULL;
nir_def *result = NULL;
/* Construct the result. */
switch (dim) {
@ -203,14 +203,14 @@ lower_query_size(nir_builder *b, nir_ssa_def *desc, nir_src *lod,
static bool lower_resinfo(nir_builder *b, nir_instr *instr, void *data)
{
enum amd_gfx_level gfx_level = *(enum amd_gfx_level*)data;
nir_ssa_def *result = NULL, *dst = NULL;
nir_def *result = NULL, *dst = NULL;
if (instr->type == nir_instr_type_intrinsic) {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
const struct glsl_type *type;
enum glsl_sampler_dim dim;
bool is_array;
nir_ssa_def *desc = NULL;
nir_def *desc = NULL;
dst = &intr->dest.ssa;
b->cursor = nir_before_instr(instr);
@ -265,7 +265,7 @@ static bool lower_resinfo(nir_builder *b, nir_instr *instr, void *data)
} else if (instr->type == nir_instr_type_tex) {
nir_tex_instr *tex = nir_instr_as_tex(instr);
nir_tex_instr *new_tex;
nir_ssa_def *desc = NULL;
nir_def *desc = NULL;
nir_src *lod = NULL;
dst = &tex->dest.ssa;
@ -326,7 +326,7 @@ static bool lower_resinfo(nir_builder *b, nir_instr *instr, void *data)
if (!result)
return false;
nir_ssa_def_rewrite_uses_after(dst, result, instr);
nir_def_rewrite_uses_after(dst, result, instr);
nir_instr_remove(instr);
return true;
}

View file

@ -69,8 +69,8 @@ lower_subdword_loads(nir_builder *b, nir_instr *instr, void *data)
unsigned align_offset = nir_intrinsic_align_offset(intr) % align_mul;
nir_src *src_offset = nir_get_io_offset_src(intr);
nir_ssa_def *offset = src_offset->ssa;
nir_ssa_def *result = &intr->dest.ssa;
nir_def *offset = src_offset->ssa;
nir_def *result = &intr->dest.ssa;
/* Change the load to 32 bits per channel, update the channel count,
* and increase the declared load alignment.
@ -87,7 +87,7 @@ lower_subdword_loads(nir_builder *b, nir_instr *instr, void *data)
b->cursor = nir_after_instr(instr);
result = nir_extract_bits(b, &result, 1, 0, num_components, bit_size);
nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, result,
nir_def_rewrite_uses_after(&intr->dest.ssa, result,
result->parent_instr);
return true;
}
@ -121,7 +121,7 @@ lower_subdword_loads(nir_builder *b, nir_instr *instr, void *data)
result = nir_extract_bits(b, &result, 1, comp_offset * bit_size,
num_components, bit_size);
nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, result,
nir_def_rewrite_uses_after(&intr->dest.ssa, result,
result->parent_instr);
return true;
}
@ -138,10 +138,10 @@ lower_subdword_loads(nir_builder *b, nir_instr *instr, void *data)
/* We need to shift bits in the loaded vector by this number. */
b->cursor = nir_after_instr(instr);
nir_ssa_def *shift = nir_ishl_imm(b, nir_iand_imm(b, offset, 0x3), 3);
nir_ssa_def *rev_shift32 = nir_isub_imm(b, 32, shift);
nir_def *shift = nir_ishl_imm(b, nir_iand_imm(b, offset, 0x3), 3);
nir_def *rev_shift32 = nir_isub_imm(b, 32, shift);
nir_ssa_def *elems[NIR_MAX_VEC_COMPONENTS];
nir_def *elems[NIR_MAX_VEC_COMPONENTS];
/* "shift" can be only be one of: 0, 8, 16, 24
*
@ -170,7 +170,7 @@ lower_subdword_loads(nir_builder *b, nir_instr *instr, void *data)
if (intr->num_components >= 2) {
/* Use the 64-bit algorithm as described above. */
for (i = 0; i < intr->num_components / 2 - 1; i++) {
nir_ssa_def *qword1, *dword2;
nir_def *qword1, *dword2;
qword1 = nir_pack_64_2x32_split(b,
nir_channel(b, result, i * 2 + 0),
@ -203,7 +203,7 @@ lower_subdword_loads(nir_builder *b, nir_instr *instr, void *data)
result = nir_vec(b, elems, intr->num_components);
result = nir_extract_bits(b, &result, 1, 0, num_components, bit_size);
nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, result,
nir_def_rewrite_uses_after(&intr->dest.ssa, result,
result->parent_instr);
return true;
}
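
The shifting above is easier to follow in scalar form. An illustrative plain-C sketch (invented names) of the "64-bit algorithm" the pass uses for byte-unaligned loads: read the two aligned dwords that cover the value, concatenate them, and shift right by 8 * (offset & 3), which is always 0, 8, 16 or 24.

   #include <stdint.h>

   static uint32_t read_unaligned_dword(const uint32_t *aligned_words, unsigned byte_offset)
   {
      unsigned shift = (byte_offset & 3) * 8;
      /* Reads one dword past the value even when shift == 0; fine for illustration. */
      uint64_t qword = (uint64_t)aligned_words[byte_offset / 4] |
                       ((uint64_t)aligned_words[byte_offset / 4 + 1] << 32);
      return (uint32_t)(qword >> shift);
   }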

View file

@ -22,25 +22,25 @@ typedef struct {
unsigned num_entries;
} lower_tsms_io_state;
static nir_ssa_def *
static nir_def *
task_workgroup_index(nir_builder *b,
lower_tsms_io_state *s)
{
nir_ssa_def *id = nir_load_workgroup_id(b, 32);
nir_def *id = nir_load_workgroup_id(b, 32);
nir_ssa_def *x = nir_channel(b, id, 0);
nir_ssa_def *y = nir_channel(b, id, 1);
nir_ssa_def *z = nir_channel(b, id, 2);
nir_def *x = nir_channel(b, id, 0);
nir_def *y = nir_channel(b, id, 1);
nir_def *z = nir_channel(b, id, 2);
nir_ssa_def *grid_size = nir_load_num_workgroups(b, 32);
nir_ssa_def *grid_size_x = nir_channel(b, grid_size, 0);
nir_ssa_def *grid_size_y = nir_channel(b, grid_size, 1);
nir_def *grid_size = nir_load_num_workgroups(b, 32);
nir_def *grid_size_x = nir_channel(b, grid_size, 0);
nir_def *grid_size_y = nir_channel(b, grid_size, 1);
return nir_iadd(b, nir_imul(b, nir_imul(b, grid_size_x, grid_size_y), z),
nir_iadd(b, nir_imul(b, grid_size_x, y), x));
}
static nir_ssa_def *
static nir_def *
task_ring_entry_index(nir_builder *b,
lower_tsms_io_state *s)
{
@ -54,12 +54,12 @@ task_ring_entry_index(nir_builder *b,
* AND with num_entries - 1 to get the correct meaning.
* Note that num_entries must be a power of two.
*/
nir_ssa_def *ring_entry = nir_load_task_ring_entry_amd(b);
nir_ssa_def *idx = nir_iadd_nuw(b, ring_entry, task_workgroup_index(b, s));
nir_def *ring_entry = nir_load_task_ring_entry_amd(b);
nir_def *idx = nir_iadd_nuw(b, ring_entry, task_workgroup_index(b, s));
return nir_iand_imm(b, idx, s->num_entries - 1);
}
static nir_ssa_def *
static nir_def *
task_draw_ready_bit(nir_builder *b,
lower_tsms_io_state *s)
{
@ -86,14 +86,14 @@ task_draw_ready_bit(nir_builder *b,
* If the task shader doesn't write this bit, the HW hangs.
*/
nir_ssa_def *ring_entry = nir_load_task_ring_entry_amd(b);
nir_ssa_def *workgroup_index = task_workgroup_index(b, s);
nir_def *ring_entry = nir_load_task_ring_entry_amd(b);
nir_def *workgroup_index = task_workgroup_index(b, s);
nir_ssa_def *idx = nir_iadd_nuw(b, ring_entry, workgroup_index);
nir_def *idx = nir_iadd_nuw(b, ring_entry, workgroup_index);
return nir_u2u8(b, nir_ubfe_imm(b, idx, util_bitcount(s->num_entries - 1), 1));
}
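
Both helpers above rely on num_entries being a power of two. An illustrative scalar sketch (not part of this change): the low log2(num_entries) bits of the unwrapped index select the ring slot, and the next bit up flips on every wrap, which is where the draw-ready byte comes from.

   #include <stdint.h>

   static uint32_t task_ring_slot(uint32_t ring_entry, uint32_t workgroup_index,
                                  uint32_t num_entries)
   {
      uint32_t idx = ring_entry + workgroup_index;   /* may wrap around 2^32 */
      return idx & (num_entries - 1);                /* same as idx % num_entries */
   }

   static uint8_t task_draw_ready_bit(uint32_t ring_entry, uint32_t workgroup_index,
                                      uint32_t num_entries)
   {
      uint32_t idx = ring_entry + workgroup_index;
      unsigned ring_bits = __builtin_popcount(num_entries - 1);   /* log2(num_entries) */
      return (uint8_t)((idx >> ring_bits) & 1);
   }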
static nir_ssa_def *
static nir_def *
mesh_ring_entry_index(nir_builder *b,
lower_tsms_io_state *s)
{
@ -111,15 +111,15 @@ mesh_ring_entry_index(nir_builder *b,
static void
task_write_draw_ring(nir_builder *b,
nir_ssa_def *store_val,
nir_def *store_val,
unsigned const_off,
lower_tsms_io_state *s)
{
nir_ssa_def *ptr = task_ring_entry_index(b, s);
nir_ssa_def *ring = nir_load_ring_task_draw_amd(b);
nir_ssa_def *scalar_off = nir_imul_imm(b, ptr, s->draw_entry_bytes);
nir_ssa_def *vector_off = nir_imm_int(b, 0);
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_def *ptr = task_ring_entry_index(b, s);
nir_def *ring = nir_load_ring_task_draw_amd(b);
nir_def *scalar_off = nir_imul_imm(b, ptr, s->draw_entry_bytes);
nir_def *vector_off = nir_imm_int(b, 0);
nir_def *zero = nir_imm_int(b, 0);
nir_store_buffer_amd(b, store_val, ring, vector_off, scalar_off, zero,
.base = const_off, .memory_modes = nir_var_shader_out,
@ -139,7 +139,7 @@ filter_task_intrinsics(const nir_instr *instr,
intrin->intrinsic == nir_intrinsic_load_task_payload;
}
static nir_ssa_def *
static nir_def *
lower_task_launch_mesh_workgroups(nir_builder *b,
nir_intrinsic_instr *intrin,
lower_tsms_io_state *s)
@ -160,13 +160,13 @@ lower_task_launch_mesh_workgroups(nir_builder *b,
nir_var_mem_ssbo | nir_var_mem_global);
/* On the first invocation, write the full draw ring entry. */
nir_ssa_def *invocation_index = nir_load_local_invocation_index(b);
nir_def *invocation_index = nir_load_local_invocation_index(b);
nir_if *if_invocation_index_zero = nir_push_if(b, nir_ieq_imm(b, invocation_index, 0));
{
nir_ssa_def *dimensions = intrin->src[0].ssa;
nir_ssa_def *x = nir_channel(b, dimensions, 0);
nir_ssa_def *y = nir_channel(b, dimensions, 1);
nir_ssa_def *z = nir_channel(b, dimensions, 2);
nir_def *dimensions = intrin->src[0].ssa;
nir_def *x = nir_channel(b, dimensions, 0);
nir_def *y = nir_channel(b, dimensions, 1);
nir_def *z = nir_channel(b, dimensions, 2);
/* When either Y or Z are 0, also set X to 0.
* Not necessary, but speeds up the job of the CP.
@ -185,7 +185,7 @@ lower_task_launch_mesh_workgroups(nir_builder *b,
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
}
static nir_ssa_def *
static nir_def *
lower_task_payload_store(nir_builder *b,
nir_intrinsic_instr *intrin,
lower_tsms_io_state *s)
@ -193,12 +193,12 @@ lower_task_payload_store(nir_builder *b,
unsigned write_mask = nir_intrinsic_write_mask(intrin);
unsigned base = nir_intrinsic_base(intrin);
nir_ssa_def *store_val = intrin->src[0].ssa;
nir_ssa_def *addr = intrin->src[1].ssa;
nir_ssa_def *ring = nir_load_ring_task_payload_amd(b);
nir_ssa_def *ptr = task_ring_entry_index(b, s);
nir_ssa_def *ring_off = nir_imul_imm(b, ptr, s->payload_entry_bytes);
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_def *store_val = intrin->src[0].ssa;
nir_def *addr = intrin->src[1].ssa;
nir_def *ring = nir_load_ring_task_payload_amd(b);
nir_def *ptr = task_ring_entry_index(b, s);
nir_def *ring_off = nir_imul_imm(b, ptr, s->payload_entry_bytes);
nir_def *zero = nir_imm_int(b, 0);
nir_store_buffer_amd(b, store_val, ring, addr, ring_off, zero, .base = base,
.write_mask = write_mask,
@ -208,7 +208,7 @@ lower_task_payload_store(nir_builder *b,
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
}
static nir_ssa_def *
static nir_def *
lower_taskmesh_payload_load(nir_builder *b,
nir_intrinsic_instr *intrin,
lower_tsms_io_state *s)
@ -217,22 +217,22 @@ lower_taskmesh_payload_load(nir_builder *b,
unsigned num_components = intrin->dest.ssa.num_components;
unsigned bit_size = intrin->dest.ssa.bit_size;
nir_ssa_def *ptr =
nir_def *ptr =
b->shader->info.stage == MESA_SHADER_TASK ?
task_ring_entry_index(b, s) :
mesh_ring_entry_index(b, s);
nir_ssa_def *addr = intrin->src[0].ssa;
nir_ssa_def *ring = nir_load_ring_task_payload_amd(b);
nir_ssa_def *ring_off = nir_imul_imm(b, ptr, s->payload_entry_bytes);
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_def *addr = intrin->src[0].ssa;
nir_def *ring = nir_load_ring_task_payload_amd(b);
nir_def *ring_off = nir_imul_imm(b, ptr, s->payload_entry_bytes);
nir_def *zero = nir_imm_int(b, 0);
return nir_load_buffer_amd(b, num_components, bit_size, ring, addr, ring_off, zero, .base = base,
.memory_modes = nir_var_mem_task_payload,
.access = ACCESS_COHERENT);
}
static nir_ssa_def *
static nir_def *
lower_task_intrinsics(nir_builder *b,
nir_instr *instr,
void *state)
@ -293,7 +293,7 @@ filter_mesh_input_load(const nir_instr *instr,
return intrin->intrinsic == nir_intrinsic_load_task_payload;
}
static nir_ssa_def *
static nir_def *
lower_mesh_intrinsics(nir_builder *b,
nir_instr *instr,
void *state)

View file

@ -238,13 +238,13 @@ lower_ls_output_store(nir_builder *b,
b->cursor = nir_before_instr(instr);
nir_ssa_def *vertex_idx = nir_load_local_invocation_index(b);
nir_ssa_def *base_off_var = nir_imul(b, vertex_idx, nir_load_lshs_vertex_stride_amd(b));
nir_def *vertex_idx = nir_load_local_invocation_index(b);
nir_def *base_off_var = nir_imul(b, vertex_idx, nir_load_lshs_vertex_stride_amd(b));
nir_ssa_def *io_off = ac_nir_calc_io_offset(b, intrin, nir_imm_int(b, 16u), 4u, st->map_io);
nir_def *io_off = ac_nir_calc_io_offset(b, intrin, nir_imm_int(b, 16u), 4u, st->map_io);
unsigned write_mask = nir_intrinsic_write_mask(intrin);
nir_ssa_def *off = nir_iadd_nuw(b, base_off_var, io_off);
nir_def *off = nir_iadd_nuw(b, base_off_var, io_off);
nir_store_shared(b, intrin->src[0].ssa, off, .write_mask = write_mask);
/* NOTE: don't remove the store_output intrinsic on GFX9+ when tcs_in_out_eq,
@ -285,27 +285,27 @@ filter_load_tcs_per_vertex_input(const nir_instr *instr,
return !can_use_temps;
}
static nir_ssa_def *
static nir_def *
hs_per_vertex_input_lds_offset(nir_builder *b,
lower_tess_io_state *st,
nir_intrinsic_instr *instr)
{
nir_ssa_def *tcs_in_vtxcnt = nir_load_patch_vertices_in(b);
nir_ssa_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
nir_ssa_def *vertex_index = nir_get_io_arrayed_index_src(instr)->ssa;
nir_def *tcs_in_vtxcnt = nir_load_patch_vertices_in(b);
nir_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
nir_def *vertex_index = nir_get_io_arrayed_index_src(instr)->ssa;
nir_ssa_def *stride = nir_load_lshs_vertex_stride_amd(b);
nir_ssa_def *tcs_in_patch_stride = nir_imul(b, tcs_in_vtxcnt, stride);
nir_ssa_def *vertex_index_off = nir_imul(b, vertex_index, stride);
nir_def *stride = nir_load_lshs_vertex_stride_amd(b);
nir_def *tcs_in_patch_stride = nir_imul(b, tcs_in_vtxcnt, stride);
nir_def *vertex_index_off = nir_imul(b, vertex_index, stride);
nir_ssa_def *tcs_in_current_patch_offset = nir_imul(b, rel_patch_id, tcs_in_patch_stride);
nir_def *tcs_in_current_patch_offset = nir_imul(b, rel_patch_id, tcs_in_patch_stride);
nir_ssa_def *io_offset = ac_nir_calc_io_offset(b, instr, nir_imm_int(b, 16u), 4u, st->map_io);
nir_def *io_offset = ac_nir_calc_io_offset(b, instr, nir_imm_int(b, 16u), 4u, st->map_io);
return nir_iadd_nuw(b, nir_iadd_nuw(b, tcs_in_current_patch_offset, vertex_index_off), io_offset);
}
static nir_ssa_def *
static nir_def *
hs_output_lds_offset(nir_builder *b,
lower_tess_io_state *st,
nir_intrinsic_instr *intrin)
@ -318,28 +318,28 @@ hs_output_lds_offset(nir_builder *b,
unsigned pervertex_output_patch_size = b->shader->info.tess.tcs_vertices_out * output_vertex_size;
unsigned output_patch_stride = pervertex_output_patch_size + st->tcs_num_reserved_patch_outputs * 16u;
nir_ssa_def *off = intrin
nir_def *off = intrin
? ac_nir_calc_io_offset(b, intrin, nir_imm_int(b, 16u), 4u, st->map_io)
: nir_imm_int(b, 0);
nir_ssa_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
nir_ssa_def *patch_offset = nir_imul_imm(b, rel_patch_id, output_patch_stride);
nir_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
nir_def *patch_offset = nir_imul_imm(b, rel_patch_id, output_patch_stride);
nir_ssa_def *output_patch_offset;
nir_def *output_patch_offset;
if (st->tcs_no_inputs_in_lds)
output_patch_offset = patch_offset;
else {
nir_ssa_def *tcs_in_vtxcnt = nir_load_patch_vertices_in(b);
nir_ssa_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b);
nir_ssa_def *input_patch_size =
nir_def *tcs_in_vtxcnt = nir_load_patch_vertices_in(b);
nir_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b);
nir_def *input_patch_size =
nir_imul(b, tcs_in_vtxcnt, nir_load_lshs_vertex_stride_amd(b));
nir_ssa_def *output_patch0_offset = nir_imul(b, input_patch_size, tcs_num_patches);
nir_def *output_patch0_offset = nir_imul(b, input_patch_size, tcs_num_patches);
output_patch_offset = nir_iadd_nuw(b, patch_offset, output_patch0_offset);
}
if (per_vertex) {
nir_ssa_def *vertex_index = nir_ssa_for_src(b, *nir_get_io_arrayed_index_src(intrin), 1);
nir_ssa_def *vertex_index_off = nir_imul_imm(b, vertex_index, output_vertex_size);
nir_def *vertex_index = nir_ssa_for_src(b, *nir_get_io_arrayed_index_src(intrin), 1);
nir_def *vertex_index_off = nir_imul_imm(b, vertex_index, output_vertex_size);
off = nir_iadd_nuw(b, off, vertex_index_off);
return nir_iadd_nuw(b, off, output_patch_offset);
@ -349,51 +349,51 @@ hs_output_lds_offset(nir_builder *b,
}
}
static nir_ssa_def *
static nir_def *
hs_per_vertex_output_vmem_offset(nir_builder *b,
lower_tess_io_state *st,
nir_intrinsic_instr *intrin)
{
nir_ssa_def *out_vertices_per_patch = b->shader->info.stage == MESA_SHADER_TESS_CTRL
nir_def *out_vertices_per_patch = b->shader->info.stage == MESA_SHADER_TESS_CTRL
? nir_imm_int(b, b->shader->info.tess.tcs_vertices_out)
: nir_load_patch_vertices_in(b);
nir_ssa_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b);
nir_ssa_def *attr_stride = nir_imul(b, tcs_num_patches, nir_imul_imm(b, out_vertices_per_patch, 16u));
nir_ssa_def *io_offset = ac_nir_calc_io_offset(b, intrin, attr_stride, 4u, st->map_io);
nir_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b);
nir_def *attr_stride = nir_imul(b, tcs_num_patches, nir_imul_imm(b, out_vertices_per_patch, 16u));
nir_def *io_offset = ac_nir_calc_io_offset(b, intrin, attr_stride, 4u, st->map_io);
nir_ssa_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
nir_ssa_def *patch_offset = nir_imul(b, rel_patch_id, nir_imul_imm(b, out_vertices_per_patch, 16u));
nir_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
nir_def *patch_offset = nir_imul(b, rel_patch_id, nir_imul_imm(b, out_vertices_per_patch, 16u));
nir_ssa_def *vertex_index = nir_ssa_for_src(b, *nir_get_io_arrayed_index_src(intrin), 1);
nir_ssa_def *vertex_index_off = nir_imul_imm(b, vertex_index, 16u);
nir_def *vertex_index = nir_ssa_for_src(b, *nir_get_io_arrayed_index_src(intrin), 1);
nir_def *vertex_index_off = nir_imul_imm(b, vertex_index, 16u);
return nir_iadd_nuw(b, nir_iadd_nuw(b, patch_offset, vertex_index_off), io_offset);
}
static nir_ssa_def *
static nir_def *
hs_per_patch_output_vmem_offset(nir_builder *b,
lower_tess_io_state *st,
nir_intrinsic_instr *intrin,
unsigned const_base_offset)
{
nir_ssa_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b);
nir_ssa_def *per_patch_data_offset = nir_load_hs_out_patch_data_offset_amd(b);
nir_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b);
nir_def *per_patch_data_offset = nir_load_hs_out_patch_data_offset_amd(b);
nir_ssa_def * off = intrin
nir_def * off = intrin
? ac_nir_calc_io_offset(b, intrin, nir_imul_imm(b, tcs_num_patches, 16u), 4u, st->map_io)
: nir_imm_int(b, 0);
if (const_base_offset)
off = nir_iadd_nuw(b, off, nir_imul_imm(b, tcs_num_patches, const_base_offset));
nir_ssa_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
nir_ssa_def *patch_offset = nir_imul_imm(b, rel_patch_id, 16u);
nir_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
nir_def *patch_offset = nir_imul_imm(b, rel_patch_id, 16u);
off = nir_iadd_nuw(b, off, per_patch_data_offset);
return nir_iadd_nuw(b, off, patch_offset);
}
static nir_ssa_def *
static nir_def *
lower_hs_per_vertex_input_load(nir_builder *b,
nir_instr *instr,
void *state)
@ -401,11 +401,11 @@ lower_hs_per_vertex_input_load(nir_builder *b,
lower_tess_io_state *st = (lower_tess_io_state *) state;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
nir_ssa_def *off = hs_per_vertex_input_lds_offset(b, st, intrin);
nir_def *off = hs_per_vertex_input_lds_offset(b, st, intrin);
return nir_load_shared(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, off);
}
static nir_ssa_def *
static nir_def *
lower_hs_output_store(nir_builder *b,
nir_intrinsic_instr *intrin,
lower_tess_io_state *st)
@ -414,7 +414,7 @@ lower_hs_output_store(nir_builder *b,
intrin->intrinsic == nir_intrinsic_store_output);
nir_io_semantics semantics = nir_intrinsic_io_semantics(intrin);
nir_ssa_def *store_val = intrin->src[0].ssa;
nir_def *store_val = intrin->src[0].ssa;
unsigned component = nir_intrinsic_component(intrin);
unsigned write_mask = nir_intrinsic_write_mask(intrin);
bool is_tess_factor = semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
@ -437,24 +437,24 @@ lower_hs_output_store(nir_builder *b,
}
if (write_to_vmem) {
nir_ssa_def *vmem_off = intrin->intrinsic == nir_intrinsic_store_per_vertex_output
nir_def *vmem_off = intrin->intrinsic == nir_intrinsic_store_per_vertex_output
? hs_per_vertex_output_vmem_offset(b, st, intrin)
: hs_per_patch_output_vmem_offset(b, st, intrin, 0);
nir_ssa_def *hs_ring_tess_offchip = nir_load_ring_tess_offchip_amd(b);
nir_ssa_def *offchip_offset = nir_load_ring_tess_offchip_offset_amd(b);
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_def *hs_ring_tess_offchip = nir_load_ring_tess_offchip_amd(b);
nir_def *offchip_offset = nir_load_ring_tess_offchip_offset_amd(b);
nir_def *zero = nir_imm_int(b, 0);
nir_store_buffer_amd(b, store_val, hs_ring_tess_offchip, vmem_off, offchip_offset, zero,
.write_mask = write_mask, .memory_modes = nir_var_shader_out,
.access = ACCESS_COHERENT);
}
if (write_to_lds) {
nir_ssa_def *lds_off = hs_output_lds_offset(b, st, intrin);
nir_def *lds_off = hs_output_lds_offset(b, st, intrin);
nir_store_shared(b, store_val, lds_off, .write_mask = write_mask);
}
nir_ssa_def *ret = NIR_LOWER_INSTR_PROGRESS_REPLACE;
nir_def *ret = NIR_LOWER_INSTR_PROGRESS_REPLACE;
if (is_tess_factor && st->tcs_pass_tessfactors_by_reg) {
if (st->tcs_emit_tess_factor_write) {
@ -474,12 +474,12 @@ lower_hs_output_store(nir_builder *b,
return ret;
}
static nir_ssa_def *
static nir_def *
lower_hs_output_load(nir_builder *b,
nir_intrinsic_instr *intrin,
lower_tess_io_state *st)
{
nir_ssa_def *off = hs_output_lds_offset(b, st, intrin);
nir_def *off = hs_output_lds_offset(b, st, intrin);
return nir_load_shared(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, off);
}
@ -505,7 +505,7 @@ update_hs_barrier(nir_intrinsic_instr *intrin, lower_tess_io_state *st)
nir_intrinsic_set_memory_scope(intrin, SCOPE_SUBGROUP);
}
static nir_ssa_def *
static nir_def *
lower_hs_output_access(nir_builder *b,
nir_instr *instr,
void *state)
@ -571,7 +571,7 @@ hs_emit_write_tess_factors(nir_shader *shader,
.memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_shared);
}
nir_ssa_def *invocation_id = nir_load_invocation_id(b);
nir_def *invocation_id = nir_load_invocation_id(b);
/* Only the 1st invocation of each patch needs to do this. */
nir_if *invocation_id_zero = nir_push_if(b, nir_ieq_imm(b, invocation_id, 0));
@ -586,8 +586,8 @@ hs_emit_write_tess_factors(nir_shader *shader,
const bool tess_lvl_in_written = st->tcs_tess_lvl_in_loc >= 0;
const bool tess_lvl_out_written = st->tcs_tess_lvl_out_loc >= 0;
nir_ssa_def *tessfactors_outer = NULL;
nir_ssa_def *tessfactors_inner = NULL;
nir_def *tessfactors_outer = NULL;
nir_def *tessfactors_inner = NULL;
if (st->tcs_pass_tessfactors_by_reg) {
if (tess_lvl_out_written) {
tessfactors_outer = nir_load_var(b, st->tcs_tess_level_outer);
@ -600,7 +600,7 @@ hs_emit_write_tess_factors(nir_shader *shader,
}
} else {
/* Base LDS address of per-patch outputs in the current patch. */
nir_ssa_def *lds_base = hs_output_lds_offset(b, st, NULL);
nir_def *lds_base = hs_output_lds_offset(b, st, NULL);
/* Load all tessellation factors (aka. tess levels) from LDS. */
if (tess_lvl_out_written) {
@ -621,18 +621,18 @@ hs_emit_write_tess_factors(nir_shader *shader,
tessfactors_inner = nir_imm_zero(b, inner_comps, 32);
/* The descriptor where tess factors have to be stored by the shader. */
nir_ssa_def *tessfactor_ring = nir_load_ring_tess_factors_amd(b);
nir_def *tessfactor_ring = nir_load_ring_tess_factors_amd(b);
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_ssa_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
nir_ssa_def *tess_factors_base = nir_load_ring_tess_factors_offset_amd(b);
nir_ssa_def *tess_factors_offset = nir_imul_imm(b, rel_patch_id, (inner_comps + outer_comps) * 4u);
nir_def *zero = nir_imm_int(b, 0);
nir_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
nir_def *tess_factors_base = nir_load_ring_tess_factors_offset_amd(b);
nir_def *tess_factors_offset = nir_imul_imm(b, rel_patch_id, (inner_comps + outer_comps) * 4u);
unsigned tess_factors_const_offset = 0;
if (st->gfx_level <= GFX8) {
/* Store the dynamic HS control word. */
nir_if *rel_patch_id_zero = nir_push_if(b, nir_ieq_imm(b, rel_patch_id, 0));
nir_ssa_def *ctrlw = nir_imm_int(b, 0x80000000u);
nir_def *ctrlw = nir_imm_int(b, 0x80000000u);
nir_store_buffer_amd(b, ctrlw, tessfactor_ring, zero, tess_factors_base, zero,
.access = ACCESS_COHERENT);
tess_factors_const_offset += 4;
@ -642,11 +642,11 @@ hs_emit_write_tess_factors(nir_shader *shader,
/* Store tess factors for the tessellator */
if (shader->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) {
/* LINES reversal */
nir_ssa_def *t = nir_vec2(b, nir_channel(b, tessfactors_outer, 1), nir_channel(b, tessfactors_outer, 0));
nir_def *t = nir_vec2(b, nir_channel(b, tessfactors_outer, 1), nir_channel(b, tessfactors_outer, 0));
nir_store_buffer_amd(b, t, tessfactor_ring, tess_factors_offset, tess_factors_base, zero,
.base = tess_factors_const_offset, .access = ACCESS_COHERENT);
} else if (shader->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES) {
nir_ssa_def *t = nir_vec4(b, nir_channel(b, tessfactors_outer, 0), nir_channel(b, tessfactors_outer, 1),
nir_def *t = nir_vec4(b, nir_channel(b, tessfactors_outer, 0), nir_channel(b, tessfactors_outer, 1),
nir_channel(b, tessfactors_outer, 2), nir_channel(b, tessfactors_inner, 0));
nir_store_buffer_amd(b, t, tessfactor_ring, tess_factors_offset, tess_factors_base, zero,
.base = tess_factors_const_offset, .access = ACCESS_COHERENT);
@ -659,11 +659,11 @@ hs_emit_write_tess_factors(nir_shader *shader,
if (st->tes_reads_tessfactors) {
/* Store to offchip for TES to read - only if TES actually reads them */
nir_ssa_def *hs_ring_tess_offchip = nir_load_ring_tess_offchip_amd(b);
nir_ssa_def *offchip_offset = nir_load_ring_tess_offchip_offset_amd(b);
nir_def *hs_ring_tess_offchip = nir_load_ring_tess_offchip_amd(b);
nir_def *offchip_offset = nir_load_ring_tess_offchip_offset_amd(b);
if (tess_lvl_out_written) {
nir_ssa_def *vmem_off_outer =
nir_def *vmem_off_outer =
hs_per_patch_output_vmem_offset(b, st, NULL, st->tcs_tess_lvl_out_loc);
nir_store_buffer_amd(b, tessfactors_outer, hs_ring_tess_offchip,
@ -673,7 +673,7 @@ hs_emit_write_tess_factors(nir_shader *shader,
}
if (inner_comps && tess_lvl_in_written) {
nir_ssa_def *vmem_off_inner =
nir_def *vmem_off_inner =
hs_per_patch_output_vmem_offset(b, st, NULL, st->tcs_tess_lvl_in_loc);
nir_store_buffer_amd(b, tessfactors_inner, hs_ring_tess_offchip,
@ -688,7 +688,7 @@ hs_emit_write_tess_factors(nir_shader *shader,
nir_metadata_preserve(impl, nir_metadata_none);
}
static nir_ssa_def *
static nir_def *
lower_tes_input_load(nir_builder *b,
nir_instr *instr,
void *state)
@ -696,13 +696,13 @@ lower_tes_input_load(nir_builder *b,
lower_tess_io_state *st = (lower_tess_io_state *) state;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
nir_ssa_def *offchip_ring = nir_load_ring_tess_offchip_amd(b);
nir_ssa_def *offchip_offset = nir_load_ring_tess_offchip_offset_amd(b);
nir_ssa_def *off = intrin->intrinsic == nir_intrinsic_load_per_vertex_input
nir_def *offchip_ring = nir_load_ring_tess_offchip_amd(b);
nir_def *offchip_offset = nir_load_ring_tess_offchip_offset_amd(b);
nir_def *off = intrin->intrinsic == nir_intrinsic_load_per_vertex_input
? hs_per_vertex_output_vmem_offset(b, st, intrin)
: hs_per_patch_output_vmem_offset(b, st, intrin, 0);
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_def *zero = nir_imm_int(b, 0);
return nir_load_buffer_amd(b, intrin->dest.ssa.num_components,
intrin->dest.ssa.bit_size, offchip_ring,

View file

@ -35,26 +35,26 @@
* the selcoords major axis.
*/
static void
build_cube_select(nir_builder *b, nir_ssa_def *ma, nir_ssa_def *id, nir_ssa_def *deriv,
nir_ssa_def **out_ma, nir_ssa_def **out_sc, nir_ssa_def **out_tc)
build_cube_select(nir_builder *b, nir_def *ma, nir_def *id, nir_def *deriv,
nir_def **out_ma, nir_def **out_sc, nir_def **out_tc)
{
nir_ssa_def *deriv_x = nir_channel(b, deriv, 0);
nir_ssa_def *deriv_y = nir_channel(b, deriv, 1);
nir_ssa_def *deriv_z = nir_channel(b, deriv, 2);
nir_def *deriv_x = nir_channel(b, deriv, 0);
nir_def *deriv_y = nir_channel(b, deriv, 1);
nir_def *deriv_z = nir_channel(b, deriv, 2);
nir_ssa_def *is_ma_positive = nir_fge_imm(b, ma, 0.0);
nir_ssa_def *sgn_ma =
nir_def *is_ma_positive = nir_fge_imm(b, ma, 0.0);
nir_def *sgn_ma =
nir_bcsel(b, is_ma_positive, nir_imm_float(b, 1.0), nir_imm_float(b, -1.0));
nir_ssa_def *neg_sgn_ma = nir_fneg(b, sgn_ma);
nir_def *neg_sgn_ma = nir_fneg(b, sgn_ma);
nir_ssa_def *is_ma_z = nir_fge_imm(b, id, 4.0);
nir_ssa_def *is_ma_y = nir_fge_imm(b, id, 2.0);
nir_def *is_ma_z = nir_fge_imm(b, id, 4.0);
nir_def *is_ma_y = nir_fge_imm(b, id, 2.0);
is_ma_y = nir_iand(b, is_ma_y, nir_inot(b, is_ma_z));
nir_ssa_def *is_not_ma_x = nir_ior(b, is_ma_z, is_ma_y);
nir_def *is_not_ma_x = nir_ior(b, is_ma_z, is_ma_y);
/* Select sc */
nir_ssa_def *tmp = nir_bcsel(b, is_not_ma_x, deriv_x, deriv_z);
nir_ssa_def *sgn =
nir_def *tmp = nir_bcsel(b, is_not_ma_x, deriv_x, deriv_z);
nir_def *sgn =
nir_bcsel(b, is_ma_y, nir_imm_float(b, 1.0), nir_bcsel(b, is_ma_z, sgn_ma, neg_sgn_ma));
*out_sc = nir_fmul(b, tmp, sgn);
@ -69,10 +69,10 @@ build_cube_select(nir_builder *b, nir_ssa_def *ma, nir_ssa_def *id, nir_ssa_def
}
static void
prepare_cube_coords(nir_builder *b, nir_tex_instr *tex, nir_ssa_def **coord, nir_src *ddx,
prepare_cube_coords(nir_builder *b, nir_tex_instr *tex, nir_def **coord, nir_src *ddx,
nir_src *ddy, const ac_nir_lower_tex_options *options)
{
nir_ssa_def *coords[NIR_MAX_VEC_COMPONENTS] = {0};
nir_def *coords[NIR_MAX_VEC_COMPONENTS] = {0};
for (unsigned i = 0; i < (*coord)->num_components; i++)
coords[i] = nir_channel(b, *coord, i);
@ -98,12 +98,12 @@ prepare_cube_coords(nir_builder *b, nir_tex_instr *tex, nir_ssa_def **coord, nir
if (tex->is_array && options->gfx_level <= GFX8 && coords[3])
coords[3] = nir_fmax(b, coords[3], nir_imm_float(b, 0.0));
nir_ssa_def *cube_coords = nir_cube_amd(b, nir_vec(b, coords, 3));
nir_ssa_def *sc = nir_channel(b, cube_coords, 1);
nir_ssa_def *tc = nir_channel(b, cube_coords, 0);
nir_ssa_def *ma = nir_channel(b, cube_coords, 2);
nir_ssa_def *invma = nir_frcp(b, nir_fabs(b, ma));
nir_ssa_def *id = nir_channel(b, cube_coords, 3);
nir_def *cube_coords = nir_cube_amd(b, nir_vec(b, coords, 3));
nir_def *sc = nir_channel(b, cube_coords, 1);
nir_def *tc = nir_channel(b, cube_coords, 0);
nir_def *ma = nir_channel(b, cube_coords, 2);
nir_def *invma = nir_frcp(b, nir_fabs(b, ma));
nir_def *id = nir_channel(b, cube_coords, 3);
if (ddx || ddy) {
sc = nir_fmul(b, sc, invma);
@ -132,13 +132,13 @@ prepare_cube_coords(nir_builder *b, nir_tex_instr *tex, nir_ssa_def **coord, nir
* seems awfully quiet about how textureGrad for cube
* maps should be handled.
*/
nir_ssa_def *deriv_ma, *deriv_sc, *deriv_tc;
nir_def *deriv_ma, *deriv_sc, *deriv_tc;
build_cube_select(b, ma, id, i ? ddy->ssa : ddx->ssa, &deriv_ma, &deriv_sc, &deriv_tc);
deriv_ma = nir_fmul(b, deriv_ma, invma);
nir_ssa_def *x = nir_fsub(b, nir_fmul(b, deriv_sc, invma), nir_fmul(b, deriv_ma, sc));
nir_ssa_def *y = nir_fsub(b, nir_fmul(b, deriv_tc, invma), nir_fmul(b, deriv_ma, tc));
nir_def *x = nir_fsub(b, nir_fmul(b, deriv_sc, invma), nir_fmul(b, deriv_ma, sc));
nir_def *y = nir_fsub(b, nir_fmul(b, deriv_tc, invma), nir_fmul(b, deriv_ma, tc));
nir_instr_rewrite_src_ssa(&tex->instr, i ? ddy : ddx, nir_vec2(b, x, y));
}
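
The two fsub lines above are the quotient rule: with sc and tc already divided by |ma| at this point, each gradient component is d(sc/|ma|) = dsc/|ma| - (dma/|ma|) * (sc/|ma|). Illustrative scalar form (not part of this change):

   /* sc_over_ma = sc / |ma| (already normalized, as in the code above),
    * invma = 1 / |ma|. */
   static float cube_grad_component(float dsc, float dma, float sc_over_ma, float invma)
   {
      return dsc * invma - (dma * invma) * sc_over_ma;
   }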
@ -159,20 +159,20 @@ prepare_cube_coords(nir_builder *b, nir_tex_instr *tex, nir_ssa_def **coord, nir
}
static bool
lower_array_layer_round_even(nir_builder *b, nir_tex_instr *tex, nir_ssa_def **coords)
lower_array_layer_round_even(nir_builder *b, nir_tex_instr *tex, nir_def **coords)
{
int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
if (coord_index < 0 || nir_tex_instr_src_type(tex, coord_index) != nir_type_float)
return false;
unsigned layer = tex->coord_components - 1;
nir_ssa_def *rounded_layer = nir_fround_even(b, nir_channel(b, *coords, layer));
nir_def *rounded_layer = nir_fround_even(b, nir_channel(b, *coords, layer));
*coords = nir_vector_insert_imm(b, *coords, rounded_layer, layer);
return true;
}
static bool
lower_tex_coords(nir_builder *b, nir_tex_instr *tex, nir_ssa_def **coords,
lower_tex_coords(nir_builder *b, nir_tex_instr *tex, nir_def **coords,
const ac_nir_lower_tex_options *options)
{
bool progress = false;
@ -190,11 +190,11 @@ lower_tex_coords(nir_builder *b, nir_tex_instr *tex, nir_ssa_def **coords,
nir_src *ddy = ddy_idx >= 0 ? &tex->src[ddy_idx].src : NULL;
if (tex->sampler_dim == GLSL_SAMPLER_DIM_1D) {
nir_ssa_def *y =
nir_def *y =
nir_imm_floatN_t(b, tex->op == nir_texop_txf ? 0.0 : 0.5, (*coords)->bit_size);
if (tex->is_array && (*coords)->num_components > 1) {
nir_ssa_def *x = nir_channel(b, *coords, 0);
nir_ssa_def *idx = nir_channel(b, *coords, 1);
nir_def *x = nir_channel(b, *coords, 0);
nir_def *idx = nir_channel(b, *coords, 1);
*coords = nir_vec3(b, x, y, idx);
} else {
*coords = nir_vec2(b, *coords, y);
@ -203,12 +203,12 @@ lower_tex_coords(nir_builder *b, nir_tex_instr *tex, nir_ssa_def **coords,
int offset_src = nir_tex_instr_src_index(tex, nir_tex_src_offset);
if (offset_src >= 0) {
nir_src *offset = &tex->src[offset_src].src;
nir_ssa_def *zero = nir_imm_intN_t(b, 0, offset->ssa->bit_size);
nir_def *zero = nir_imm_intN_t(b, 0, offset->ssa->bit_size);
nir_instr_rewrite_src_ssa(&tex->instr, offset, nir_vec2(b, offset->ssa, zero));
}
if (ddx || ddy) {
nir_ssa_def *def = nir_vec2(b, ddx->ssa, nir_imm_floatN_t(b, 0.0, ddx->ssa->bit_size));
nir_def *def = nir_vec2(b, ddx->ssa, nir_imm_floatN_t(b, 0.0, ddx->ssa->bit_size));
nir_instr_rewrite_src_ssa(&tex->instr, ddx, def);
def = nir_vec2(b, ddy->ssa, nir_imm_floatN_t(b, 0.0, ddy->ssa->bit_size));
nir_instr_rewrite_src_ssa(&tex->instr, ddy, def);
@ -233,7 +233,7 @@ lower_tex(nir_builder *b, nir_instr *instr, void *options_)
return false;
b->cursor = nir_before_instr(instr);
nir_ssa_def *coords = tex->src[coord_idx].src.ssa;
nir_def *coords = tex->src[coord_idx].src.ssa;
if (lower_tex_coords(b, tex, &coords, options)) {
tex->coord_components = coords->num_components;
nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[coord_idx].src, coords);
@ -249,12 +249,12 @@ typedef struct {
} coord_info;
static bool
can_move_coord(nir_ssa_scalar scalar, coord_info *info)
can_move_coord(nir_scalar scalar, coord_info *info)
{
if (scalar.def->bit_size != 32)
return false;
if (nir_ssa_scalar_is_const(scalar))
if (nir_scalar_is_const(scalar))
return true;
if (scalar.def->parent_instr->type != nir_instr_type_intrinsic)
@ -270,8 +270,8 @@ can_move_coord(nir_ssa_scalar scalar, coord_info *info)
if (intrin->intrinsic != nir_intrinsic_load_interpolated_input)
return false;
nir_ssa_scalar coord_x = nir_ssa_scalar_resolved(intrin->src[0].ssa, 0);
nir_ssa_scalar coord_y = nir_ssa_scalar_resolved(intrin->src[0].ssa, 1);
nir_scalar coord_x = nir_scalar_resolved(intrin->src[0].ssa, 0);
nir_scalar coord_y = nir_scalar_resolved(intrin->src[0].ssa, 1);
if (coord_x.def->parent_instr->type != nir_instr_type_intrinsic || coord_x.comp != 0 ||
coord_y.def->parent_instr->type != nir_instr_type_intrinsic || coord_y.comp != 1)
return false;
@ -297,22 +297,22 @@ struct move_tex_coords_state {
nir_builder toplevel_b;
};
static nir_ssa_def *
build_coordinate(struct move_tex_coords_state *state, nir_ssa_scalar scalar, coord_info info)
static nir_def *
build_coordinate(struct move_tex_coords_state *state, nir_scalar scalar, coord_info info)
{
nir_builder *b = &state->toplevel_b;
if (nir_ssa_scalar_is_const(scalar))
return nir_imm_intN_t(b, nir_ssa_scalar_as_uint(scalar), scalar.def->bit_size);
if (nir_scalar_is_const(scalar))
return nir_imm_intN_t(b, nir_scalar_as_uint(scalar), scalar.def->bit_size);
ASSERTED nir_src offset = *nir_get_io_offset_src(info.load);
assert(nir_src_is_const(offset) && !nir_src_as_uint(offset));
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_ssa_def *res;
nir_def *zero = nir_imm_int(b, 0);
nir_def *res;
if (info.bary) {
enum glsl_interp_mode interp_mode = nir_intrinsic_interp_mode(info.bary);
nir_ssa_def *bary = nir_load_system_value(b, info.bary->intrinsic, interp_mode, 2, 32);
nir_def *bary = nir_load_system_value(b, info.bary->intrinsic, interp_mode, 2, 32);
res = nir_load_interpolated_input(b, 1, 32, bary, zero);
} else {
res = nir_load_input(b, 1, 32, zero);
@ -351,11 +351,11 @@ move_tex_coords(struct move_tex_coords_state *state, nir_function_impl *impl, ni
return false;
nir_tex_src *src = &tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)];
nir_ssa_scalar components[NIR_MAX_VEC_COMPONENTS];
nir_scalar components[NIR_MAX_VEC_COMPONENTS];
coord_info infos[NIR_MAX_VEC_COMPONENTS];
bool can_move_all = true;
for (unsigned i = 0; i < tex->coord_components; i++) {
components[i] = nir_ssa_scalar_resolved(src->src.ssa, i);
components[i] = nir_scalar_resolved(src->src.ssa, i);
can_move_all &= can_move_coord(components[i], &infos[i]);
}
if (!can_move_all)
@ -386,7 +386,7 @@ move_tex_coords(struct move_tex_coords_state *state, nir_function_impl *impl, ni
for (unsigned i = 0; i < tex->coord_components; i++)
components[i] = nir_get_ssa_scalar(build_coordinate(state, components[i], infos[i]), 0);
nir_ssa_def *linear_vgpr = nir_vec_scalars(&state->toplevel_b, components, tex->coord_components);
nir_def *linear_vgpr = nir_vec_scalars(&state->toplevel_b, components, tex->coord_components);
lower_tex_coords(&state->toplevel_b, tex, &linear_vgpr, state->options);
linear_vgpr = nir_strict_wqm_coord_amd(&state->toplevel_b, linear_vgpr, coord_base * 4);
@ -421,25 +421,25 @@ move_fddxy(struct move_tex_coords_state *state, nir_function_impl *impl, nir_alu
}
unsigned num_components = instr->dest.dest.ssa.num_components;
nir_ssa_scalar components[NIR_MAX_VEC_COMPONENTS];
nir_scalar components[NIR_MAX_VEC_COMPONENTS];
coord_info infos[NIR_MAX_VEC_COMPONENTS];
bool can_move_all = true;
for (unsigned i = 0; i < num_components; i++) {
components[i] = nir_ssa_scalar_chase_alu_src(nir_get_ssa_scalar(&instr->dest.dest.ssa, i), 0);
components[i] = nir_ssa_scalar_chase_movs(components[i]);
components[i] = nir_scalar_chase_alu_src(nir_get_ssa_scalar(&instr->dest.dest.ssa, i), 0);
components[i] = nir_scalar_chase_movs(components[i]);
can_move_all &= can_move_coord(components[i], &infos[i]);
}
if (!can_move_all || state->num_wqm_vgprs + num_components > state->options->max_wqm_vgprs)
return false;
for (unsigned i = 0; i < num_components; i++) {
nir_ssa_def *def = build_coordinate(state, components[i], infos[i]);
nir_def *def = build_coordinate(state, components[i], infos[i]);
components[i] = nir_get_ssa_scalar(def, 0);
}
nir_ssa_def *def = nir_vec_scalars(&state->toplevel_b, components, num_components);
nir_def *def = nir_vec_scalars(&state->toplevel_b, components, num_components);
def = nir_build_alu1(&state->toplevel_b, instr->op, def);
nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, def);
nir_def_rewrite_uses(&instr->dest.dest.ssa, def);
state->num_wqm_vgprs += num_components;

View file

@ -3312,16 +3312,16 @@ void ac_surface_print_info(FILE *out, const struct radeon_info *info,
}
}
static nir_ssa_def *gfx10_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info,
static nir_def *gfx10_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info,
struct gfx9_meta_equation *equation,
int blkSizeBias, unsigned blkStart,
nir_ssa_def *meta_pitch, nir_ssa_def *meta_slice_size,
nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
nir_ssa_def *pipe_xor,
nir_ssa_def **bit_position)
nir_def *meta_pitch, nir_def *meta_slice_size,
nir_def *x, nir_def *y, nir_def *z,
nir_def *pipe_xor,
nir_def **bit_position)
{
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_ssa_def *one = nir_imm_int(b, 1);
nir_def *zero = nir_imm_int(b, 0);
nir_def *one = nir_imm_int(b, 1);
assert(info->gfx_level >= GFX10);
@ -3329,17 +3329,17 @@ static nir_ssa_def *gfx10_nir_meta_addr_from_coord(nir_builder *b, const struct
unsigned meta_block_height_log2 = util_logbase2(equation->meta_block_height);
unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 + blkSizeBias;
nir_ssa_def *coord[] = {x, y, z, 0};
nir_ssa_def *address = zero;
nir_def *coord[] = {x, y, z, 0};
nir_def *address = zero;
for (unsigned i = blkStart; i < blkSizeLog2 + 1; i++) {
nir_ssa_def *v = zero;
nir_def *v = zero;
for (unsigned c = 0; c < 4; c++) {
unsigned index = i * 4 + c - (blkStart * 4);
if (equation->u.gfx10_bits[index]) {
unsigned mask = equation->u.gfx10_bits[index];
nir_ssa_def *bits = coord[c];
nir_def *bits = coord[c];
while (mask)
v = nir_ixor(b, v, nir_iand(b, nir_ushr_imm(b, bits, u_bit_scan(&mask)), one));
@ -3352,11 +3352,11 @@ static nir_ssa_def *gfx10_nir_meta_addr_from_coord(nir_builder *b, const struct
unsigned blkMask = (1 << blkSizeLog2) - 1;
unsigned pipeMask = (1 << G_0098F8_NUM_PIPES(info->gb_addr_config)) - 1;
unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
nir_ssa_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);
nir_ssa_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);
nir_ssa_def *pb = nir_ushr_imm(b, meta_pitch, meta_block_width_log2);
nir_ssa_def *blkIndex = nir_iadd(b, nir_imul(b, yb, pb), xb);
nir_ssa_def *pipeXor = nir_iand_imm(b, nir_ishl_imm(b, nir_iand_imm(b, pipe_xor, pipeMask),
nir_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);
nir_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);
nir_def *pb = nir_ushr_imm(b, meta_pitch, meta_block_width_log2);
nir_def *blkIndex = nir_iadd(b, nir_imul(b, yb, pb), xb);
nir_def *pipeXor = nir_iand_imm(b, nir_ishl_imm(b, nir_iand_imm(b, pipe_xor, pipeMask),
m_pipeInterleaveLog2), blkMask);
if (bit_position)
@ -3367,15 +3367,15 @@ static nir_ssa_def *gfx10_nir_meta_addr_from_coord(nir_builder *b, const struct
nir_ixor(b, nir_ushr(b, address, one), pipeXor));
}
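
The nested loops above compute, for each output address bit, an XOR reduction over selected coordinate bits. An illustrative scalar model (not part of this change; the table layout and the blkStart offsetting are simplified away):

   #include <stdint.h>

   static uint32_t meta_address_bits(const uint16_t bit_masks[][4], unsigned num_bits,
                                     const uint32_t coord[4])
   {
      uint32_t address = 0;
      for (unsigned i = 0; i < num_bits; i++) {
         uint32_t v = 0;
         for (unsigned c = 0; c < 4; c++) {
            uint32_t mask = bit_masks[i][c];       /* which bits of coord[c] feed bit i */
            while (mask) {
               unsigned bit = __builtin_ctz(mask); /* like u_bit_scan() */
               mask &= mask - 1;
               v ^= (coord[c] >> bit) & 1;
            }
         }
         address |= v << i;
      }
      return address;
   }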
static nir_ssa_def *gfx9_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info,
static nir_def *gfx9_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info,
struct gfx9_meta_equation *equation,
nir_ssa_def *meta_pitch, nir_ssa_def *meta_height,
nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
nir_ssa_def *sample, nir_ssa_def *pipe_xor,
nir_ssa_def **bit_position)
nir_def *meta_pitch, nir_def *meta_height,
nir_def *x, nir_def *y, nir_def *z,
nir_def *sample, nir_def *pipe_xor,
nir_def **bit_position)
{
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_ssa_def *one = nir_imm_int(b, 1);
nir_def *zero = nir_imm_int(b, 0);
nir_def *one = nir_imm_int(b, 1);
assert(info->gfx_level >= GFX9);
@ -3385,32 +3385,32 @@ static nir_ssa_def *gfx9_nir_meta_addr_from_coord(nir_builder *b, const struct r
unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
unsigned numPipeBits = equation->u.gfx9.num_pipe_bits;
nir_ssa_def *pitchInBlock = nir_ushr_imm(b, meta_pitch, meta_block_width_log2);
nir_ssa_def *sliceSizeInBlock = nir_imul(b, nir_ushr_imm(b, meta_height, meta_block_height_log2),
nir_def *pitchInBlock = nir_ushr_imm(b, meta_pitch, meta_block_width_log2);
nir_def *sliceSizeInBlock = nir_imul(b, nir_ushr_imm(b, meta_height, meta_block_height_log2),
pitchInBlock);
nir_ssa_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);
nir_ssa_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);
nir_ssa_def *zb = nir_ushr_imm(b, z, meta_block_depth_log2);
nir_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);
nir_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);
nir_def *zb = nir_ushr_imm(b, z, meta_block_depth_log2);
nir_ssa_def *blockIndex = nir_iadd(b, nir_iadd(b, nir_imul(b, zb, sliceSizeInBlock),
nir_def *blockIndex = nir_iadd(b, nir_iadd(b, nir_imul(b, zb, sliceSizeInBlock),
nir_imul(b, yb, pitchInBlock)), xb);
nir_ssa_def *coords[] = {x, y, z, sample, blockIndex};
nir_def *coords[] = {x, y, z, sample, blockIndex};
nir_ssa_def *address = zero;
nir_def *address = zero;
unsigned num_bits = equation->u.gfx9.num_bits;
assert(num_bits <= 32);
/* Compute the address up until the last bit that doesn't use the block index. */
for (unsigned i = 0; i < num_bits - 1; i++) {
nir_ssa_def *xor = zero;
nir_def *xor = zero;
for (unsigned c = 0; c < 5; c++) {
if (equation->u.gfx9.bit[i].coord[c].dim >= 5)
continue;
assert(equation->u.gfx9.bit[i].coord[c].ord < 32);
nir_ssa_def *ison =
nir_def *ison =
nir_iand(b, nir_ushr_imm(b, coords[equation->u.gfx9.bit[i].coord[c].dim],
equation->u.gfx9.bit[i].coord[c].ord), one);
@ -3429,17 +3429,17 @@ static nir_ssa_def *gfx9_nir_meta_addr_from_coord(nir_builder *b, const struct r
if (bit_position)
*bit_position = nir_ishl_imm(b, nir_iand_imm(b, address, 1), 2);
nir_ssa_def *pipeXor = nir_iand_imm(b, pipe_xor, (1 << numPipeBits) - 1);
nir_def *pipeXor = nir_iand_imm(b, pipe_xor, (1 << numPipeBits) - 1);
return nir_ixor(b, nir_ushr(b, address, one),
nir_ishl_imm(b, pipeXor, m_pipeInterleaveLog2));
}
nir_ssa_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info *info,
nir_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info *info,
unsigned bpe, struct gfx9_meta_equation *equation,
nir_ssa_def *dcc_pitch, nir_ssa_def *dcc_height,
nir_ssa_def *dcc_slice_size,
nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
nir_ssa_def *sample, nir_ssa_def *pipe_xor)
nir_def *dcc_pitch, nir_def *dcc_height,
nir_def *dcc_slice_size,
nir_def *x, nir_def *y, nir_def *z,
nir_def *sample, nir_def *pipe_xor)
{
if (info->gfx_level >= GFX10) {
unsigned bpp_log2 = util_logbase2(bpe);
@ -3454,15 +3454,15 @@ nir_ssa_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info
}
}
nir_ssa_def *ac_nir_cmask_addr_from_coord(nir_builder *b, const struct radeon_info *info,
nir_def *ac_nir_cmask_addr_from_coord(nir_builder *b, const struct radeon_info *info,
struct gfx9_meta_equation *equation,
nir_ssa_def *cmask_pitch, nir_ssa_def *cmask_height,
nir_ssa_def *cmask_slice_size,
nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
nir_ssa_def *pipe_xor,
nir_ssa_def **bit_position)
nir_def *cmask_pitch, nir_def *cmask_height,
nir_def *cmask_slice_size,
nir_def *x, nir_def *y, nir_def *z,
nir_def *pipe_xor,
nir_def **bit_position)
{
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_def *zero = nir_imm_int(b, 0);
if (info->gfx_level >= GFX10) {
return gfx10_nir_meta_addr_from_coord(b, info, equation, -7, 1,
@ -3475,12 +3475,12 @@ nir_ssa_def *ac_nir_cmask_addr_from_coord(nir_builder *b, const struct radeon_in
}
}
nir_ssa_def *ac_nir_htile_addr_from_coord(nir_builder *b, const struct radeon_info *info,
nir_def *ac_nir_htile_addr_from_coord(nir_builder *b, const struct radeon_info *info,
struct gfx9_meta_equation *equation,
nir_ssa_def *htile_pitch,
nir_ssa_def *htile_slice_size,
nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
nir_ssa_def *pipe_xor)
nir_def *htile_pitch,
nir_def *htile_slice_size,
nir_def *x, nir_def *y, nir_def *z,
nir_def *pipe_xor)
{
return gfx10_nir_meta_addr_from_coord(b, info, equation, -4, 2,
htile_pitch, htile_slice_size,



@ -490,27 +490,27 @@ unsigned ac_get_cb_number_type(enum pipe_format format);
unsigned ac_get_cb_format(enum amd_gfx_level gfx_level, enum pipe_format format);
#ifdef AC_SURFACE_INCLUDE_NIR
nir_ssa_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info *info,
nir_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info *info,
unsigned bpe, struct gfx9_meta_equation *equation,
nir_ssa_def *dcc_pitch, nir_ssa_def *dcc_height,
nir_ssa_def *dcc_slice_size,
nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
nir_ssa_def *sample, nir_ssa_def *pipe_xor);
nir_def *dcc_pitch, nir_def *dcc_height,
nir_def *dcc_slice_size,
nir_def *x, nir_def *y, nir_def *z,
nir_def *sample, nir_def *pipe_xor);
nir_ssa_def *ac_nir_cmask_addr_from_coord(nir_builder *b, const struct radeon_info *info,
nir_def *ac_nir_cmask_addr_from_coord(nir_builder *b, const struct radeon_info *info,
struct gfx9_meta_equation *equation,
nir_ssa_def *cmask_pitch, nir_ssa_def *cmask_height,
nir_ssa_def *cmask_slice_size,
nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
nir_ssa_def *pipe_xor,
nir_ssa_def **bit_position);
nir_def *cmask_pitch, nir_def *cmask_height,
nir_def *cmask_slice_size,
nir_def *x, nir_def *y, nir_def *z,
nir_def *pipe_xor,
nir_def **bit_position);
nir_ssa_def *ac_nir_htile_addr_from_coord(nir_builder *b, const struct radeon_info *info,
nir_def *ac_nir_htile_addr_from_coord(nir_builder *b, const struct radeon_info *info,
struct gfx9_meta_equation *equation,
nir_ssa_def *htile_pitch,
nir_ssa_def *htile_slice_size,
nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
nir_ssa_def *pipe_xor);
nir_def *htile_pitch,
nir_def *htile_slice_size,
nir_def *x, nir_def *y, nir_def *z,
nir_def *pipe_xor);
#endif
#ifdef __cplusplus


@ -128,7 +128,7 @@ append_logical_end(Block* b)
}
Temp
get_ssa_temp(struct isel_context* ctx, nir_ssa_def* def)
get_ssa_temp(struct isel_context* ctx, nir_def* def)
{
uint32_t id = ctx->first_temp_id + def->index;
return Temp(id, ctx->program->temp_rc[id]);
@ -576,7 +576,7 @@ byte_align_vector(isel_context* ctx, Temp vec, Operand offset, Temp dst, unsigne
}
Temp
get_ssa_temp_tex(struct isel_context* ctx, nir_ssa_def* def, bool is_16bit)
get_ssa_temp_tex(struct isel_context* ctx, nir_def* def, bool is_16bit)
{
RegClass rc = RegClass::get(RegType::vgpr, (is_16bit ? 2 : 4) * def->num_components);
Temp tmp = get_ssa_temp(ctx, def);
@ -806,8 +806,7 @@ get_alu_src_vop3p(struct isel_context* ctx, nir_alu_src src)
uint32_t
get_alu_src_ub(isel_context* ctx, nir_alu_instr* instr, int src_idx)
{
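   /* Query NIR's range analysis for an unsigned upper bound of the selected source component. */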
nir_ssa_scalar scalar =
nir_ssa_scalar{instr->src[src_idx].src.ssa, instr->src[src_idx].swizzle[0]};
nir_scalar scalar = nir_scalar{instr->src[src_idx].src.ssa, instr->src[src_idx].swizzle[0]};
return nir_unsigned_upper_bound(ctx->shader, ctx->range_ht, scalar, &ctx->ub_config);
}
@ -6131,7 +6130,7 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
unsigned result_size = instr->dest.ssa.num_components - is_sparse;
unsigned expand_mask =
nir_ssa_def_components_read(&instr->dest.ssa) & u_bit_consecutive(0, result_size);
nir_def_components_read(&instr->dest.ssa) & u_bit_consecutive(0, result_size);
expand_mask = MAX2(expand_mask, 1); /* this can be zero in the case of sparse image loads */
if (dim == GLSL_SAMPLER_DIM_BUF)
expand_mask = (1u << util_last_bit(expand_mask)) - 1u;
@ -6311,9 +6310,9 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
*/
if (instr->src[3].ssa->bit_size == 32 || instr->src[3].ssa->bit_size == 16) {
for (uint32_t i = 0; i < instr->num_components; i++) {
nir_ssa_scalar comp = nir_ssa_scalar_resolved(instr->src[3].ssa, i);
if ((nir_ssa_scalar_is_const(comp) && nir_ssa_scalar_as_uint(comp) == 0) ||
nir_ssa_scalar_is_undef(comp))
nir_scalar comp = nir_scalar_resolved(instr->src[3].ssa, i);
if ((nir_scalar_is_const(comp) && nir_scalar_as_uint(comp) == 0) ||
nir_scalar_is_undef(comp))
dmask &= ~BITFIELD_BIT(i);
}
@ -6444,7 +6443,7 @@ translate_buffer_image_atomic_op(const nir_atomic_op op, aco_opcode* buf_op, aco
void
visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
{
bool return_previous = !nir_ssa_def_is_unused(&instr->dest.ssa);
bool return_previous = !nir_def_is_unused(&instr->dest.ssa);
const enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
bool is_array = nir_intrinsic_image_array(instr);
Builder bld(ctx->program, ctx->block);
@ -6586,7 +6585,7 @@ void
visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
{
Builder bld(ctx->program, ctx->block);
bool return_previous = !nir_ssa_def_is_unused(&instr->dest.ssa);
bool return_previous = !nir_def_is_unused(&instr->dest.ssa);
Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa));
const nir_atomic_op nir_op = nir_intrinsic_atomic_op(instr);
@ -6788,7 +6787,7 @@ void
visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
{
Builder bld(ctx->program, ctx->block);
bool return_previous = !nir_ssa_def_is_unused(&instr->dest.ssa);
bool return_previous = !nir_def_is_unused(&instr->dest.ssa);
Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));
const nir_atomic_op nir_op = nir_intrinsic_atomic_op(instr);
@ -7177,7 +7176,7 @@ emit_barrier(isel_context* ctx, nir_intrinsic_instr* instr)
void
visit_load_shared(isel_context* ctx, nir_intrinsic_instr* instr)
{
// TODO: implement sparse reads using ds_read2_b32 and nir_ssa_def_components_read()
// TODO: implement sparse reads using ds_read2_b32 and nir_def_components_read()
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
Builder bld(ctx->program, ctx->block);
@ -7294,7 +7293,7 @@ visit_shared_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
default: unreachable("Unhandled shared atomic intrinsic");
}
bool return_previous = !nir_ssa_def_is_unused(&instr->dest.ssa);
bool return_previous = !nir_def_is_unused(&instr->dest.ssa);
aco_opcode op;
if (data.size() == 1) {
@ -9102,7 +9101,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
}
void
get_const_vec(nir_ssa_def* vec, nir_const_value* cv[4])
get_const_vec(nir_def* vec, nir_const_value* cv[4])
{
if (vec->parent_instr->type != nir_instr_type_alu)
return;
@ -9339,7 +9338,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
}
/* Build tex instruction */
unsigned dmask = nir_ssa_def_components_read(&instr->dest.ssa) & 0xf;
unsigned dmask = nir_def_components_read(&instr->dest.ssa) & 0xf;
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
dmask = u_bit_consecutive(0, util_last_bit(dmask));
if (instr->is_sparse)
@ -9746,7 +9745,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
}
Operand
get_phi_operand(isel_context* ctx, nir_ssa_def* ssa, RegClass rc, bool logical)
get_phi_operand(isel_context* ctx, nir_def* ssa, RegClass rc, bool logical)
{
Temp tmp = get_ssa_temp(ctx, ssa);
if (ssa->parent_instr->type == nir_instr_type_ssa_undef) {
@ -9772,7 +9771,7 @@ visit_phi(isel_context* ctx, nir_phi_instr* instr)
aco_opcode opcode = logical ? aco_opcode::p_phi : aco_opcode::p_linear_phi;
/* we want a sorted list of sources, since the predecessor list is also sorted */
std::map<unsigned, nir_ssa_def*> phi_src;
std::map<unsigned, nir_def*> phi_src;
nir_foreach_phi_src (src, instr)
phi_src[src->pred->index] = src->src.ssa;
@ -9782,7 +9781,7 @@ visit_phi(isel_context* ctx, nir_phi_instr* instr)
(std::max(exec_list_length(&instr->srcs), (unsigned)preds.size()) + 1) * sizeof(Operand));
unsigned num_defined = 0;
unsigned cur_pred_idx = 0;
for (std::pair<unsigned, nir_ssa_def*> src : phi_src) {
for (std::pair<unsigned, nir_def*> src : phi_src) {
if (cur_pred_idx < preds.size()) {
/* handle missing preds (IF merges with discard/break) and extra preds
* (loop exit with discard) */
@ -9857,7 +9856,7 @@ visit_phi(isel_context* ctx, nir_phi_instr* instr)
}
void
visit_undef(isel_context* ctx, nir_ssa_undef_instr* instr)
visit_undef(isel_context* ctx, nir_undef_instr* instr)
{
Temp dst = get_ssa_temp(ctx, &instr->def);


@ -66,7 +66,7 @@ is_block_reachable(nir_function_impl* impl, nir_block* known_reachable, nir_bloc
/* Check whether the given SSA def is only used by cross-lane instructions. */
bool
only_used_by_cross_lane_instrs(nir_ssa_def* ssa, bool follow_phis = true)
only_used_by_cross_lane_instrs(nir_def* ssa, bool follow_phis = true)
{
nir_foreach_use (src, ssa) {
switch (src->parent_instr->type) {
@ -178,13 +178,13 @@ sanitize_cf_list(nir_function_impl* impl, struct exec_list* cf_list)
}
void
apply_nuw_to_ssa(isel_context* ctx, nir_ssa_def* ssa)
apply_nuw_to_ssa(isel_context* ctx, nir_def* ssa)
{
nir_ssa_scalar scalar;
nir_scalar scalar;
scalar.def = ssa;
scalar.comp = 0;
if (!nir_ssa_scalar_is_alu(scalar) || nir_ssa_scalar_alu_op(scalar) != nir_op_iadd)
if (!nir_scalar_is_alu(scalar) || nir_scalar_alu_op(scalar) != nir_op_iadd)
return;
nir_alu_instr* add = nir_instr_as_alu(ssa->parent_instr);
@ -192,11 +192,11 @@ apply_nuw_to_ssa(isel_context* ctx, nir_ssa_def* ssa)
if (add->no_unsigned_wrap)
return;
nir_ssa_scalar src0 = nir_ssa_scalar_chase_alu_src(scalar, 0);
nir_ssa_scalar src1 = nir_ssa_scalar_chase_alu_src(scalar, 1);
nir_scalar src0 = nir_scalar_chase_alu_src(scalar, 0);
nir_scalar src1 = nir_scalar_chase_alu_src(scalar, 1);
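   /* Canonicalize the operands so that a constant, if present, ends up in src1. */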
if (nir_ssa_scalar_is_const(src0)) {
nir_ssa_scalar tmp = src0;
if (nir_scalar_is_const(src0)) {
nir_scalar tmp = src0;
src0 = src1;
src1 = tmp;
}


@ -41,7 +41,7 @@ struct ac_nir_context {
LLVMBasicBlockRef break_block;
};
static LLVMTypeRef get_def_type(struct ac_nir_context *ctx, const nir_ssa_def *def)
static LLVMTypeRef get_def_type(struct ac_nir_context *ctx, const nir_def *def)
{
LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
if (def->num_components > 1) {
@ -1471,7 +1471,7 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, const nir_te
assert((!args->tfe || !args->d16) && "unsupported");
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
unsigned mask = nir_def_components_read(&instr->dest.ssa);
/* Buffers don't support A16. */
if (args->a16)
@ -2326,7 +2326,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intri
args.tfe = instr->intrinsic == nir_intrinsic_bindless_image_sparse_load;
if (dim == GLSL_SAMPLER_DIM_BUF) {
unsigned num_channels = util_last_bit(nir_ssa_def_components_read(&instr->dest.ssa));
unsigned num_channels = util_last_bit(nir_def_components_read(&instr->dest.ssa));
if (instr->dest.ssa.bit_size == 64)
num_channels = num_channels < 4 ? 2 : 4;
LLVMValueRef rsrc, vindex;
@ -4133,7 +4133,7 @@ static void phi_post_pass(struct ac_nir_context *ctx)
}
}
static bool is_def_used_in_an_export(const nir_ssa_def *def)
static bool is_def_used_in_an_export(const nir_def *def)
{
nir_foreach_use (use_src, def) {
if (use_src->parent_instr->type == nir_instr_type_intrinsic) {
@ -4150,7 +4150,7 @@ static bool is_def_used_in_an_export(const nir_ssa_def *def)
return false;
}
static void visit_ssa_undef(struct ac_nir_context *ctx, const nir_ssa_undef_instr *instr)
static void visit_ssa_undef(struct ac_nir_context *ctx, const nir_undef_instr *instr)
{
unsigned num_components = instr->def.num_components;
LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);


@ -618,7 +618,7 @@ radv_meta_build_nir_vs_generate_vertices(struct radv_device *dev)
nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_VERTEX, "meta_vs_gen_verts");
nir_ssa_def *outvec = nir_gen_rect_vertices(&b, NULL, NULL);
nir_def *outvec = nir_gen_rect_vertices(&b, NULL, NULL);
v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
v_position->data.location = VARYING_SLOT_POS;
@ -636,10 +636,10 @@ radv_meta_build_nir_fs_noop(struct radv_device *dev)
void
radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, bool is_integer, int samples,
nir_variable *input_img, nir_variable *color, nir_ssa_def *img_coord)
nir_variable *input_img, nir_variable *color, nir_def *img_coord)
{
nir_deref_instr *input_img_deref = nir_build_deref_var(b, input_img);
nir_ssa_def *sample0 = nir_txf_ms_deref(b, input_img_deref, img_coord, nir_imm_int(b, 0));
nir_def *sample0 = nir_txf_ms_deref(b, input_img_deref, img_coord, nir_imm_int(b, 0));
if (is_integer || samples <= 1) {
nir_store_var(b, color, sample0, 0xf);
@ -647,13 +647,13 @@ radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b,
}
if (device->physical_device->use_fmask) {
nir_ssa_def *all_same = nir_samples_identical_deref(b, input_img_deref, img_coord);
nir_def *all_same = nir_samples_identical_deref(b, input_img_deref, img_coord);
nir_push_if(b, nir_inot(b, all_same));
}
nir_ssa_def *accum = sample0;
nir_def *accum = sample0;
for (int i = 1; i < samples; i++) {
nir_ssa_def *sample = nir_txf_ms_deref(b, input_img_deref, img_coord, nir_imm_int(b, i));
nir_def *sample = nir_txf_ms_deref(b, input_img_deref, img_coord, nir_imm_int(b, i));
accum = nir_fadd(b, accum, sample);
}
@ -667,21 +667,21 @@ radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b,
}
}
nir_ssa_def *
nir_def *
radv_meta_load_descriptor(nir_builder *b, unsigned desc_set, unsigned binding)
{
nir_ssa_def *rsrc = nir_vulkan_resource_index(b, 3, 32, nir_imm_int(b, 0), .desc_set = desc_set, .binding = binding);
nir_def *rsrc = nir_vulkan_resource_index(b, 3, 32, nir_imm_int(b, 0), .desc_set = desc_set, .binding = binding);
return nir_trim_vector(b, rsrc, 2);
}
nir_ssa_def *
nir_def *
get_global_ids(nir_builder *b, unsigned num_components)
{
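   /* Global invocation ID = workgroup_id * workgroup_size + local_invocation_id, limited to num_components. */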
unsigned mask = BITFIELD_MASK(num_components);
nir_ssa_def *local_ids = nir_channels(b, nir_load_local_invocation_id(b), mask);
nir_ssa_def *block_ids = nir_channels(b, nir_load_workgroup_id(b, 32), mask);
nir_ssa_def *block_size =
nir_def *local_ids = nir_channels(b, nir_load_local_invocation_id(b), mask);
nir_def *block_ids = nir_channels(b, nir_load_workgroup_id(b, 32), mask);
nir_def *block_size =
nir_channels(b,
nir_imm_ivec4(b, b->shader->info.workgroup_size[0], b->shader->info.workgroup_size[1],
b->shader->info.workgroup_size[2], 0),
@ -691,9 +691,9 @@ get_global_ids(nir_builder *b, unsigned num_components)
}
void
radv_break_on_count(nir_builder *b, nir_variable *var, nir_ssa_def *count)
radv_break_on_count(nir_builder *b, nir_variable *var, nir_def *count)
{
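   /* Emit "if (counter >= count) break;" using the loop counter stored in var. */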
nir_ssa_def *counter = nir_load_var(b, var);
nir_def *counter = nir_load_var(b, var);
nir_push_if(b, nir_uge(b, counter, count));
nir_jump(b, nir_jump_break);


@ -261,13 +261,13 @@ nir_shader *radv_meta_build_nir_vs_generate_vertices(struct radv_device *dev);
nir_shader *radv_meta_build_nir_fs_noop(struct radv_device *dev);
void radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, bool is_integer, int samples,
nir_variable *input_img, nir_variable *color, nir_ssa_def *img_coord);
nir_variable *input_img, nir_variable *color, nir_def *img_coord);
nir_ssa_def *radv_meta_load_descriptor(nir_builder *b, unsigned desc_set, unsigned binding);
nir_def *radv_meta_load_descriptor(nir_builder *b, unsigned desc_set, unsigned binding);
nir_ssa_def *get_global_ids(nir_builder *b, unsigned num_components);
nir_def *get_global_ids(nir_builder *b, unsigned num_components);
void radv_break_on_count(nir_builder *b, nir_variable *var, nir_ssa_def *count);
void radv_break_on_count(nir_builder *b, nir_variable *var, nir_def *count);
#ifdef __cplusplus
}


@ -47,14 +47,14 @@ build_nir_vertex_shader(struct radv_device *dev)
tex_pos_out->data.location = VARYING_SLOT_VAR0;
tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;
nir_ssa_def *outvec = nir_gen_rect_vertices(&b, NULL, NULL);
nir_def *outvec = nir_gen_rect_vertices(&b, NULL, NULL);
nir_store_var(&b, pos_out, outvec, 0xf);
nir_ssa_def *src_box = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
nir_ssa_def *src0_z = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
nir_def *src_box = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
nir_def *src0_z = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
nir_ssa_def *vertex_id = nir_load_vertex_id_zero_base(&b);
nir_def *vertex_id = nir_load_vertex_id_zero_base(&b);
/* vertex 0 - src0_x, src0_y, src0_z */
/* vertex 1 - src0_x, src1_y, src0_z*/
@ -62,16 +62,16 @@ build_nir_vertex_shader(struct radv_device *dev)
/* so channel 0 is vertex_id != 2 ? src_x : src_x + w
channel 1 is vertex id != 1 ? src_y : src_y + w */
nir_ssa_def *c0cmp = nir_ine_imm(&b, vertex_id, 2);
nir_ssa_def *c1cmp = nir_ine_imm(&b, vertex_id, 1);
nir_def *c0cmp = nir_ine_imm(&b, vertex_id, 2);
nir_def *c1cmp = nir_ine_imm(&b, vertex_id, 1);
nir_ssa_def *comp[4];
nir_def *comp[4];
comp[0] = nir_bcsel(&b, c0cmp, nir_channel(&b, src_box, 0), nir_channel(&b, src_box, 2));
comp[1] = nir_bcsel(&b, c1cmp, nir_channel(&b, src_box, 1), nir_channel(&b, src_box, 3));
comp[2] = src0_z;
comp[3] = nir_imm_float(&b, 1.0);
nir_ssa_def *out_tex_vec = nir_vec(&b, comp, 4);
nir_def *out_tex_vec = nir_vec(&b, comp, 4);
nir_store_var(&b, tex_pos_out, out_tex_vec, 0xf);
return b.shader;
}
@ -89,7 +89,7 @@ build_nir_copy_fragment_shader(struct radv_device *dev, enum glsl_sampler_dim te
* position.
*/
unsigned swz[] = {0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2};
nir_ssa_def *const tex_pos =
nir_def *const tex_pos =
nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
const struct glsl_type *sampler_type =
@ -99,7 +99,7 @@ build_nir_copy_fragment_shader(struct radv_device *dev, enum glsl_sampler_dim te
sampler->data.binding = 0;
nir_deref_instr *tex_deref = nir_build_deref_var(&b, sampler);
nir_ssa_def *color = nir_tex_deref(&b, tex_deref, tex_deref, tex_pos);
nir_def *color = nir_tex_deref(&b, tex_deref, tex_deref, tex_pos);
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
color_out->data.location = FRAG_RESULT_DATA0;
@ -121,7 +121,7 @@ build_nir_copy_fragment_shader_depth(struct radv_device *dev, enum glsl_sampler_
* position.
*/
unsigned swz[] = {0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2};
nir_ssa_def *const tex_pos =
nir_def *const tex_pos =
nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
const struct glsl_type *sampler_type =
@ -131,7 +131,7 @@ build_nir_copy_fragment_shader_depth(struct radv_device *dev, enum glsl_sampler_
sampler->data.binding = 0;
nir_deref_instr *tex_deref = nir_build_deref_var(&b, sampler);
nir_ssa_def *color = nir_tex_deref(&b, tex_deref, tex_deref, tex_pos);
nir_def *color = nir_tex_deref(&b, tex_deref, tex_deref, tex_pos);
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
color_out->data.location = FRAG_RESULT_DEPTH;
@ -153,7 +153,7 @@ build_nir_copy_fragment_shader_stencil(struct radv_device *dev, enum glsl_sample
* position.
*/
unsigned swz[] = {0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2};
nir_ssa_def *const tex_pos =
nir_def *const tex_pos =
nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
const struct glsl_type *sampler_type =
@ -163,7 +163,7 @@ build_nir_copy_fragment_shader_stencil(struct radv_device *dev, enum glsl_sample
sampler->data.binding = 0;
nir_deref_instr *tex_deref = nir_build_deref_var(&b, sampler);
nir_ssa_def *color = nir_tex_deref(&b, tex_deref, tex_deref, tex_pos);
nir_def *color = nir_tex_deref(&b, tex_deref, tex_deref, tex_pos);
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
color_out->data.location = FRAG_RESULT_STENCIL;


@ -375,11 +375,11 @@ build_nir_vertex_shader(struct radv_device *device)
tex_pos_out->data.location = VARYING_SLOT_VAR0;
tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;
nir_ssa_def *outvec = nir_gen_rect_vertices(&b, NULL, NULL);
nir_def *outvec = nir_gen_rect_vertices(&b, NULL, NULL);
nir_store_var(&b, pos_out, outvec, 0xf);
nir_ssa_def *src_box = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
nir_ssa_def *vertex_id = nir_load_vertex_id_zero_base(&b);
nir_def *src_box = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
nir_def *vertex_id = nir_load_vertex_id_zero_base(&b);
/* vertex 0 - src_x, src_y */
/* vertex 1 - src_x, src_y+h */
@ -387,22 +387,22 @@ build_nir_vertex_shader(struct radv_device *device)
/* so channel 0 is vertex_id != 2 ? src_x : src_x + w
channel 1 is vertex id != 1 ? src_y : src_y + w */
nir_ssa_def *c0cmp = nir_ine_imm(&b, vertex_id, 2);
nir_ssa_def *c1cmp = nir_ine_imm(&b, vertex_id, 1);
nir_def *c0cmp = nir_ine_imm(&b, vertex_id, 2);
nir_def *c1cmp = nir_ine_imm(&b, vertex_id, 1);
nir_ssa_def *comp[2];
nir_def *comp[2];
comp[0] = nir_bcsel(&b, c0cmp, nir_channel(&b, src_box, 0), nir_channel(&b, src_box, 2));
comp[1] = nir_bcsel(&b, c1cmp, nir_channel(&b, src_box, 1), nir_channel(&b, src_box, 3));
nir_ssa_def *out_tex_vec = nir_vec(&b, comp, 2);
nir_def *out_tex_vec = nir_vec(&b, comp, 2);
nir_store_var(&b, tex_pos_out, out_tex_vec, 0x3);
return b.shader;
}
typedef nir_ssa_def *(*texel_fetch_build_func)(struct nir_builder *, struct radv_device *, nir_ssa_def *, bool, bool);
typedef nir_def *(*texel_fetch_build_func)(struct nir_builder *, struct radv_device *, nir_def *, bool, bool);
static nir_ssa_def *
build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa_def *tex_pos, bool is_3d,
static nir_def *
build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device, nir_def *tex_pos, bool is_3d,
bool is_multisampled)
{
enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D
@ -413,12 +413,12 @@ build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;
nir_ssa_def *tex_pos_3d = NULL;
nir_ssa_def *sample_idx = NULL;
nir_def *tex_pos_3d = NULL;
nir_def *sample_idx = NULL;
if (is_3d) {
nir_ssa_def *layer = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
nir_def *layer = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
nir_ssa_def *chans[3];
nir_def *chans[3];
chans[0] = nir_channel(b, tex_pos, 0);
chans[1] = nir_channel(b, tex_pos, 1);
chans[2] = layer;
@ -437,8 +437,8 @@ build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa
}
}
static nir_ssa_def *
build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa_def *tex_pos, bool is_3d,
static nir_def *
build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device, nir_def *tex_pos, bool is_3d,
bool is_multisampled)
{
const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
@ -446,10 +446,10 @@ build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device, nir_ss
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;
nir_ssa_def *width = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
nir_def *width = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
nir_ssa_def *pos_x = nir_channel(b, tex_pos, 0);
nir_ssa_def *pos_y = nir_channel(b, tex_pos, 1);
nir_def *pos_x = nir_channel(b, tex_pos, 0);
nir_def *pos_y = nir_channel(b, tex_pos, 1);
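   /* Convert the 2D position to a linear texel index in the buffer view: y * width + x. */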
pos_y = nir_imul(b, pos_y, width);
pos_x = nir_iadd(b, pos_x, pos_y);
@ -477,10 +477,10 @@ build_nir_copy_fragment_shader(struct radv_device *device, texel_fetch_build_fun
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
color_out->data.location = FRAG_RESULT_DATA0;
nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
nir_ssa_def *tex_pos = nir_trim_vector(&b, pos_int, 2);
nir_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
nir_def *tex_pos = nir_trim_vector(&b, pos_int, 2);
nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
nir_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
nir_store_var(&b, color_out, color, 0xf);
b.shader->info.fs.uses_sample_shading = is_multisampled;
@ -502,10 +502,10 @@ build_nir_copy_fragment_shader_depth(struct radv_device *device, texel_fetch_bui
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
color_out->data.location = FRAG_RESULT_DEPTH;
nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
nir_ssa_def *tex_pos = nir_trim_vector(&b, pos_int, 2);
nir_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
nir_def *tex_pos = nir_trim_vector(&b, pos_int, 2);
nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
nir_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
nir_store_var(&b, color_out, color, 0x1);
b.shader->info.fs.uses_sample_shading = is_multisampled;
@ -527,10 +527,10 @@ build_nir_copy_fragment_shader_stencil(struct radv_device *device, texel_fetch_b
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
color_out->data.location = FRAG_RESULT_STENCIL;
nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
nir_ssa_def *tex_pos = nir_trim_vector(&b, pos_int, 2);
nir_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
nir_def *tex_pos = nir_trim_vector(&b, pos_int, 2);
nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
nir_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
nir_store_var(&b, color_out, color, 0x1);
b.shader->info.fs.uses_sample_shading = is_multisampled;


@ -10,17 +10,17 @@ build_buffer_fill_shader(struct radv_device *dev)
nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_buffer_fill");
b.shader->info.workgroup_size[0] = 64;
nir_ssa_def *pconst = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
nir_ssa_def *buffer_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b0011));
nir_ssa_def *max_offset = nir_channel(&b, pconst, 2);
nir_ssa_def *data = nir_swizzle(&b, nir_channel(&b, pconst, 3), (unsigned[]){0, 0, 0, 0}, 4);
nir_def *pconst = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
nir_def *buffer_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b0011));
nir_def *max_offset = nir_channel(&b, pconst, 2);
nir_def *data = nir_swizzle(&b, nir_channel(&b, pconst, 3), (unsigned[]){0, 0, 0, 0}, 4);
nir_ssa_def *global_id = nir_iadd(
nir_def *global_id = nir_iadd(
&b, nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b, 32), 0), b.shader->info.workgroup_size[0]),
nir_load_local_invocation_index(&b));
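   /* Each invocation fills 16 bytes; the offset is clamped to max_offset so the last invocations stay within the fill range. */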
nir_ssa_def *offset = nir_imin(&b, nir_imul_imm(&b, global_id, 16), max_offset);
nir_ssa_def *dst_addr = nir_iadd(&b, buffer_addr, nir_u2u64(&b, offset));
nir_def *offset = nir_imin(&b, nir_imul_imm(&b, global_id, 16), max_offset);
nir_def *dst_addr = nir_iadd(&b, buffer_addr, nir_u2u64(&b, offset));
nir_build_store_global(&b, data, dst_addr, .align_mul = 4);
return b.shader;
@ -32,18 +32,18 @@ build_buffer_copy_shader(struct radv_device *dev)
nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_buffer_copy");
b.shader->info.workgroup_size[0] = 64;
nir_ssa_def *pconst = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
nir_ssa_def *max_offset = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
nir_ssa_def *src_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b0011));
nir_ssa_def *dst_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b1100));
nir_def *pconst = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
nir_def *max_offset = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
nir_def *src_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b0011));
nir_def *dst_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b1100));
nir_ssa_def *global_id = nir_iadd(
nir_def *global_id = nir_iadd(
&b, nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b, 32), 0), b.shader->info.workgroup_size[0]),
nir_load_local_invocation_index(&b));
nir_ssa_def *offset = nir_u2u64(&b, nir_imin(&b, nir_imul_imm(&b, global_id, 16), max_offset));
nir_def *offset = nir_u2u64(&b, nir_imin(&b, nir_imul_imm(&b, global_id, 16), max_offset));
nir_ssa_def *data = nir_build_load_global(&b, 4, 32, nir_iadd(&b, src_addr, offset), .align_mul = 4);
nir_def *data = nir_build_load_global(&b, 4, 32, nir_iadd(&b, src_addr, offset), .align_mul = 4);
nir_build_store_global(&b, data, nir_iadd(&b, dst_addr, offset), .align_mul = 4);
return b.shader;


@ -46,24 +46,24 @@ build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);
nir_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);
nir_ssa_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
nir_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
nir_ssa_def *outval =
nir_def *img_coord = nir_iadd(&b, global_id, offset);
nir_def *outval =
nir_txf_deref(&b, nir_build_deref_var(&b, input_img), nir_trim_vector(&b, img_coord, 2 + is_3d), NULL);
nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
nir_def *pos_x = nir_channel(&b, global_id, 0);
nir_def *pos_y = nir_channel(&b, global_id, 1);
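   /* Linearize the pixel position into a texel index in the destination buffer: y * stride + x. */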
nir_ssa_def *tmp = nir_imul(&b, pos_y, stride);
nir_def *tmp = nir_imul(&b, pos_y, stride);
tmp = nir_iadd(&b, tmp, pos_x);
nir_ssa_def *coord = nir_replicate(&b, tmp, 4);
nir_def *coord = nir_replicate(&b, tmp, 4);
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_ssa_undef(&b, 1, 32), outval,
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_undef(&b, 1, 32), outval,
nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
return b.shader;
@ -196,26 +196,25 @@ build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);
nir_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);
nir_ssa_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
nir_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
nir_def *pos_x = nir_channel(&b, global_id, 0);
nir_def *pos_y = nir_channel(&b, global_id, 1);
nir_ssa_def *buf_coord = nir_imul(&b, pos_y, stride);
nir_def *buf_coord = nir_imul(&b, pos_y, stride);
buf_coord = nir_iadd(&b, buf_coord, pos_x);
nir_ssa_def *coord = nir_iadd(&b, global_id, offset);
nir_ssa_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), buf_coord, NULL);
nir_def *coord = nir_iadd(&b, global_id, offset);
nir_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), buf_coord, NULL);
nir_ssa_def *img_coord =
nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1),
is_3d ? nir_channel(&b, coord, 2) : nir_ssa_undef(&b, 1, 32), nir_ssa_undef(&b, 1, 32));
nir_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1),
is_3d ? nir_channel(&b, coord, 2) : nir_undef(&b, 1, 32), nir_undef(&b, 1, 32));
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32),
outval, nir_imm_int(&b, 0), .image_dim = dim);
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_undef(&b, 1, 32), outval,
nir_imm_int(&b, 0), .image_dim = dim);
return b.shader;
}
@ -344,31 +343,31 @@ build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_ssa_def *global_id = get_global_ids(&b, 2);
nir_def *global_id = get_global_ids(&b, 2);
nir_ssa_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_ssa_def *pitch = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 12);
nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
nir_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_def *pitch = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 12);
nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
nir_def *pos_x = nir_channel(&b, global_id, 0);
nir_def *pos_y = nir_channel(&b, global_id, 1);
nir_ssa_def *buf_coord = nir_imul(&b, pos_y, stride);
nir_def *buf_coord = nir_imul(&b, pos_y, stride);
buf_coord = nir_iadd(&b, buf_coord, pos_x);
nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
nir_def *img_coord = nir_iadd(&b, global_id, offset);
nir_ssa_def *global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, img_coord, 1), pitch),
nir_imul_imm(&b, nir_channel(&b, img_coord, 0), 3));
nir_def *global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, img_coord, 1), pitch),
nir_imul_imm(&b, nir_channel(&b, img_coord, 0), 3));
nir_ssa_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), buf_coord, NULL);
nir_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), buf_coord, NULL);
for (int chan = 0; chan < 3; chan++) {
nir_ssa_def *local_pos = nir_iadd_imm(&b, global_pos, chan);
nir_def *local_pos = nir_iadd_imm(&b, global_pos, chan);
nir_ssa_def *coord = nir_replicate(&b, local_pos, 4);
nir_def *coord = nir_replicate(&b, local_pos, 4);
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_ssa_undef(&b, 1, 32),
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_undef(&b, 1, 32),
nir_channel(&b, outval, chan), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
}
@ -472,18 +471,17 @@ build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d, int samples)
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);
nir_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);
nir_ssa_def *src_offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
nir_ssa_def *dst_offset =
nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 12), .range = is_3d ? 24 : 20);
nir_def *src_offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
nir_def *dst_offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 12), .range = is_3d ? 24 : 20);
nir_ssa_def *src_coord = nir_iadd(&b, global_id, src_offset);
nir_def *src_coord = nir_iadd(&b, global_id, src_offset);
nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img);
nir_ssa_def *dst_coord = nir_iadd(&b, global_id, dst_offset);
nir_def *dst_coord = nir_iadd(&b, global_id, dst_offset);
nir_ssa_def *tex_vals[8];
nir_def *tex_vals[8];
if (is_multisampled) {
for (uint32_t i = 0; i < samples; i++) {
tex_vals[i] = nir_txf_ms_deref(&b, input_img_deref, nir_trim_vector(&b, src_coord, 2), nir_imm_int(&b, i));
@ -492,9 +490,8 @@ build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d, int samples)
tex_vals[0] = nir_txf_deref(&b, input_img_deref, nir_trim_vector(&b, src_coord, 2 + is_3d), nir_imm_int(&b, 0));
}
nir_ssa_def *img_coord =
nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1),
is_3d ? nir_channel(&b, dst_coord, 2) : nir_ssa_undef(&b, 1, 32), nir_ssa_undef(&b, 1, 32));
nir_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1),
is_3d ? nir_channel(&b, dst_coord, 2) : nir_undef(&b, 1, 32), nir_undef(&b, 1, 32));
for (uint32_t i = 0; i < samples; i++) {
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_imm_int(&b, i),
@ -641,34 +638,34 @@ build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_ssa_def *global_id = get_global_ids(&b, 2);
nir_def *global_id = get_global_ids(&b, 2);
nir_ssa_def *src_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 12);
nir_ssa_def *dst_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 12), .range = 24);
nir_def *src_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 12);
nir_def *dst_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 12), .range = 24);
nir_ssa_def *src_stride = nir_channel(&b, src_offset, 2);
nir_ssa_def *dst_stride = nir_channel(&b, dst_offset, 2);
nir_def *src_stride = nir_channel(&b, src_offset, 2);
nir_def *dst_stride = nir_channel(&b, dst_offset, 2);
nir_ssa_def *src_img_coord = nir_iadd(&b, global_id, src_offset);
nir_ssa_def *dst_img_coord = nir_iadd(&b, global_id, dst_offset);
nir_def *src_img_coord = nir_iadd(&b, global_id, src_offset);
nir_def *dst_img_coord = nir_iadd(&b, global_id, dst_offset);
nir_ssa_def *src_global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
nir_imul_imm(&b, nir_channel(&b, src_img_coord, 0), 3));
nir_def *src_global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
nir_imul_imm(&b, nir_channel(&b, src_img_coord, 0), 3));
nir_ssa_def *dst_global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
nir_imul_imm(&b, nir_channel(&b, dst_img_coord, 0), 3));
nir_def *dst_global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
nir_imul_imm(&b, nir_channel(&b, dst_img_coord, 0), 3));
for (int chan = 0; chan < 3; chan++) {
/* src */
nir_ssa_def *src_local_pos = nir_iadd_imm(&b, src_global_pos, chan);
nir_ssa_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), src_local_pos, NULL);
nir_def *src_local_pos = nir_iadd_imm(&b, src_global_pos, chan);
nir_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), src_local_pos, NULL);
/* dst */
nir_ssa_def *dst_local_pos = nir_iadd_imm(&b, dst_global_pos, chan);
nir_def *dst_local_pos = nir_iadd_imm(&b, dst_global_pos, chan);
nir_ssa_def *dst_coord = nir_replicate(&b, dst_local_pos, 4);
nir_def *dst_coord = nir_replicate(&b, dst_local_pos, 4);
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord, nir_ssa_undef(&b, 1, 32),
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord, nir_undef(&b, 1, 32),
nir_channel(&b, outval, 0), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
}
@ -769,16 +766,16 @@ build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d, int samples
output_img->data.descriptor_set = 0;
output_img->data.binding = 0;
nir_ssa_def *global_id = get_global_ids(&b, 2);
nir_def *global_id = get_global_ids(&b, 2);
nir_ssa_def *clear_val = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
nir_ssa_def *layer = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20);
nir_def *clear_val = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
nir_def *layer = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20);
nir_ssa_def *comps[4];
nir_def *comps[4];
comps[0] = nir_channel(&b, global_id, 0);
comps[1] = nir_channel(&b, global_id, 1);
comps[2] = layer;
comps[3] = nir_ssa_undef(&b, 1, 32);
comps[3] = nir_undef(&b, 1, 32);
global_id = nir_vec(&b, comps, 4);
for (uint32_t i = 0; i < samples; i++) {
@ -917,22 +914,22 @@ build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
output_img->data.descriptor_set = 0;
output_img->data.binding = 0;
nir_ssa_def *global_id = get_global_ids(&b, 2);
nir_def *global_id = get_global_ids(&b, 2);
nir_ssa_def *clear_val = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 12);
nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
nir_def *clear_val = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 12);
nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
nir_ssa_def *global_x = nir_channel(&b, global_id, 0);
nir_ssa_def *global_y = nir_channel(&b, global_id, 1);
nir_def *global_x = nir_channel(&b, global_id, 0);
nir_def *global_y = nir_channel(&b, global_id, 1);
nir_ssa_def *global_pos = nir_iadd(&b, nir_imul(&b, global_y, stride), nir_imul_imm(&b, global_x, 3));
nir_def *global_pos = nir_iadd(&b, nir_imul(&b, global_y, stride), nir_imul_imm(&b, global_x, 3));
for (unsigned chan = 0; chan < 3; chan++) {
nir_ssa_def *local_pos = nir_iadd_imm(&b, global_pos, chan);
nir_def *local_pos = nir_iadd_imm(&b, global_pos, chan);
nir_ssa_def *coord = nir_replicate(&b, local_pos, 4);
nir_def *coord = nir_replicate(&b, local_pos, 4);
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_ssa_undef(&b, 1, 32),
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_undef(&b, 1, 32),
nir_channel(&b, clear_val, chan), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
}


@ -45,24 +45,24 @@ build_color_shaders(struct radv_device *dev, struct nir_shader **out_vs, struct
nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, "gl_Position");
vs_out_pos->data.location = VARYING_SLOT_POS;
nir_ssa_def *in_color_load = nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16);
nir_def *in_color_load = nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16);
nir_variable *fs_out_color = nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, "f_color");
fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;
nir_store_var(&fs_b, fs_out_color, in_color_load, 0xf);
nir_ssa_def *outvec = nir_gen_rect_vertices(&vs_b, NULL, NULL);
nir_def *outvec = nir_gen_rect_vertices(&vs_b, NULL, NULL);
nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
const struct glsl_type *layer_type = glsl_int_type();
nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
vs_out_layer->data.location = VARYING_SLOT_LAYER;
vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
nir_def *inst_id = nir_load_instance_id(&vs_b);
nir_def *base_instance = nir_load_base_instance(&vs_b);
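   /* The layer index is passed through instancing: layer = instance_id + base_instance. */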
nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
nir_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
*out_vs = vs_b.shader;
@ -376,9 +376,9 @@ build_depthstencil_shader(struct radv_device *dev, struct nir_shader **out_vs, s
nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type, "gl_Position");
vs_out_pos->data.location = VARYING_SLOT_POS;
nir_ssa_def *z;
nir_def *z;
if (unrestricted) {
nir_ssa_def *in_color_load = nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4);
nir_def *in_color_load = nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4);
nir_variable *fs_out_depth = nir_variable_create(fs_b.shader, nir_var_shader_out, glsl_int_type(), "f_depth");
fs_out_depth->data.location = FRAG_RESULT_DEPTH;
@ -389,17 +389,17 @@ build_depthstencil_shader(struct radv_device *dev, struct nir_shader **out_vs, s
z = nir_load_push_constant(&vs_b, 1, 32, nir_imm_int(&vs_b, 0), .range = 4);
}
nir_ssa_def *outvec = nir_gen_rect_vertices(&vs_b, z, NULL);
nir_def *outvec = nir_gen_rect_vertices(&vs_b, z, NULL);
nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
const struct glsl_type *layer_type = glsl_int_type();
nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
vs_out_layer->data.location = VARYING_SLOT_LAYER;
vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
nir_def *inst_id = nir_load_instance_id(&vs_b);
nir_def *base_instance = nir_load_base_instance(&vs_b);
nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
nir_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
*out_vs = vs_b.shader;
@ -808,19 +808,19 @@ build_clear_htile_mask_shader(struct radv_device *dev)
nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_htile_mask");
b.shader->info.workgroup_size[0] = 64;
nir_ssa_def *global_id = get_global_ids(&b, 1);
nir_def *global_id = get_global_ids(&b, 1);
nir_ssa_def *offset = nir_imul_imm(&b, global_id, 16);
nir_def *offset = nir_imul_imm(&b, global_id, 16);
offset = nir_channel(&b, offset, 0);
nir_ssa_def *buf = radv_meta_load_descriptor(&b, 0, 0);
nir_def *buf = radv_meta_load_descriptor(&b, 0, 0);
nir_ssa_def *constants = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_def *constants = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_ssa_def *load = nir_load_ssbo(&b, 4, 32, buf, offset, .align_mul = 16);
nir_def *load = nir_load_ssbo(&b, 4, 32, buf, offset, .align_mul = 16);
/* data = (data & ~htile_mask) | (htile_value & htile_mask) */
nir_ssa_def *data = nir_iand(&b, load, nir_channel(&b, constants, 1));
nir_def *data = nir_iand(&b, load, nir_channel(&b, constants, 1));
data = nir_ior(&b, data, nir_channel(&b, constants, 0));
nir_store_ssbo(&b, data, buf, offset, .access = ACCESS_NON_READABLE, .align_mul = 16);
@ -906,29 +906,29 @@ build_clear_dcc_comp_to_single_shader(struct radv_device *dev, bool is_msaa)
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_ssa_def *global_id = get_global_ids(&b, 3);
nir_def *global_id = get_global_ids(&b, 3);
/* Load the dimensions in pixels of a block that gets compressed to one DCC byte. */
nir_ssa_def *dcc_block_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_def *dcc_block_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
/* Compute the coordinates. */
nir_ssa_def *coord = nir_trim_vector(&b, global_id, 2);
nir_def *coord = nir_trim_vector(&b, global_id, 2);
coord = nir_imul(&b, coord, dcc_block_size);
coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), nir_channel(&b, global_id, 2),
nir_ssa_undef(&b, 1, 32));
nir_undef(&b, 1, 32));
nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
output_img->data.descriptor_set = 0;
output_img->data.binding = 0;
/* Load the clear color values. */
nir_ssa_def *clear_values = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8);
nir_def *clear_values = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8);
nir_ssa_def *data = nir_vec4(&b, nir_channel(&b, clear_values, 0), nir_channel(&b, clear_values, 1),
nir_channel(&b, clear_values, 1), nir_channel(&b, clear_values, 1));
nir_def *data = nir_vec4(&b, nir_channel(&b, clear_values, 0), nir_channel(&b, clear_values, 1),
nir_channel(&b, clear_values, 1), nir_channel(&b, clear_values, 1));
/* Store the clear color values. */
nir_ssa_def *sample_id = is_msaa ? nir_imm_int(&b, 0) : nir_ssa_undef(&b, 1, 32);
nir_def *sample_id = is_msaa ? nir_imm_int(&b, 0) : nir_undef(&b, 1, 32);
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, sample_id, data, nir_imm_int(&b, 0),
.image_dim = dim, .image_array = true);


@ -47,22 +47,22 @@ build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf
b.shader->info.workgroup_size[1] = 8;
/* Get coordinates. */
nir_ssa_def *global_id = get_global_ids(&b, 2);
nir_def *global_id = get_global_ids(&b, 2);
nir_ssa_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
/* Multiply the coordinates by the HTILE block size. */
nir_ssa_def *coord = nir_iadd(&b, nir_imul_imm(&b, global_id, 8), offset);
nir_def *coord = nir_iadd(&b, nir_imul_imm(&b, global_id, 8), offset);
/* Load constants. */
nir_ssa_def *constants = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 8), .range = 20);
nir_ssa_def *htile_pitch = nir_channel(&b, constants, 0);
nir_ssa_def *htile_slice_size = nir_channel(&b, constants, 1);
nir_ssa_def *read_htile_value = nir_channel(&b, constants, 2);
nir_def *constants = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 8), .range = 20);
nir_def *htile_pitch = nir_channel(&b, constants, 0);
nir_def *htile_slice_size = nir_channel(&b, constants, 1);
nir_def *read_htile_value = nir_channel(&b, constants, 2);
/* Get the HTILE addr from coordinates. */
nir_ssa_def *zero = nir_imm_int(&b, 0);
nir_ssa_def *htile_addr =
nir_def *zero = nir_imm_int(&b, 0);
nir_def *htile_addr =
ac_nir_htile_addr_from_coord(&b, &device->physical_device->rad_info, &surf->u.gfx9.zs.htile_equation, htile_pitch,
htile_slice_size, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero);
@ -73,7 +73,7 @@ build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf
input_vrs_img->data.binding = 0;
/* Load the VRS rates from the 2D image. */
nir_ssa_def *value = nir_txf_deref(&b, nir_build_deref_var(&b, input_vrs_img), global_id, NULL);
nir_def *value = nir_txf_deref(&b, nir_build_deref_var(&b, input_vrs_img), global_id, NULL);
/* Extract the X/Y rates and clamp them because the maximum supported VRS rate is 2x2 (1x1 in
* hardware).
@ -81,17 +81,17 @@ build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf
* VRS rate X = min(value >> 2, 1)
* VRS rate Y = min(value & 3, 1)
*/
nir_ssa_def *x_rate = nir_ushr_imm(&b, nir_channel(&b, value, 0), 2);
nir_def *x_rate = nir_ushr_imm(&b, nir_channel(&b, value, 0), 2);
x_rate = nir_umin(&b, x_rate, nir_imm_int(&b, 1));
nir_ssa_def *y_rate = nir_iand_imm(&b, nir_channel(&b, value, 0), 3);
nir_def *y_rate = nir_iand_imm(&b, nir_channel(&b, value, 0), 3);
y_rate = nir_umin(&b, y_rate, nir_imm_int(&b, 1));
/* Compute the final VRS rate. */
nir_ssa_def *vrs_rates = nir_ior(&b, nir_ishl_imm(&b, y_rate, 10), nir_ishl_imm(&b, x_rate, 6));
nir_def *vrs_rates = nir_ior(&b, nir_ishl_imm(&b, y_rate, 10), nir_ishl_imm(&b, x_rate, 6));
/* Load the HTILE buffer descriptor. */
nir_ssa_def *htile_buf = radv_meta_load_descriptor(&b, 0, 1);
nir_def *htile_buf = radv_meta_load_descriptor(&b, 0, 1);
/* Load the HTILE value if requested, otherwise use the default value. */
nir_variable *htile_value = nir_local_variable_create(b.impl, glsl_int_type(), "htile_value");
@ -99,7 +99,7 @@ build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf
nir_push_if(&b, nir_ieq_imm(&b, read_htile_value, 1));
{
/* Load the existing HTILE 32-bit value for this 8x8 pixels area. */
nir_ssa_def *input_value = nir_load_ssbo(&b, 1, 32, htile_buf, htile_addr);
nir_def *input_value = nir_load_ssbo(&b, 1, 32, htile_buf, htile_addr);
/* Clear the 4-bit VRS rates. */
nir_store_var(&b, htile_value, nir_iand_imm(&b, input_value, 0xfffff33f), 0x1);
@ -111,7 +111,7 @@ build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf
nir_pop_if(&b, NULL);
/* Set the VRS rates loaded from the image. */
nir_ssa_def *output_value = nir_ior(&b, nir_load_var(&b, htile_value), vrs_rates);
nir_def *output_value = nir_ior(&b, nir_load_var(&b, htile_value), vrs_rates);
/* Store the updated HTILE 32-bit which contains the VRS rates. */
nir_store_ssbo(&b, output_value, htile_buf, htile_addr, .access = ACCESS_NON_READABLE);


@ -37,13 +37,13 @@ build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *sur
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_ssa_def *src_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_ssa_def *src_dcc_pitch = nir_channels(&b, src_dcc_size, 1);
nir_ssa_def *src_dcc_height = nir_channels(&b, src_dcc_size, 2);
nir_def *src_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_def *src_dcc_pitch = nir_channels(&b, src_dcc_size, 1);
nir_def *src_dcc_height = nir_channels(&b, src_dcc_size, 2);
nir_ssa_def *dst_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8);
nir_ssa_def *dst_dcc_pitch = nir_channels(&b, dst_dcc_size, 1);
nir_ssa_def *dst_dcc_height = nir_channels(&b, dst_dcc_size, 2);
nir_def *dst_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8);
nir_def *dst_dcc_pitch = nir_channels(&b, dst_dcc_size, 1);
nir_def *dst_dcc_height = nir_channels(&b, dst_dcc_size, 2);
nir_variable *input_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_in");
input_dcc->data.descriptor_set = 0;
input_dcc->data.binding = 0;
@ -51,25 +51,25 @@ build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *sur
output_dcc->data.descriptor_set = 0;
output_dcc->data.binding = 1;
nir_ssa_def *input_dcc_ref = &nir_build_deref_var(&b, input_dcc)->dest.ssa;
nir_ssa_def *output_dcc_ref = &nir_build_deref_var(&b, output_dcc)->dest.ssa;
nir_def *input_dcc_ref = &nir_build_deref_var(&b, input_dcc)->dest.ssa;
nir_def *output_dcc_ref = &nir_build_deref_var(&b, output_dcc)->dest.ssa;
nir_ssa_def *coord = get_global_ids(&b, 2);
nir_ssa_def *zero = nir_imm_int(&b, 0);
nir_def *coord = get_global_ids(&b, 2);
nir_def *zero = nir_imm_int(&b, 0);
coord =
nir_imul(&b, coord, nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, surf->u.gfx9.color.dcc_block_height));
nir_ssa_def *src = ac_nir_dcc_addr_from_coord(
&b, &dev->physical_device->rad_info, surf->bpe, &surf->u.gfx9.color.dcc_equation, src_dcc_pitch, src_dcc_height,
zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero, zero);
nir_ssa_def *dst = ac_nir_dcc_addr_from_coord(
nir_def *src = ac_nir_dcc_addr_from_coord(&b, &dev->physical_device->rad_info, surf->bpe,
&surf->u.gfx9.color.dcc_equation, src_dcc_pitch, src_dcc_height, zero,
nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero, zero);
nir_def *dst = ac_nir_dcc_addr_from_coord(
&b, &dev->physical_device->rad_info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation, dst_dcc_pitch,
dst_dcc_height, zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero, zero);
nir_ssa_def *dcc_val = nir_image_deref_load(&b, 1, 32, input_dcc_ref, nir_vec4(&b, src, src, src, src),
nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = dim);
nir_def *dcc_val = nir_image_deref_load(&b, 1, 32, input_dcc_ref, nir_vec4(&b, src, src, src, src),
nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = dim);
nir_image_deref_store(&b, output_dcc_ref, nir_vec4(&b, dst, dst, dst, dst), nir_ssa_undef(&b, 1, 32), dcc_val,
nir_image_deref_store(&b, output_dcc_ref, nir_vec4(&b, dst, dst, dst, dst), nir_undef(&b, 1, 32), dcc_val,
nir_imm_int(&b, 0), .image_dim = dim);
return b.shader;


@ -51,16 +51,15 @@ build_expand_depth_stencil_compute_shader(struct radv_device *dev)
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
b.shader->info.workgroup_size[2], 0);
nir_def *invoc_id = nir_load_local_invocation_id(&b);
nir_def *wg_id = nir_load_workgroup_id(&b, 32);
nir_def *block_size = nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
b.shader->info.workgroup_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
nir_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
nir_ssa_def *data =
nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, global_id,
nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
nir_def *data = nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, global_id,
nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
/* We need a SCOPE_DEVICE memory_scope because ACO will avoid
* creating a vmcnt(0) because it expects the L1 cache to keep memory
@ -69,7 +68,7 @@ build_expand_depth_stencil_compute_shader(struct radv_device *dev)
nir_barrier(&b, .execution_scope = SCOPE_WORKGROUP, .memory_scope = SCOPE_DEVICE,
.memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id, nir_ssa_undef(&b, 1, 32), data,
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id, nir_undef(&b, 1, 32), data,
nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
return b.shader;
}


@ -41,13 +41,13 @@
* - the EAC shader doesn't do SNORM correctly, so this has that fixed.
*/
static nir_ssa_def *
flip_endian(nir_builder *b, nir_ssa_def *src, unsigned cnt)
static nir_def *
flip_endian(nir_builder *b, nir_def *src, unsigned cnt)
{
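/* Byte-swap each 32-bit word of the payload by splitting it into bytes and
 * reassembling them in reverse order. */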
nir_ssa_def *v[2];
nir_def *v[2];
for (unsigned i = 0; i < cnt; ++i) {
nir_ssa_def *intermediate[4];
nir_ssa_def *chan = cnt == 1 ? src : nir_channel(b, src, i);
nir_def *intermediate[4];
nir_def *chan = cnt == 1 ? src : nir_channel(b, src, i);
for (unsigned j = 0; j < 4; ++j)
intermediate[j] = nir_ubfe_imm(b, chan, 8 * j, 8);
v[i] = nir_ior(b, nir_ior(b, nir_ishl_imm(b, intermediate[0], 24), nir_ishl_imm(b, intermediate[1], 16)),
@ -56,14 +56,14 @@ flip_endian(nir_builder *b, nir_ssa_def *src, unsigned cnt)
return cnt == 1 ? v[0] : nir_vec(b, v, cnt);
}
static nir_ssa_def *
etc1_color_modifier_lookup(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
static nir_def *
etc1_color_modifier_lookup(nir_builder *b, nir_def *x, nir_def *y)
{
const unsigned table[8][2] = {{2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183}};
nir_ssa_def *upper = nir_ieq_imm(b, y, 1);
nir_ssa_def *result = NULL;
nir_def *upper = nir_ieq_imm(b, y, 1);
nir_def *result = NULL;
for (unsigned i = 0; i < 8; ++i) {
nir_ssa_def *tmp = nir_bcsel(b, upper, nir_imm_int(b, table[i][1]), nir_imm_int(b, table[i][0]));
nir_def *tmp = nir_bcsel(b, upper, nir_imm_int(b, table[i][1]), nir_imm_int(b, table[i][0]));
if (result)
result = nir_bcsel(b, nir_ieq_imm(b, x, i), tmp, result);
else
@ -72,11 +72,11 @@ etc1_color_modifier_lookup(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
return result;
}
static nir_ssa_def *
etc2_distance_lookup(nir_builder *b, nir_ssa_def *x)
static nir_def *
etc2_distance_lookup(nir_builder *b, nir_def *x)
{
const unsigned table[8] = {3, 6, 11, 16, 23, 32, 41, 64};
nir_ssa_def *result = NULL;
nir_def *result = NULL;
for (unsigned i = 0; i < 8; ++i) {
if (result)
result = nir_bcsel(b, nir_ieq_imm(b, x, i), nir_imm_int(b, table[i]), result);
@ -86,14 +86,14 @@ etc2_distance_lookup(nir_builder *b, nir_ssa_def *x)
return result;
}
static nir_ssa_def *
etc1_alpha_modifier_lookup(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
static nir_def *
etc1_alpha_modifier_lookup(nir_builder *b, nir_def *x, nir_def *y)
{
const unsigned table[16] = {0xe852, 0xc962, 0xc741, 0xc531, 0xb752, 0xa862, 0xa763, 0xa742,
0x9751, 0x9741, 0x9731, 0x9641, 0x9632, 0x9210, 0x8753, 0x8642};
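/* Each 16-bit table entry packs four 4-bit modifiers; the nibble selected by
 * y * 4 is extracted by the ubfe at the end. */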
nir_ssa_def *result = NULL;
nir_def *result = NULL;
for (unsigned i = 0; i < 16; ++i) {
nir_ssa_def *tmp = nir_imm_int(b, table[i]);
nir_def *tmp = nir_imm_int(b, table[i]);
if (result)
result = nir_bcsel(b, nir_ieq_imm(b, x, i), tmp, result);
else
@ -102,45 +102,44 @@ etc1_alpha_modifier_lookup(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
return nir_ubfe(b, result, nir_imul_imm(b, y, 4), nir_imm_int(b, 4));
}
static nir_ssa_def *
etc_extend(nir_builder *b, nir_ssa_def *v, int bits)
static nir_def *
etc_extend(nir_builder *b, nir_def *v, int bits)
{
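/* Expand an n-bit channel to 8 bits by bit replication, e.g. a 4-bit 0xa
 * becomes 0xaa. */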
if (bits == 4)
return nir_imul_imm(b, v, 0x11);
return nir_ior(b, nir_ishl_imm(b, v, 8 - bits), nir_ushr_imm(b, v, bits - (8 - bits)));
}
static nir_ssa_def *
decode_etc2_alpha(struct nir_builder *b, nir_ssa_def *alpha_payload, nir_ssa_def *linear_pixel, bool eac,
nir_ssa_def *is_signed)
static nir_def *
decode_etc2_alpha(struct nir_builder *b, nir_def *alpha_payload, nir_def *linear_pixel, bool eac, nir_def *is_signed)
{
alpha_payload = flip_endian(b, alpha_payload, 2);
nir_ssa_def *alpha_x = nir_channel(b, alpha_payload, 1);
nir_ssa_def *alpha_y = nir_channel(b, alpha_payload, 0);
nir_ssa_def *bit_offset = nir_isub_imm(b, 45, nir_imul_imm(b, linear_pixel, 3));
nir_ssa_def *base = nir_ubfe_imm(b, alpha_y, 24, 8);
nir_ssa_def *multiplier = nir_ubfe_imm(b, alpha_y, 20, 4);
nir_ssa_def *table = nir_ubfe_imm(b, alpha_y, 16, 4);
nir_def *alpha_x = nir_channel(b, alpha_payload, 1);
nir_def *alpha_y = nir_channel(b, alpha_payload, 0);
nir_def *bit_offset = nir_isub_imm(b, 45, nir_imul_imm(b, linear_pixel, 3));
nir_def *base = nir_ubfe_imm(b, alpha_y, 24, 8);
nir_def *multiplier = nir_ubfe_imm(b, alpha_y, 20, 4);
nir_def *table = nir_ubfe_imm(b, alpha_y, 16, 4);
if (eac) {
nir_ssa_def *signed_base = nir_ibfe_imm(b, alpha_y, 24, 8);
nir_def *signed_base = nir_ibfe_imm(b, alpha_y, 24, 8);
signed_base = nir_imul_imm(b, signed_base, 8);
base = nir_iadd_imm(b, nir_imul_imm(b, base, 8), 4);
base = nir_bcsel(b, is_signed, signed_base, base);
multiplier = nir_imax(b, nir_imul_imm(b, multiplier, 8), nir_imm_int(b, 1));
}
nir_ssa_def *lsb_index = nir_ubfe(b, nir_bcsel(b, nir_uge_imm(b, bit_offset, 32), alpha_y, alpha_x),
nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 2));
nir_def *lsb_index = nir_ubfe(b, nir_bcsel(b, nir_uge_imm(b, bit_offset, 32), alpha_y, alpha_x),
nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 2));
bit_offset = nir_iadd_imm(b, bit_offset, 2);
nir_ssa_def *msb = nir_ubfe(b, nir_bcsel(b, nir_uge_imm(b, bit_offset, 32), alpha_y, alpha_x),
nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 1));
nir_ssa_def *mod = nir_ixor(b, etc1_alpha_modifier_lookup(b, table, lsb_index), nir_iadd_imm(b, msb, -1));
nir_ssa_def *a = nir_iadd(b, base, nir_imul(b, mod, multiplier));
nir_def *msb = nir_ubfe(b, nir_bcsel(b, nir_uge_imm(b, bit_offset, 32), alpha_y, alpha_x),
nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 1));
nir_def *mod = nir_ixor(b, etc1_alpha_modifier_lookup(b, table, lsb_index), nir_iadd_imm(b, msb, -1));
nir_def *a = nir_iadd(b, base, nir_imul(b, mod, multiplier));
nir_ssa_def *low_bound = nir_imm_int(b, 0);
nir_ssa_def *high_bound = nir_imm_int(b, 255);
nir_ssa_def *final_mult = nir_imm_float(b, 1 / 255.0);
nir_def *low_bound = nir_imm_int(b, 0);
nir_def *high_bound = nir_imm_int(b, 255);
nir_def *final_mult = nir_imm_float(b, 1 / 255.0);
if (eac) {
low_bound = nir_bcsel(b, is_signed, nir_imm_int(b, -1023), low_bound);
high_bound = nir_bcsel(b, is_signed, nir_imm_int(b, 1023), nir_imm_int(b, 2047));
@ -177,55 +176,55 @@ build_shader(struct radv_device *dev)
output_img_3d->data.descriptor_set = 0;
output_img_3d->data.binding = 1;
nir_ssa_def *global_id = get_global_ids(&b, 3);
nir_def *global_id = get_global_ids(&b, 3);
nir_ssa_def *consts = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
nir_ssa_def *consts2 = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
nir_ssa_def *offset = nir_channels(&b, consts, 7);
nir_ssa_def *format = nir_channel(&b, consts, 3);
nir_ssa_def *image_type = nir_channel(&b, consts2, 0);
nir_ssa_def *is_3d = nir_ieq_imm(&b, image_type, VK_IMAGE_TYPE_3D);
nir_ssa_def *coord = nir_iadd(&b, global_id, offset);
nir_ssa_def *src_coord = nir_vec3(&b, nir_ushr_imm(&b, nir_channel(&b, coord, 0), 2),
nir_ushr_imm(&b, nir_channel(&b, coord, 1), 2), nir_channel(&b, coord, 2));
nir_def *consts = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
nir_def *consts2 = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
nir_def *offset = nir_channels(&b, consts, 7);
nir_def *format = nir_channel(&b, consts, 3);
nir_def *image_type = nir_channel(&b, consts2, 0);
nir_def *is_3d = nir_ieq_imm(&b, image_type, VK_IMAGE_TYPE_3D);
nir_def *coord = nir_iadd(&b, global_id, offset);
nir_def *src_coord = nir_vec3(&b, nir_ushr_imm(&b, nir_channel(&b, coord, 0), 2),
nir_ushr_imm(&b, nir_channel(&b, coord, 1), 2), nir_channel(&b, coord, 2));
nir_variable *payload_var = nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "payload");
nir_push_if(&b, is_3d);
{
nir_ssa_def *color = nir_txf_deref(&b, nir_build_deref_var(&b, input_img_3d), src_coord, nir_imm_int(&b, 0));
nir_def *color = nir_txf_deref(&b, nir_build_deref_var(&b, input_img_3d), src_coord, nir_imm_int(&b, 0));
nir_store_var(&b, payload_var, color, 0xf);
}
nir_push_else(&b, NULL);
{
nir_ssa_def *color = nir_txf_deref(&b, nir_build_deref_var(&b, input_img_2d), src_coord, nir_imm_int(&b, 0));
nir_def *color = nir_txf_deref(&b, nir_build_deref_var(&b, input_img_2d), src_coord, nir_imm_int(&b, 0));
nir_store_var(&b, payload_var, color, 0xf);
}
nir_pop_if(&b, NULL);
nir_ssa_def *pixel_coord = nir_iand_imm(&b, nir_channels(&b, coord, 3), 3);
nir_ssa_def *linear_pixel =
nir_def *pixel_coord = nir_iand_imm(&b, nir_channels(&b, coord, 3), 3);
nir_def *linear_pixel =
nir_iadd(&b, nir_imul_imm(&b, nir_channel(&b, pixel_coord, 0), 4), nir_channel(&b, pixel_coord, 1));
nir_ssa_def *payload = nir_load_var(&b, payload_var);
nir_def *payload = nir_load_var(&b, payload_var);
nir_variable *color = nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "color");
nir_store_var(&b, color, nir_imm_vec4(&b, 1.0, 0.0, 0.0, 1.0), 0xf);
nir_push_if(&b, nir_ilt_imm(&b, format, VK_FORMAT_EAC_R11_UNORM_BLOCK));
{
nir_ssa_def *alpha_bits_8 = nir_ige_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK);
nir_ssa_def *alpha_bits_1 = nir_iand(&b, nir_ige_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK),
nir_ilt_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK));
nir_def *alpha_bits_8 = nir_ige_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK);
nir_def *alpha_bits_1 = nir_iand(&b, nir_ige_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK),
nir_ilt_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK));
nir_ssa_def *color_payload =
nir_def *color_payload =
nir_bcsel(&b, alpha_bits_8, nir_channels(&b, payload, 0xC), nir_channels(&b, payload, 3));
color_payload = flip_endian(&b, color_payload, 2);
nir_ssa_def *color_y = nir_channel(&b, color_payload, 0);
nir_ssa_def *color_x = nir_channel(&b, color_payload, 1);
nir_ssa_def *flip = nir_test_mask(&b, color_y, 1);
nir_ssa_def *subblock =
nir_def *color_y = nir_channel(&b, color_payload, 0);
nir_def *color_x = nir_channel(&b, color_payload, 1);
nir_def *flip = nir_test_mask(&b, color_y, 1);
nir_def *subblock =
nir_ushr_imm(&b, nir_bcsel(&b, flip, nir_channel(&b, pixel_coord, 1), nir_channel(&b, pixel_coord, 0)), 1);
nir_variable *punchthrough = nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "punchthrough");
nir_ssa_def *punchthrough_init = nir_iand(&b, alpha_bits_1, nir_inot(&b, nir_test_mask(&b, color_y, 2)));
nir_def *punchthrough_init = nir_iand(&b, alpha_bits_1, nir_inot(&b, nir_test_mask(&b, color_y, 2)));
nir_store_var(&b, punchthrough, punchthrough_init, 0x1);
nir_variable *etc1_compat = nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "etc1_compat");
@ -249,13 +248,13 @@ build_shader(struct radv_device *dev)
nir_variable *base_rgb = nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "base_rgb");
nir_store_var(&b, rgb_result, nir_imm_ivec3(&b, 255, 0, 0), 0x7);
nir_ssa_def *msb = nir_iand_imm(&b, nir_ushr(&b, color_x, nir_iadd_imm(&b, linear_pixel, 15)), 2);
nir_ssa_def *lsb = nir_iand_imm(&b, nir_ushr(&b, color_x, linear_pixel), 1);
nir_def *msb = nir_iand_imm(&b, nir_ushr(&b, color_x, nir_iadd_imm(&b, linear_pixel, 15)), 2);
nir_def *lsb = nir_iand_imm(&b, nir_ushr(&b, color_x, linear_pixel), 1);
nir_push_if(&b, nir_iand(&b, nir_inot(&b, alpha_bits_1), nir_inot(&b, nir_test_mask(&b, color_y, 2))));
{
nir_store_var(&b, etc1_compat, nir_imm_true(&b), 1);
nir_ssa_def *tmp[3];
nir_def *tmp[3];
for (unsigned i = 0; i < 3; ++i)
tmp[i] = etc_extend(
&b,
@ -266,29 +265,29 @@ build_shader(struct radv_device *dev)
}
nir_push_else(&b, NULL);
{
nir_ssa_def *rb = nir_ubfe_imm(&b, color_y, 27, 5);
nir_ssa_def *rd = nir_ibfe_imm(&b, color_y, 24, 3);
nir_ssa_def *gb = nir_ubfe_imm(&b, color_y, 19, 5);
nir_ssa_def *gd = nir_ibfe_imm(&b, color_y, 16, 3);
nir_ssa_def *bb = nir_ubfe_imm(&b, color_y, 11, 5);
nir_ssa_def *bd = nir_ibfe_imm(&b, color_y, 8, 3);
nir_ssa_def *r1 = nir_iadd(&b, rb, rd);
nir_ssa_def *g1 = nir_iadd(&b, gb, gd);
nir_ssa_def *b1 = nir_iadd(&b, bb, bd);
nir_def *rb = nir_ubfe_imm(&b, color_y, 27, 5);
nir_def *rd = nir_ibfe_imm(&b, color_y, 24, 3);
nir_def *gb = nir_ubfe_imm(&b, color_y, 19, 5);
nir_def *gd = nir_ibfe_imm(&b, color_y, 16, 3);
nir_def *bb = nir_ubfe_imm(&b, color_y, 11, 5);
nir_def *bd = nir_ibfe_imm(&b, color_y, 8, 3);
nir_def *r1 = nir_iadd(&b, rb, rd);
nir_def *g1 = nir_iadd(&b, gb, gd);
nir_def *b1 = nir_iadd(&b, bb, bd);
nir_push_if(&b, nir_ugt_imm(&b, r1, 31));
{
nir_ssa_def *r0 =
nir_def *r0 =
nir_ior(&b, nir_ubfe_imm(&b, color_y, 24, 2), nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 27, 2), 2));
nir_ssa_def *g0 = nir_ubfe_imm(&b, color_y, 20, 4);
nir_ssa_def *b0 = nir_ubfe_imm(&b, color_y, 16, 4);
nir_ssa_def *r2 = nir_ubfe_imm(&b, color_y, 12, 4);
nir_ssa_def *g2 = nir_ubfe_imm(&b, color_y, 8, 4);
nir_ssa_def *b2 = nir_ubfe_imm(&b, color_y, 4, 4);
nir_ssa_def *da =
nir_def *g0 = nir_ubfe_imm(&b, color_y, 20, 4);
nir_def *b0 = nir_ubfe_imm(&b, color_y, 16, 4);
nir_def *r2 = nir_ubfe_imm(&b, color_y, 12, 4);
nir_def *g2 = nir_ubfe_imm(&b, color_y, 8, 4);
nir_def *b2 = nir_ubfe_imm(&b, color_y, 4, 4);
nir_def *da =
nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 2), 1), nir_iand_imm(&b, color_y, 1));
nir_ssa_def *dist = etc2_distance_lookup(&b, da);
nir_ssa_def *index = nir_ior(&b, lsb, msb);
nir_def *dist = etc2_distance_lookup(&b, da);
nir_def *index = nir_ior(&b, lsb, msb);
nir_store_var(&b, punchthrough,
nir_iand(&b, nir_load_var(&b, punchthrough), nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)),
@ -300,8 +299,8 @@ build_shader(struct radv_device *dev)
nir_push_else(&b, NULL);
{
nir_ssa_def *tmp = nir_iadd(&b, etc_extend(&b, nir_vec3(&b, r2, g2, b2), 4),
nir_imul(&b, dist, nir_isub_imm(&b, 2, index)));
nir_def *tmp = nir_iadd(&b, etc_extend(&b, nir_vec3(&b, r2, g2, b2), 4),
nir_imul(&b, dist, nir_isub_imm(&b, 2, index)));
nir_store_var(&b, rgb_result, tmp, 0x7);
}
nir_pop_if(&b, NULL);
@ -309,23 +308,22 @@ build_shader(struct radv_device *dev)
nir_push_else(&b, NULL);
nir_push_if(&b, nir_ugt_imm(&b, g1, 31));
{
nir_ssa_def *r0 = nir_ubfe_imm(&b, color_y, 27, 4);
nir_ssa_def *g0 = nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 24, 3), 1),
nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 20), 1));
nir_ssa_def *b0 =
nir_def *r0 = nir_ubfe_imm(&b, color_y, 27, 4);
nir_def *g0 = nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 24, 3), 1),
nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 20), 1));
nir_def *b0 =
nir_ior(&b, nir_ubfe_imm(&b, color_y, 15, 3), nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 16), 8));
nir_ssa_def *r2 = nir_ubfe_imm(&b, color_y, 11, 4);
nir_ssa_def *g2 = nir_ubfe_imm(&b, color_y, 7, 4);
nir_ssa_def *b2 = nir_ubfe_imm(&b, color_y, 3, 4);
nir_ssa_def *da = nir_iand_imm(&b, color_y, 4);
nir_ssa_def *db = nir_iand_imm(&b, color_y, 1);
nir_ssa_def *d = nir_iadd(&b, da, nir_imul_imm(&b, db, 2));
nir_ssa_def *d0 = nir_iadd(&b, nir_ishl_imm(&b, r0, 16), nir_iadd(&b, nir_ishl_imm(&b, g0, 8), b0));
nir_ssa_def *d2 = nir_iadd(&b, nir_ishl_imm(&b, r2, 16), nir_iadd(&b, nir_ishl_imm(&b, g2, 8), b2));
nir_def *r2 = nir_ubfe_imm(&b, color_y, 11, 4);
nir_def *g2 = nir_ubfe_imm(&b, color_y, 7, 4);
nir_def *b2 = nir_ubfe_imm(&b, color_y, 3, 4);
nir_def *da = nir_iand_imm(&b, color_y, 4);
nir_def *db = nir_iand_imm(&b, color_y, 1);
nir_def *d = nir_iadd(&b, da, nir_imul_imm(&b, db, 2));
nir_def *d0 = nir_iadd(&b, nir_ishl_imm(&b, r0, 16), nir_iadd(&b, nir_ishl_imm(&b, g0, 8), b0));
nir_def *d2 = nir_iadd(&b, nir_ishl_imm(&b, r2, 16), nir_iadd(&b, nir_ishl_imm(&b, g2, 8), b2));
d = nir_bcsel(&b, nir_uge(&b, d0, d2), nir_iadd_imm(&b, d, 1), d);
nir_ssa_def *dist = etc2_distance_lookup(&b, d);
nir_ssa_def *base =
nir_bcsel(&b, nir_ine_imm(&b, msb, 0), nir_vec3(&b, r2, g2, b2), nir_vec3(&b, r0, g0, b0));
nir_def *dist = etc2_distance_lookup(&b, d);
nir_def *base = nir_bcsel(&b, nir_ine_imm(&b, msb, 0), nir_vec3(&b, r2, g2, b2), nir_vec3(&b, r0, g0, b0));
base = etc_extend(&b, base, 4);
base = nir_iadd(&b, base, nir_imul(&b, dist, nir_isub_imm(&b, 1, nir_imul_imm(&b, lsb, 2))));
nir_store_var(&b, rgb_result, base, 0x7);
@ -336,19 +334,19 @@ build_shader(struct radv_device *dev)
nir_push_else(&b, NULL);
nir_push_if(&b, nir_ugt_imm(&b, b1, 31));
{
nir_ssa_def *r0 = nir_ubfe_imm(&b, color_y, 25, 6);
nir_ssa_def *g0 =
nir_def *r0 = nir_ubfe_imm(&b, color_y, 25, 6);
nir_def *g0 =
nir_ior(&b, nir_ubfe_imm(&b, color_y, 17, 6), nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 18), 0x40));
nir_ssa_def *b0 = nir_ior(
nir_def *b0 = nir_ior(
&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 11, 2), 3),
nir_ior(&b, nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 11), 0x20), nir_ubfe_imm(&b, color_y, 7, 3)));
nir_ssa_def *rh =
nir_def *rh =
nir_ior(&b, nir_iand_imm(&b, color_y, 1), nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 5), 1));
nir_ssa_def *rv = nir_ubfe_imm(&b, color_x, 13, 6);
nir_ssa_def *gh = nir_ubfe_imm(&b, color_x, 25, 7);
nir_ssa_def *gv = nir_ubfe_imm(&b, color_x, 6, 7);
nir_ssa_def *bh = nir_ubfe_imm(&b, color_x, 19, 6);
nir_ssa_def *bv = nir_ubfe_imm(&b, color_x, 0, 6);
nir_def *rv = nir_ubfe_imm(&b, color_x, 13, 6);
nir_def *gh = nir_ubfe_imm(&b, color_x, 25, 7);
nir_def *gv = nir_ubfe_imm(&b, color_x, 6, 7);
nir_def *bh = nir_ubfe_imm(&b, color_x, 19, 6);
nir_def *bv = nir_ubfe_imm(&b, color_x, 0, 6);
r0 = etc_extend(&b, r0, 6);
g0 = etc_extend(&b, g0, 7);
@ -360,11 +358,9 @@ build_shader(struct radv_device *dev)
bh = etc_extend(&b, bh, 6);
bv = etc_extend(&b, bv, 6);
nir_ssa_def *rgb = nir_vec3(&b, r0, g0, b0);
nir_ssa_def *dx =
nir_imul(&b, nir_isub(&b, nir_vec3(&b, rh, gh, bh), rgb), nir_channel(&b, pixel_coord, 0));
nir_ssa_def *dy =
nir_imul(&b, nir_isub(&b, nir_vec3(&b, rv, gv, bv), rgb), nir_channel(&b, pixel_coord, 1));
nir_def *rgb = nir_vec3(&b, r0, g0, b0);
nir_def *dx = nir_imul(&b, nir_isub(&b, nir_vec3(&b, rh, gh, bh), rgb), nir_channel(&b, pixel_coord, 0));
nir_def *dy = nir_imul(&b, nir_isub(&b, nir_vec3(&b, rv, gv, bv), rgb), nir_channel(&b, pixel_coord, 1));
rgb = nir_iadd(&b, rgb, nir_ishr_imm(&b, nir_iadd_imm(&b, nir_iadd(&b, dx, dy), 2), 2));
nir_store_var(&b, rgb_result, rgb, 0x7);
nir_store_var(&b, punchthrough, nir_imm_false(&b), 0x1);
@ -372,8 +368,8 @@ build_shader(struct radv_device *dev)
nir_push_else(&b, NULL);
{
nir_store_var(&b, etc1_compat, nir_imm_true(&b), 1);
nir_ssa_def *subblock_b = nir_ine_imm(&b, subblock, 0);
nir_ssa_def *tmp[] = {
nir_def *subblock_b = nir_ine_imm(&b, subblock, 0);
nir_def *tmp[] = {
nir_bcsel(&b, subblock_b, r1, rb),
nir_bcsel(&b, subblock_b, g1, gb),
nir_bcsel(&b, subblock_b, b1, bb),
@ -387,14 +383,14 @@ build_shader(struct radv_device *dev)
nir_pop_if(&b, NULL);
nir_push_if(&b, nir_load_var(&b, etc1_compat));
{
nir_ssa_def *etc1_table_index =
nir_def *etc1_table_index =
nir_ubfe(&b, color_y, nir_isub_imm(&b, 5, nir_imul_imm(&b, subblock, 3)), nir_imm_int(&b, 3));
nir_ssa_def *sgn = nir_isub_imm(&b, 1, msb);
nir_def *sgn = nir_isub_imm(&b, 1, msb);
sgn = nir_bcsel(&b, nir_load_var(&b, punchthrough), nir_imul(&b, sgn, lsb), sgn);
nir_store_var(&b, punchthrough,
nir_iand(&b, nir_load_var(&b, punchthrough), nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)), 0x1);
nir_ssa_def *off = nir_imul(&b, etc1_color_modifier_lookup(&b, etc1_table_index, lsb), sgn);
nir_ssa_def *result = nir_iadd(&b, nir_load_var(&b, base_rgb), off);
nir_def *off = nir_imul(&b, etc1_color_modifier_lookup(&b, etc1_table_index, lsb), sgn);
nir_def *result = nir_iadd(&b, nir_load_var(&b, base_rgb), off);
nir_store_var(&b, rgb_result, result, 0x7);
}
nir_pop_if(&b, NULL);
@ -404,7 +400,7 @@ build_shader(struct radv_device *dev)
nir_store_var(&b, rgb_result, nir_imm_ivec3(&b, 0, 0, 0), 0x7);
}
nir_pop_if(&b, NULL);
nir_ssa_def *col[4];
nir_def *col[4];
for (unsigned i = 0; i < 3; ++i)
col[i] = nir_fdiv_imm(&b, nir_i2f32(&b, nir_channel(&b, nir_load_var(&b, rgb_result), i)), 255.0);
col[3] = nir_load_var(&b, alpha_result);
@ -412,9 +408,9 @@ build_shader(struct radv_device *dev)
}
nir_push_else(&b, NULL);
{ /* EAC */
nir_ssa_def *is_signed = nir_ior(&b, nir_ieq_imm(&b, format, VK_FORMAT_EAC_R11_SNORM_BLOCK),
nir_ieq_imm(&b, format, VK_FORMAT_EAC_R11G11_SNORM_BLOCK));
nir_ssa_def *val[4];
nir_def *is_signed = nir_ior(&b, nir_ieq_imm(&b, format, VK_FORMAT_EAC_R11_SNORM_BLOCK),
nir_ieq_imm(&b, format, VK_FORMAT_EAC_R11G11_SNORM_BLOCK));
nir_def *val[4];
for (int i = 0; i < 2; ++i) {
val[i] = decode_etc2_alpha(&b, nir_channels(&b, payload, 3 << (2 * i)), linear_pixel, true, is_signed);
}
@ -424,18 +420,18 @@ build_shader(struct radv_device *dev)
}
nir_pop_if(&b, NULL);
nir_ssa_def *outval = nir_load_var(&b, color);
nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1),
nir_channel(&b, coord, 2), nir_ssa_undef(&b, 1, 32));
nir_def *outval = nir_load_var(&b, color);
nir_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), nir_channel(&b, coord, 2),
nir_undef(&b, 1, 32));
nir_push_if(&b, is_3d);
{
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_3d)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32),
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_3d)->dest.ssa, img_coord, nir_undef(&b, 1, 32),
outval, nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_3D);
}
nir_push_else(&b, NULL);
{
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_2d)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32),
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_2d)->dest.ssa, img_coord, nir_undef(&b, 1, 32),
outval, nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
}
nir_pop_if(&b, NULL);


@ -52,13 +52,12 @@ build_dcc_decompress_compute_shader(struct radv_device *dev)
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_ssa_def *global_id = get_global_ids(&b, 2);
nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, global_id, 0), nir_channel(&b, global_id, 1),
nir_ssa_undef(&b, 1, 32), nir_ssa_undef(&b, 1, 32));
nir_def *global_id = get_global_ids(&b, 2);
nir_def *img_coord = nir_vec4(&b, nir_channel(&b, global_id, 0), nir_channel(&b, global_id, 1), nir_undef(&b, 1, 32),
nir_undef(&b, 1, 32));
nir_ssa_def *data =
nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, img_coord,
nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
nir_def *data = nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, img_coord,
nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
/* We need a SCOPE_DEVICE memory_scope because ACO will avoid
* creating a vmcnt(0) because it expects the L1 cache to keep memory
@ -67,7 +66,7 @@ build_dcc_decompress_compute_shader(struct radv_device *dev)
nir_barrier(&b, .execution_scope = SCOPE_WORKGROUP, .memory_scope = SCOPE_DEVICE,
.memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32), data,
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_undef(&b, 1, 32), data,
nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
return b.shader;
}


@ -42,28 +42,28 @@ build_fmask_copy_compute_shader(struct radv_device *dev, int samples)
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
nir_ssa_def *block_size = nir_imm_ivec3(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
b.shader->info.workgroup_size[2]);
nir_def *invoc_id = nir_load_local_invocation_id(&b);
nir_def *wg_id = nir_load_workgroup_id(&b, 32);
nir_def *block_size = nir_imm_ivec3(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
b.shader->info.workgroup_size[2]);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
nir_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
/* Get coordinates. */
nir_ssa_def *src_coord = nir_trim_vector(&b, global_id, 2);
nir_ssa_def *dst_coord = nir_vec4(&b, nir_channel(&b, src_coord, 0), nir_channel(&b, src_coord, 1),
nir_ssa_undef(&b, 1, 32), nir_ssa_undef(&b, 1, 32));
nir_def *src_coord = nir_trim_vector(&b, global_id, 2);
nir_def *dst_coord = nir_vec4(&b, nir_channel(&b, src_coord, 0), nir_channel(&b, src_coord, 1), nir_undef(&b, 1, 32),
nir_undef(&b, 1, 32));
nir_tex_src frag_mask_srcs[] = {{
.src_type = nir_tex_src_coord,
.src = nir_src_for_ssa(src_coord),
}};
nir_ssa_def *frag_mask =
nir_def *frag_mask =
nir_build_tex_deref_instr(&b, nir_texop_fragment_mask_fetch_amd, nir_build_deref_var(&b, input_img), NULL,
ARRAY_SIZE(frag_mask_srcs), frag_mask_srcs);
/* Get the maximum sample used in this fragment. */
nir_ssa_def *max_sample_index = nir_imm_int(&b, 0);
nir_def *max_sample_index = nir_imm_int(&b, 0);
for (uint32_t s = 0; s < samples; s++) {
/* max_sample_index = MAX2(max_sample_index, (frag_mask >> (s * 4)) & 0xf) */
max_sample_index = nir_umax(&b, max_sample_index,
@ -75,7 +75,7 @@ build_fmask_copy_compute_shader(struct radv_device *dev, int samples)
nir_loop *loop = nir_push_loop(&b);
{
nir_ssa_def *sample_id = nir_load_var(&b, counter);
nir_def *sample_id = nir_load_var(&b, counter);
nir_tex_src frag_fetch_srcs[] = {{
.src_type = nir_tex_src_coord,
@ -85,9 +85,8 @@ build_fmask_copy_compute_shader(struct radv_device *dev, int samples)
.src_type = nir_tex_src_ms_index,
.src = nir_src_for_ssa(sample_id),
}};
nir_ssa_def *outval =
nir_build_tex_deref_instr(&b, nir_texop_fragment_fetch_amd, nir_build_deref_var(&b, input_img), NULL,
ARRAY_SIZE(frag_fetch_srcs), frag_fetch_srcs);
nir_def *outval = nir_build_tex_deref_instr(&b, nir_texop_fragment_fetch_amd, nir_build_deref_var(&b, input_img),
NULL, ARRAY_SIZE(frag_fetch_srcs), frag_fetch_srcs);
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord, sample_id, outval,
nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_MS);


@ -48,17 +48,17 @@ build_fmask_expand_compute_shader(struct radv_device *device, int samples)
output_img->data.access = ACCESS_NON_READABLE;
nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img);
nir_ssa_def *output_img_deref = &nir_build_deref_var(&b, output_img)->dest.ssa;
nir_def *output_img_deref = &nir_build_deref_var(&b, output_img)->dest.ssa;
nir_ssa_def *tex_coord = get_global_ids(&b, 3);
nir_def *tex_coord = get_global_ids(&b, 3);
nir_ssa_def *tex_vals[8];
nir_def *tex_vals[8];
for (uint32_t i = 0; i < samples; i++) {
tex_vals[i] = nir_txf_ms_deref(&b, input_img_deref, tex_coord, nir_imm_int(&b, i));
}
nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, tex_coord, 0), nir_channel(&b, tex_coord, 1),
nir_channel(&b, tex_coord, 2), nir_ssa_undef(&b, 1, 32));
nir_def *img_coord = nir_vec4(&b, nir_channel(&b, tex_coord, 0), nir_channel(&b, tex_coord, 1),
nir_channel(&b, tex_coord, 2), nir_undef(&b, 1, 32));
for (uint32_t i = 0; i < samples; i++) {
nir_image_deref_store(&b, output_img_deref, img_coord, nir_imm_int(&b, i), tex_vals[i], nir_imm_int(&b, 0),


@ -32,11 +32,11 @@
#include "sid.h"
#include "vk_format.h"
static nir_ssa_def *
radv_meta_build_resolve_srgb_conversion(nir_builder *b, nir_ssa_def *input)
static nir_def *
radv_meta_build_resolve_srgb_conversion(nir_builder *b, nir_def *input)
{
unsigned i;
nir_ssa_def *comp[4];
nir_def *comp[4];
for (i = 0; i < 3; i++)
comp[i] = nir_format_linear_to_srgb(b, nir_channel(b, input, i));
comp[3] = nir_channels(b, input, 1 << 3);
@ -62,27 +62,27 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_ssa_def *global_id = get_global_ids(&b, 2);
nir_def *global_id = get_global_ids(&b, 2);
nir_ssa_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_ssa_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16);
nir_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16);
nir_ssa_def *src_coord = nir_iadd(&b, global_id, src_offset);
nir_ssa_def *dst_coord = nir_iadd(&b, global_id, dst_offset);
nir_def *src_coord = nir_iadd(&b, global_id, src_offset);
nir_def *dst_coord = nir_iadd(&b, global_id, dst_offset);
nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
radv_meta_build_resolve_shader_core(dev, &b, is_integer, samples, input_img, color, src_coord);
nir_ssa_def *outval = nir_load_var(&b, color);
nir_def *outval = nir_load_var(&b, color);
if (is_srgb)
outval = radv_meta_build_resolve_srgb_conversion(&b, outval);
nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1),
nir_ssa_undef(&b, 1, 32), nir_ssa_undef(&b, 1, 32));
nir_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1), nir_undef(&b, 1, 32),
nir_undef(&b, 1, 32));
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32),
outval, nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_undef(&b, 1, 32), outval,
nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
return b.shader;
}
@ -130,21 +130,21 @@ build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples,
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_ssa_def *global_id = get_global_ids(&b, 3);
nir_def *global_id = get_global_ids(&b, 3);
nir_ssa_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_ssa_def *resolve_coord = nir_iadd(&b, nir_trim_vector(&b, global_id, 2), offset);
nir_def *resolve_coord = nir_iadd(&b, nir_trim_vector(&b, global_id, 2), offset);
nir_ssa_def *img_coord =
nir_def *img_coord =
nir_vec3(&b, nir_channel(&b, resolve_coord, 0), nir_channel(&b, resolve_coord, 1), nir_channel(&b, global_id, 2));
nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img);
nir_ssa_def *outval = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, 0));
nir_def *outval = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, 0));
if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT) {
for (int i = 1; i < samples; i++) {
nir_ssa_def *si = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, i));
nir_def *si = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, i));
switch (resolve_mode) {
case VK_RESOLVE_MODE_AVERAGE_BIT:
@ -172,9 +172,9 @@ build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples,
outval = nir_fdiv_imm(&b, outval, samples);
}
nir_ssa_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1),
nir_channel(&b, img_coord, 2), nir_ssa_undef(&b, 1, 32));
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_ssa_undef(&b, 1, 32), outval,
nir_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1),
nir_channel(&b, img_coord, 2), nir_undef(&b, 1, 32));
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_undef(&b, 1, 32), outval,
nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
return b.shader;
}


@ -47,17 +47,17 @@ build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, int samp
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
color_out->data.location = FRAG_RESULT_DATA0;
nir_ssa_def *pos_in = nir_trim_vector(&b, nir_load_frag_coord(&b), 2);
nir_ssa_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_def *pos_in = nir_trim_vector(&b, nir_load_frag_coord(&b), 2);
nir_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_ssa_def *pos_int = nir_f2i32(&b, pos_in);
nir_def *pos_int = nir_f2i32(&b, pos_in);
nir_ssa_def *img_coord = nir_trim_vector(&b, nir_iadd(&b, pos_int, src_offset), 2);
nir_def *img_coord = nir_trim_vector(&b, nir_iadd(&b, pos_int, src_offset), 2);
nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
radv_meta_build_resolve_shader_core(dev, &b, is_integer, samples, input_img, color, img_coord);
nir_ssa_def *outval = nir_load_var(&b, color);
nir_def *outval = nir_load_var(&b, color);
nir_store_var(&b, color_out, outval, 0xf);
return b.shader;
}
@ -260,18 +260,18 @@ build_depth_stencil_resolve_fragment_shader(struct radv_device *dev, int samples
nir_variable *fs_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_out");
fs_out->data.location = index == DEPTH_RESOLVE ? FRAG_RESULT_DEPTH : FRAG_RESULT_STENCIL;
nir_ssa_def *pos_in = nir_trim_vector(&b, nir_load_frag_coord(&b), 2);
nir_def *pos_in = nir_trim_vector(&b, nir_load_frag_coord(&b), 2);
nir_ssa_def *pos_int = nir_f2i32(&b, pos_in);
nir_def *pos_int = nir_f2i32(&b, pos_in);
nir_ssa_def *img_coord = nir_trim_vector(&b, pos_int, 2);
nir_def *img_coord = nir_trim_vector(&b, pos_int, 2);
nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img);
nir_ssa_def *outval = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, 0));
nir_def *outval = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, 0));
if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT) {
for (int i = 1; i < samples; i++) {
nir_ssa_def *si = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, i));
nir_def *si = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, i));
switch (resolve_mode) {
case VK_RESOLVE_MODE_AVERAGE_BIT:


@ -41,25 +41,25 @@ typedef struct {
const struct radv_shader_layout *layout;
} apply_layout_state;
static nir_ssa_def *
static nir_def *
get_scalar_arg(nir_builder *b, unsigned size, struct ac_arg arg)
{
assert(arg.used);
return nir_load_scalar_arg_amd(b, size, .base = arg.arg_index);
}
static nir_ssa_def *
convert_pointer_to_64_bit(nir_builder *b, apply_layout_state *state, nir_ssa_def *ptr)
static nir_def *
convert_pointer_to_64_bit(nir_builder *b, apply_layout_state *state, nir_def *ptr)
{
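/* Rebuild a full 64-bit address from a 32-bit offset; the constant upper
 * half comes from state->address32_hi. */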
return nir_pack_64_2x32_split(b, ptr, nir_imm_int(b, state->address32_hi));
}
static nir_ssa_def *
static nir_def *
load_desc_ptr(nir_builder *b, apply_layout_state *state, unsigned set)
{
const struct radv_userdata_locations *user_sgprs_locs = &state->info->user_sgprs_locs;
if (user_sgprs_locs->shader_data[AC_UD_INDIRECT_DESCRIPTOR_SETS].sgpr_idx != -1) {
nir_ssa_def *addr = get_scalar_arg(b, 1, state->args->descriptor_sets[0]);
nir_def *addr = get_scalar_arg(b, 1, state->args->descriptor_sets[0]);
addr = convert_pointer_to_64_bit(b, state, addr);
return nir_load_smem_amd(b, 1, addr, nir_imm_int(b, set * 4));
}
@ -77,7 +77,7 @@ visit_vulkan_resource_index(nir_builder *b, apply_layout_state *state, nir_intri
unsigned offset = layout->binding[binding].offset;
unsigned stride;
nir_ssa_def *set_ptr;
nir_def *set_ptr;
if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
unsigned idx = state->layout->set[desc_set].dynamic_offset_start + layout->binding[binding].dynamic_offset_offset;
@ -89,7 +89,7 @@ visit_vulkan_resource_index(nir_builder *b, apply_layout_state *state, nir_intri
stride = layout->binding[binding].size;
}
nir_ssa_def *binding_ptr = nir_imul_imm(b, intrin->src[0].ssa, stride);
nir_def *binding_ptr = nir_imul_imm(b, intrin->src[0].ssa, stride);
nir_instr_as_alu(binding_ptr->parent_instr)->no_unsigned_wrap = true;
binding_ptr = nir_iadd_imm(b, binding_ptr, offset);
@ -97,9 +97,9 @@ visit_vulkan_resource_index(nir_builder *b, apply_layout_state *state, nir_intri
if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
assert(stride == 16);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_pack_64_2x32_split(b, set_ptr, binding_ptr));
nir_def_rewrite_uses(&intrin->dest.ssa, nir_pack_64_2x32_split(b, set_ptr, binding_ptr));
} else {
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_vec3(b, set_ptr, binding_ptr, nir_imm_int(b, stride)));
nir_def_rewrite_uses(&intrin->dest.ssa, nir_vec3(b, set_ptr, binding_ptr, nir_imm_int(b, stride)));
}
nir_instr_remove(&intrin->instr);
}
@ -109,27 +109,27 @@ visit_vulkan_resource_reindex(nir_builder *b, apply_layout_state *state, nir_int
{
VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
if (desc_type == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
nir_ssa_def *set_ptr = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
nir_ssa_def *binding_ptr = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);
nir_def *set_ptr = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
nir_def *binding_ptr = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);
nir_ssa_def *index = nir_imul_imm(b, intrin->src[1].ssa, 16);
nir_def *index = nir_imul_imm(b, intrin->src[1].ssa, 16);
nir_instr_as_alu(index->parent_instr)->no_unsigned_wrap = true;
binding_ptr = nir_iadd_nuw(b, binding_ptr, index);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_pack_64_2x32_split(b, set_ptr, binding_ptr));
nir_def_rewrite_uses(&intrin->dest.ssa, nir_pack_64_2x32_split(b, set_ptr, binding_ptr));
} else {
assert(desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
nir_ssa_def *binding_ptr = nir_channel(b, intrin->src[0].ssa, 1);
nir_ssa_def *stride = nir_channel(b, intrin->src[0].ssa, 2);
nir_def *binding_ptr = nir_channel(b, intrin->src[0].ssa, 1);
nir_def *stride = nir_channel(b, intrin->src[0].ssa, 2);
nir_ssa_def *index = nir_imul(b, intrin->src[1].ssa, stride);
nir_def *index = nir_imul(b, intrin->src[1].ssa, stride);
nir_instr_as_alu(index->parent_instr)->no_unsigned_wrap = true;
binding_ptr = nir_iadd_nuw(b, binding_ptr, index);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_vector_insert_imm(b, intrin->src[0].ssa, binding_ptr, 1));
nir_def_rewrite_uses(&intrin->dest.ssa, nir_vector_insert_imm(b, intrin->src[0].ssa, binding_ptr, 1));
}
nir_instr_remove(&intrin->instr);
}
@ -138,20 +138,20 @@ static void
visit_load_vulkan_descriptor(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
{
if (nir_intrinsic_desc_type(intrin) == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
nir_ssa_def *addr = convert_pointer_to_64_bit(b, state,
nir_iadd(b, nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa),
nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa)));
nir_ssa_def *desc = nir_build_load_global(b, 1, 64, addr, .access = ACCESS_NON_WRITEABLE);
nir_def *addr = convert_pointer_to_64_bit(b, state,
nir_iadd(b, nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa),
nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa)));
nir_def *desc = nir_build_load_global(b, 1, 64, addr, .access = ACCESS_NON_WRITEABLE);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
nir_def_rewrite_uses(&intrin->dest.ssa, desc);
} else {
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_vector_insert_imm(b, intrin->src[0].ssa, nir_imm_int(b, 0), 2));
nir_def_rewrite_uses(&intrin->dest.ssa, nir_vector_insert_imm(b, intrin->src[0].ssa, nir_imm_int(b, 0), 2));
}
nir_instr_remove(&intrin->instr);
}
static nir_ssa_def *
load_inline_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *rsrc)
static nir_def *
load_inline_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_def *rsrc)
{
uint32_t desc_type = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
@ -169,8 +169,8 @@ load_inline_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa
nir_imm_int(b, desc_type));
}
static nir_ssa_def *
load_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *rsrc, unsigned access)
static nir_def *
load_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_def *rsrc, unsigned access)
{
nir_binding binding = nir_chase_binding(nir_src_for_ssa(rsrc));
@ -188,34 +188,34 @@ load_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *r
if (access & ACCESS_NON_UNIFORM)
return nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1));
nir_ssa_def *desc_set = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0));
nir_def *desc_set = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0));
return nir_load_smem_amd(b, 4, desc_set, nir_channel(b, rsrc, 1), .align_mul = 16);
}
static void
visit_get_ssbo_size(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
{
nir_ssa_def *rsrc = intrin->src[0].ssa;
nir_def *rsrc = intrin->src[0].ssa;
nir_ssa_def *size;
nir_def *size;
if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM) {
nir_ssa_def *ptr = nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1));
nir_def *ptr = nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1));
ptr = nir_iadd_imm(b, ptr, 8);
ptr = convert_pointer_to_64_bit(b, state, ptr);
size = nir_build_load_global(b, 4, 32, ptr, .access = ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER, .align_mul = 16,
.align_offset = 4);
} else {
/* load the entire descriptor so it can be CSE'd */
nir_ssa_def *ptr = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0));
nir_ssa_def *desc = nir_load_smem_amd(b, 4, ptr, nir_channel(b, rsrc, 1), .align_mul = 16);
nir_def *ptr = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0));
nir_def *desc = nir_load_smem_amd(b, 4, ptr, nir_channel(b, rsrc, 1), .align_mul = 16);
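/* The size lives in dword 2 of the descriptor: channel 2 here, or byte
 * offset 8 in the non-uniform path above. */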
size = nir_channel(b, desc, 2);
}
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, size);
nir_def_rewrite_uses(&intrin->dest.ssa, size);
nir_instr_remove(&intrin->instr);
}
static nir_ssa_def *
static nir_def *
get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *deref, enum ac_descriptor_type desc_type,
bool non_uniform, nir_tex_instr *tex, bool write)
{
@ -276,13 +276,13 @@ get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *der
break;
}
nir_ssa_def *index = NULL;
nir_def *index = NULL;
while (deref->deref_type != nir_deref_type_var) {
assert(deref->deref_type == nir_deref_type_array);
unsigned array_size = MAX2(glsl_get_aoa_size(deref->type), 1);
array_size *= binding->size;
nir_ssa_def *tmp = nir_imul_imm(b, deref->arr.index.ssa, array_size);
nir_def *tmp = nir_imul_imm(b, deref->arr.index.ssa, array_size);
if (tmp != deref->arr.index.ssa)
nir_instr_as_alu(tmp->parent_instr)->no_unsigned_wrap = true;
@ -296,23 +296,23 @@ get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *der
deref = nir_deref_instr_parent(deref);
}
nir_ssa_def *index_offset = index ? nir_iadd_imm(b, index, offset) : nir_imm_int(b, offset);
nir_def *index_offset = index ? nir_iadd_imm(b, index, offset) : nir_imm_int(b, offset);
if (index && index_offset != index)
nir_instr_as_alu(index_offset->parent_instr)->no_unsigned_wrap = true;
if (non_uniform)
return nir_iadd(b, load_desc_ptr(b, state, desc_set), index_offset);
nir_ssa_def *addr = convert_pointer_to_64_bit(b, state, load_desc_ptr(b, state, desc_set));
nir_ssa_def *desc = nir_load_smem_amd(b, size, addr, index_offset, .align_mul = size * 4u);
nir_def *addr = convert_pointer_to_64_bit(b, state, load_desc_ptr(b, state, desc_set));
nir_def *desc = nir_load_smem_amd(b, size, addr, index_offset, .align_mul = size * 4u);
/* 3 plane formats always have same size and format for plane 1 & 2, so
* use the tail from plane 1 so that we can store only the first 16 bytes
* of the last plane. */
if (desc_type == AC_DESC_PLANE_2) {
nir_ssa_def *desc2 = get_sampler_desc(b, state, deref, AC_DESC_PLANE_1, non_uniform, tex, write);
nir_def *desc2 = get_sampler_desc(b, state, deref, AC_DESC_PLANE_1, non_uniform, tex, write);
nir_ssa_def *comp[8];
nir_def *comp[8];
for (unsigned i = 0; i < 4; i++)
comp[i] = nir_channel(b, desc, i);
for (unsigned i = 4; i < 8; i++)
@ -320,7 +320,7 @@ get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *der
return nir_vec(b, comp, 8);
} else if (desc_type == AC_DESC_IMAGE && state->has_image_load_dcc_bug && !tex && !write) {
nir_ssa_def *comp[8];
nir_def *comp[8];
for (unsigned i = 0; i < 8; i++)
comp[i] = nir_channel(b, desc, i);
@ -331,7 +331,7 @@ get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *der
return nir_vec(b, comp, 8);
} else if (desc_type == AC_DESC_SAMPLER && tex->op == nir_texop_tg4 && !state->conformant_trunc_coord) {
nir_ssa_def *comp[4];
nir_def *comp[4];
for (unsigned i = 0; i < 4; i++)
comp[i] = nir_channel(b, desc, i);
@ -354,11 +354,11 @@ update_image_intrinsic(nir_builder *b, apply_layout_state *state, nir_intrinsic_
bool is_load =
intrin->intrinsic == nir_intrinsic_image_deref_load || intrin->intrinsic == nir_intrinsic_image_deref_sparse_load;
nir_ssa_def *desc = get_sampler_desc(b, state, deref, dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE,
nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM, NULL, !is_load);
nir_def *desc = get_sampler_desc(b, state, deref, dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE,
nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM, NULL, !is_load);
if (intrin->intrinsic == nir_intrinsic_image_deref_descriptor_amd) {
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
nir_def_rewrite_uses(&intrin->dest.ssa, desc);
nir_instr_remove(&intrin->instr);
} else {
nir_rewrite_image_intrinsic(intrin, desc, true);
@ -370,7 +370,7 @@ apply_layout_to_intrin(nir_builder *b, apply_layout_state *state, nir_intrinsic_
{
b->cursor = nir_before_instr(&intrin->instr);
nir_ssa_def *rsrc;
nir_def *rsrc;
switch (intrin->intrinsic) {
case nir_intrinsic_vulkan_resource_index:
visit_vulkan_resource_index(b, state, intrin);
@ -435,8 +435,8 @@ apply_layout_to_tex(nir_builder *b, apply_layout_state *state, nir_tex_instr *te
}
}
nir_ssa_def *image = NULL;
nir_ssa_def *sampler = NULL;
nir_def *image = NULL;
nir_def *sampler = NULL;
if (plane >= 0) {
assert(tex->op != nir_texop_txf_ms && tex->op != nir_texop_samples_identical);
assert(tex->sampler_dim != GLSL_SAMPLER_DIM_BUF);
@ -467,7 +467,7 @@ apply_layout_to_tex(nir_builder *b, apply_layout_state *state, nir_tex_instr *te
*/
/* TODO: This is unnecessary for combined image+sampler.
* We can do this when updating the desc set. */
nir_ssa_def *comp[4];
nir_def *comp[4];
for (unsigned i = 0; i < 4; i++)
comp[i] = nir_channel(b, sampler, i);
comp[0] = nir_iand(b, comp[0], nir_channel(b, image, 7));
@ -477,7 +477,7 @@ apply_layout_to_tex(nir_builder *b, apply_layout_state *state, nir_tex_instr *te
}
if (tex->op == nir_texop_descriptor_amd) {
nir_ssa_def_rewrite_uses(&tex->dest.ssa, image);
nir_def_rewrite_uses(&tex->dest.ssa, image);
nir_instr_remove(&tex->instr);
return;
}


@ -39,31 +39,31 @@ typedef struct {
const struct radv_shader_info *info;
const struct radv_pipeline_key *pl_key;
uint32_t address32_hi;
nir_ssa_def *gsvs_ring[4];
nir_def *gsvs_ring[4];
} lower_abi_state;
static nir_ssa_def *
static nir_def *
load_ring(nir_builder *b, unsigned ring, lower_abi_state *s)
{
struct ac_arg arg =
b->shader->info.stage == MESA_SHADER_TASK ? s->args->task_ring_offsets : s->args->ac.ring_offsets;
nir_ssa_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, arg);
nir_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, arg);
ring_offsets = nir_pack_64_2x32_split(b, nir_channel(b, ring_offsets, 0), nir_channel(b, ring_offsets, 1));
return nir_load_smem_amd(b, 4, ring_offsets, nir_imm_int(b, ring * 16u), .align_mul = 4u);
}
static nir_ssa_def *
static nir_def *
nggc_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
{
nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
nir_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
return nir_test_mask(b, settings, mask);
}
static nir_ssa_def *
static nir_def *
shader_query_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
{
nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->shader_query_state);
nir_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->shader_query_state);
return nir_test_mask(b, settings, mask);
}
@ -80,7 +80,7 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
b->cursor = nir_before_instr(instr);
nir_ssa_def *replacement = NULL;
nir_def *replacement = NULL;
bool progress = true;
switch (intrin->intrinsic) {
@ -129,13 +129,13 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
/* Note, the HW always assumes there is at least 1 per-vertex param. */
const unsigned total_num_params = MAX2(1, s->info->outinfo.param_exports) + s->info->outinfo.prim_param_exports;
nir_ssa_def *dword1 = nir_channel(b, replacement, 1);
nir_def *dword1 = nir_channel(b, replacement, 1);
dword1 = nir_ior_imm(b, dword1, S_008F04_STRIDE(16 * total_num_params));
replacement = nir_vector_insert_imm(b, replacement, dword1, 1);
break;
case nir_intrinsic_load_ring_attr_offset_amd: {
nir_ssa_def *ring_attr_offset = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_attr_offset);
nir_def *ring_attr_offset = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_attr_offset);
replacement = nir_ishl_imm(b, nir_ubfe_imm(b, ring_attr_offset, 0, 15), 9); /* 512b increments. */
break;
}
@ -148,7 +148,7 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
* to optimize some multiplications (in address calculations) so that
* constant additions can be added to the const offset in memory load instructions.
*/
nir_ssa_def *arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tes_rel_patch_id);
nir_def *arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tes_rel_patch_id);
if (s->info->tes.tcs_vertices_out) {
nir_intrinsic_instr *load_arg = nir_instr_as_intrinsic(arg->parent_instr);
@ -203,7 +203,7 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.merged_wave_info);
break;
case nir_intrinsic_load_cull_any_enabled_amd: {
nir_ssa_def *gs_tg_info = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info);
nir_def *gs_tg_info = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info);
/* Consider a workgroup small if it contains less than 16 triangles.
*
@ -211,12 +211,12 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
* so the below is equivalent to: "ult(ubfe(gs_tg_info, 22, 9), 16)", but
* ACO can optimize out the comparison to zero (see try_optimize_scc_nocompare).
*/
nir_ssa_def *small_workgroup = nir_ieq_imm(b, nir_iand_imm(b, gs_tg_info, BITFIELD_RANGE(22 + 4, 9 - 4)), 0);
nir_def *small_workgroup = nir_ieq_imm(b, nir_iand_imm(b, gs_tg_info, BITFIELD_RANGE(22 + 4, 9 - 4)), 0);
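/* BITFIELD_RANGE(26, 5) covers the top 5 bits of the 9-bit primitive count,
 * so "all zero" is equivalent to the count being below 16. */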
nir_ssa_def *mask =
nir_def *mask =
nir_bcsel(b, small_workgroup, nir_imm_int(b, radv_nggc_none),
nir_imm_int(b, radv_nggc_front_face | radv_nggc_back_face | radv_nggc_small_primitives));
nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
nir_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
replacement = nir_ine_imm(b, nir_iand(b, settings, mask), 0);
break;
}
@ -238,14 +238,14 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
* exponent = nggc_settings >> 24
* precision = 1.0 * 2 ^ exponent
*/
nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
nir_ssa_def *exponent = nir_ishr_imm(b, settings, 24u);
nir_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
nir_def *exponent = nir_ishr_imm(b, settings, 24u);
replacement = nir_ldexp(b, nir_imm_float(b, 1.0f), exponent);
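/* ldexp(1.0, exponent) yields exactly 2^exponent, matching the precision
 * formula above. */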
break;
}
case nir_intrinsic_load_viewport_xy_scale_and_offset: {
nir_ssa_def *comps[] = {
nir_def *comps[] = {
ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[0]),
ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[1]),
ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_translate[0]),
@ -280,7 +280,7 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
if (s->info->inputs_linked) {
replacement = nir_imm_int(b, get_tcs_input_vertex_stride(s->info->tcs.num_linked_inputs));
} else {
nir_ssa_def *lshs_vertex_stride =
nir_def *lshs_vertex_stride =
GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_LSHS_VERTEX_STRIDE);
replacement = nir_ishl_imm(b, lshs_vertex_stride, 2);
}
@ -296,7 +296,7 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
break;
}
case nir_intrinsic_load_hs_out_patch_data_offset_amd: {
nir_ssa_def *out_vertices_per_patch;
nir_def *out_vertices_per_patch;
unsigned num_tcs_outputs =
stage == MESA_SHADER_TESS_CTRL ? s->info->tcs.num_linked_outputs : s->info->tes.num_linked_inputs;
@ -310,13 +310,13 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
}
}
nir_ssa_def *per_vertex_output_patch_size = nir_imul_imm(b, out_vertices_per_patch, num_tcs_outputs * 16u);
nir_def *per_vertex_output_patch_size = nir_imul_imm(b, out_vertices_per_patch, num_tcs_outputs * 16u);
if (s->info->num_tess_patches) {
unsigned num_patches = s->info->num_tess_patches;
replacement = nir_imul_imm(b, per_vertex_output_patch_size, num_patches);
} else {
nir_ssa_def *num_patches;
nir_def *num_patches;
if (stage == MESA_SHADER_TESS_CTRL) {
num_patches = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_PATCHES);
@ -330,10 +330,10 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
case nir_intrinsic_load_sample_positions_amd: {
uint32_t sample_pos_offset = (RING_PS_SAMPLE_POSITIONS * 16) - 8;
nir_ssa_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, s->args->ac.ring_offsets);
nir_ssa_def *addr = nir_pack_64_2x32(b, ring_offsets);
nir_ssa_def *sample_id = nir_umin(b, intrin->src[0].ssa, nir_imm_int(b, 7));
nir_ssa_def *offset = nir_ishl_imm(b, sample_id, 3); /* 2 floats containing samplepos.xy */
nir_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, s->args->ac.ring_offsets);
nir_def *addr = nir_pack_64_2x32(b, ring_offsets);
nir_def *sample_id = nir_umin(b, intrin->src[0].ssa, nir_imm_int(b, 7));
nir_def *offset = nir_ishl_imm(b, sample_id, 3); /* 2 floats containing samplepos.xy */
nir_const_value *const_num_samples = nir_src_as_const_value(intrin->src[1]);
if (const_num_samples) {
@ -400,8 +400,8 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.streamout_write_index);
break;
case nir_intrinsic_load_streamout_buffer_amd: {
nir_ssa_def *ptr = nir_pack_64_2x32_split(b, ac_nir_load_arg(b, &s->args->ac, s->args->streamout_buffers),
nir_imm_int(b, s->address32_hi));
nir_def *ptr = nir_pack_64_2x32_split(b, ac_nir_load_arg(b, &s->args->ac, s->args->streamout_buffers),
nir_imm_int(b, s->address32_hi));
replacement = nir_load_smem_amd(b, 4, ptr, nir_imm_int(b, nir_intrinsic_base(intrin) * 16));
break;
}
@ -461,19 +461,19 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.force_vrs_rates);
break;
case nir_intrinsic_load_fully_covered: {
nir_ssa_def *sample_coverage = ac_nir_load_arg(b, &s->args->ac, s->args->ac.sample_coverage);
nir_def *sample_coverage = ac_nir_load_arg(b, &s->args->ac, s->args->ac.sample_coverage);
replacement = nir_ine_imm(b, sample_coverage, 0);
break;
}
case nir_intrinsic_load_barycentric_optimize_amd: {
nir_ssa_def *prim_mask = ac_nir_load_arg(b, &s->args->ac, s->args->ac.prim_mask);
nir_def *prim_mask = ac_nir_load_arg(b, &s->args->ac, s->args->ac.prim_mask);
/* enabled when bit 31 is set */
replacement = nir_ilt_imm(b, prim_mask, 0);
break;
}
case nir_intrinsic_load_poly_line_smooth_enabled:
if (s->pl_key->dynamic_line_rast_mode) {
nir_ssa_def *line_rast_mode = GET_SGPR_FIELD_NIR(s->args->ps_state, PS_STATE_LINE_RAST_MODE);
nir_def *line_rast_mode = GET_SGPR_FIELD_NIR(s->args->ps_state, PS_STATE_LINE_RAST_MODE);
replacement = nir_ieq_imm(b, line_rast_mode, VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT);
} else {
replacement = nir_imm_bool(b, s->pl_key->ps.line_smooth_enabled);
@ -499,7 +499,7 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
return false;
if (replacement)
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, replacement);
nir_def_rewrite_uses(&intrin->dest.ssa, replacement);
nir_instr_remove(instr);
nir_instr_free(instr);
@ -507,10 +507,10 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
return true;
}
static nir_ssa_def *
static nir_def *
load_gsvs_ring(nir_builder *b, lower_abi_state *s, unsigned stream_id)
{
nir_ssa_def *ring = load_ring(b, RING_GSVS_GS, s);
nir_def *ring = load_ring(b, RING_GSVS_GS, s);
unsigned stream_offset = 0;
unsigned stride = 0;
for (unsigned i = 0; i <= stream_id; i++) {
@ -523,7 +523,7 @@ load_gsvs_ring(nir_builder *b, lower_abi_state *s, unsigned stream_id)
assert(stride < (1 << 14));
if (stream_offset) {
nir_ssa_def *addr = nir_pack_64_2x32_split(b, nir_channel(b, ring, 0), nir_channel(b, ring, 1));
nir_def *addr = nir_pack_64_2x32_split(b, nir_channel(b, ring, 0), nir_channel(b, ring, 1));
addr = nir_iadd_imm(b, addr, stream_offset);
ring = nir_vector_insert_imm(b, ring, nir_unpack_64_2x32_split_x(b, addr), 0);
ring = nir_vector_insert_imm(b, ring, nir_unpack_64_2x32_split_y(b, addr), 1);
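In plain C terms, the vector_insert pair above patches a 64-bit base address that is stored across two 32-bit descriptor dwords; a minimal sketch (real buffer descriptors also keep stride/swizzle bits in the high dword, which this ignores):

   #include <stdint.h>

   /* desc[0] = base address bits [31:0], desc[1] = base address bits [63:32]. */
   static void add_stream_offset(uint32_t desc[4], uint32_t stream_offset)
   {
      uint64_t addr = ((uint64_t)desc[1] << 32) | desc[0];
      addr += stream_offset;
      desc[0] = (uint32_t)addr;
      desc[1] = (uint32_t)(addr >> 32);
   }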

View file

@ -32,34 +32,34 @@ typedef struct {
unsigned rast_prim;
} lower_fs_barycentric_state;
static nir_ssa_def *
lower_interp_center_smooth(nir_builder *b, nir_ssa_def *offset)
static nir_def *
lower_interp_center_smooth(nir_builder *b, nir_def *offset)
{
nir_ssa_def *pull_model = nir_load_barycentric_model(b, 32);
nir_def *pull_model = nir_load_barycentric_model(b, 32);
nir_ssa_def *deriv_x =
nir_def *deriv_x =
nir_vec3(b, nir_fddx_fine(b, nir_channel(b, pull_model, 0)), nir_fddx_fine(b, nir_channel(b, pull_model, 1)),
nir_fddx_fine(b, nir_channel(b, pull_model, 2)));
nir_ssa_def *deriv_y =
nir_def *deriv_y =
nir_vec3(b, nir_fddy_fine(b, nir_channel(b, pull_model, 0)), nir_fddy_fine(b, nir_channel(b, pull_model, 1)),
nir_fddy_fine(b, nir_channel(b, pull_model, 2)));
nir_ssa_def *offset_x = nir_channel(b, offset, 0);
nir_ssa_def *offset_y = nir_channel(b, offset, 1);
nir_def *offset_x = nir_channel(b, offset, 0);
nir_def *offset_y = nir_channel(b, offset, 1);
nir_ssa_def *adjusted_x = nir_fadd(b, pull_model, nir_fmul(b, deriv_x, offset_x));
nir_ssa_def *adjusted = nir_fadd(b, adjusted_x, nir_fmul(b, deriv_y, offset_y));
nir_def *adjusted_x = nir_fadd(b, pull_model, nir_fmul(b, deriv_x, offset_x));
nir_def *adjusted = nir_fadd(b, adjusted_x, nir_fmul(b, deriv_y, offset_y));
nir_ssa_def *ij = nir_vec2(b, nir_channel(b, adjusted, 0), nir_channel(b, adjusted, 1));
nir_def *ij = nir_vec2(b, nir_channel(b, adjusted, 0), nir_channel(b, adjusted, 1));
/* Get W by using the reciprocal of 1/W. */
nir_ssa_def *w = nir_frcp(b, nir_channel(b, adjusted, 2));
nir_def *w = nir_frcp(b, nir_channel(b, adjusted, 2));
return nir_fmul(b, ij, w);
}
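In scalar terms this is a first-order Taylor step of the pull-model barycentrics (i/W, j/W, 1/W), followed by division by the recovered 1/W; a minimal host-side sketch of the same arithmetic, assuming the per-pixel derivatives are already known:

   typedef struct { float x, y, z; } vec3;

   /* pull = (i/W, j/W, 1/W) at the pixel center, d_dx/d_dy its screen-space
    * derivatives, (ox, oy) the offset from the center. */
   static void interp_at_offset(vec3 pull, vec3 d_dx, vec3 d_dy, float ox, float oy,
                                float *i, float *j)
   {
      vec3 adj = {pull.x + d_dx.x * ox + d_dy.x * oy,
                  pull.y + d_dx.y * ox + d_dy.y * oy,
                  pull.z + d_dx.z * ox + d_dy.z * oy};
      float w = 1.0f / adj.z; /* reciprocal of the interpolated 1/W */
      *i = adj.x * w;
      *j = adj.y * w;
   }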
static nir_ssa_def *
lower_barycentric_coord_at_offset(nir_builder *b, nir_ssa_def *src, enum glsl_interp_mode mode)
static nir_def *
lower_barycentric_coord_at_offset(nir_builder *b, nir_def *src, enum glsl_interp_mode mode)
{
if (mode == INTERP_MODE_SMOOTH)
return lower_interp_center_smooth(b, src);
@ -67,15 +67,15 @@ lower_barycentric_coord_at_offset(nir_builder *b, nir_ssa_def *src, enum glsl_in
return nir_load_barycentric_at_offset(b, 32, src, .interp_mode = mode);
}
static nir_ssa_def *
static nir_def *
lower_barycentric_coord_at_sample(nir_builder *b, lower_fs_barycentric_state *state, nir_intrinsic_instr *intrin)
{
const enum glsl_interp_mode mode = (enum glsl_interp_mode)nir_intrinsic_interp_mode(intrin);
nir_ssa_def *num_samples = nir_load_rasterization_samples_amd(b);
nir_ssa_def *new_dest;
nir_def *num_samples = nir_load_rasterization_samples_amd(b);
nir_def *new_dest;
if (state->dynamic_rasterization_samples) {
nir_ssa_def *res1, *res2;
nir_def *res1, *res2;
nir_push_if(b, nir_ieq_imm(b, num_samples, 1));
{
@ -83,7 +83,7 @@ lower_barycentric_coord_at_sample(nir_builder *b, lower_fs_barycentric_state *st
}
nir_push_else(b, NULL);
{
nir_ssa_def *sample_pos = nir_load_sample_positions_amd(b, 32, intrin->src[0].ssa, num_samples);
nir_def *sample_pos = nir_load_sample_positions_amd(b, 32, intrin->src[0].ssa, num_samples);
/* sample_pos -= 0.5 */
sample_pos = nir_fadd_imm(b, sample_pos, -0.5f);
@ -97,7 +97,7 @@ lower_barycentric_coord_at_sample(nir_builder *b, lower_fs_barycentric_state *st
if (!state->num_rasterization_samples) {
new_dest = nir_load_barycentric_pixel(b, 32, .interp_mode = nir_intrinsic_interp_mode(intrin));
} else {
nir_ssa_def *sample_pos = nir_load_sample_positions_amd(b, 32, intrin->src[0].ssa, num_samples);
nir_def *sample_pos = nir_load_sample_positions_amd(b, 32, intrin->src[0].ssa, num_samples);
/* sample_pos -= 0.5 */
sample_pos = nir_fadd_imm(b, sample_pos, -0.5f);
@ -109,7 +109,7 @@ lower_barycentric_coord_at_sample(nir_builder *b, lower_fs_barycentric_state *st
return new_dest;
}
static nir_ssa_def *
static nir_def *
get_interp_param(nir_builder *b, lower_fs_barycentric_state *state, nir_intrinsic_instr *intrin)
{
const enum glsl_interp_mode mode = (enum glsl_interp_mode)nir_intrinsic_interp_mode(intrin);
@ -130,10 +130,10 @@ get_interp_param(nir_builder *b, lower_fs_barycentric_state *state, nir_intrinsi
return NULL;
}
static nir_ssa_def *
static nir_def *
lower_point(nir_builder *b)
{
nir_ssa_def *coords[3];
nir_def *coords[3];
coords[0] = nir_imm_float(b, 1.0f);
coords[1] = nir_imm_float(b, 0.0f);
@ -142,10 +142,10 @@ lower_point(nir_builder *b)
return nir_vec(b, coords, 3);
}
static nir_ssa_def *
lower_line(nir_builder *b, nir_ssa_def *p1, nir_ssa_def *p2)
static nir_def *
lower_line(nir_builder *b, nir_def *p1, nir_def *p2)
{
nir_ssa_def *coords[3];
nir_def *coords[3];
coords[1] = nir_fadd(b, p1, p2);
coords[0] = nir_fsub_imm(b, 1.0f, coords[1]);
@ -154,20 +154,20 @@ lower_line(nir_builder *b, nir_ssa_def *p1, nir_ssa_def *p2)
return nir_vec(b, coords, 3);
}
static nir_ssa_def *
lower_triangle(nir_builder *b, nir_ssa_def *p1, nir_ssa_def *p2)
static nir_def *
lower_triangle(nir_builder *b, nir_def *p1, nir_def *p2)
{
nir_ssa_def *v0_bary[3], *v1_bary[3], *v2_bary[3];
nir_ssa_def *coords[3];
nir_def *v0_bary[3], *v1_bary[3], *v2_bary[3];
nir_def *coords[3];
/* Compute the provoking vertex ID:
*
* quad_id = thread_id >> 2
* provoking_vtx_id = (provoking_vtx >> (quad_id << 1)) & 3
*/
nir_ssa_def *quad_id = nir_ushr_imm(b, nir_load_subgroup_invocation(b), 2);
nir_ssa_def *provoking_vtx = nir_load_provoking_vtx_amd(b);
nir_ssa_def *provoking_vtx_id = nir_ubfe(b, provoking_vtx, nir_ishl_imm(b, quad_id, 1), nir_imm_int(b, 2));
nir_def *quad_id = nir_ushr_imm(b, nir_load_subgroup_invocation(b), 2);
nir_def *provoking_vtx = nir_load_provoking_vtx_amd(b);
nir_def *provoking_vtx_id = nir_ubfe(b, provoking_vtx, nir_ishl_imm(b, quad_id, 1), nir_imm_int(b, 2));
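   /* Illustrative note (not in the original source): provoking_vtx packs one 2-bit
    * vertex index per quad. For subgroup invocation 13: quad_id = 13 >> 2 = 3,
    * shift = 3 << 1 = 6, and ubfe extracts provoking_vtx[7:6] as that quad's
    * provoking vertex (0, 1 or 2). */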
/* Compute barycentrics. */
v0_bary[0] = nir_fsub(b, nir_fsub_imm(b, 1.0f, p2), p1);
@ -194,30 +194,30 @@ lower_triangle(nir_builder *b, nir_ssa_def *p1, nir_ssa_def *p2)
static bool
lower_load_barycentric_coord(nir_builder *b, lower_fs_barycentric_state *state, nir_intrinsic_instr *intrin)
{
nir_ssa_def *interp, *p1, *p2;
nir_ssa_def *new_dest;
nir_def *interp, *p1, *p2;
nir_def *new_dest;
b->cursor = nir_after_instr(&intrin->instr);
/* When the rasterization primitive isn't known at compile time (GPL), load it. */
if (state->rast_prim == -1) {
nir_ssa_def *rast_prim = nir_load_rasterization_primitive_amd(b);
nir_ssa_def *res1, *res2;
nir_def *rast_prim = nir_load_rasterization_primitive_amd(b);
nir_def *res1, *res2;
nir_ssa_def *is_point = nir_ieq_imm(b, rast_prim, V_028A6C_POINTLIST);
nir_def *is_point = nir_ieq_imm(b, rast_prim, V_028A6C_POINTLIST);
nir_if *if_point = nir_push_if(b, is_point);
{
res1 = lower_point(b);
}
nir_push_else(b, if_point);
{
nir_ssa_def *res_line, *res_triangle;
nir_def *res_line, *res_triangle;
interp = get_interp_param(b, state, intrin);
p1 = nir_channel(b, interp, 0);
p2 = nir_channel(b, interp, 1);
nir_ssa_def *is_line = nir_ieq_imm(b, rast_prim, V_028A6C_LINESTRIP);
nir_def *is_line = nir_ieq_imm(b, rast_prim, V_028A6C_LINESTRIP);
nir_if *if_line = nir_push_if(b, is_line);
{
res_line = lower_line(b, p1, p2);
@ -250,7 +250,7 @@ lower_load_barycentric_coord(nir_builder *b, lower_fs_barycentric_state *state,
}
}
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest);
nir_def_rewrite_uses(&intrin->dest.ssa, new_dest);
nir_instr_remove(&intrin->instr);
return true;
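For reference, the three per-primitive constructions used by this pass reduce to the following CPU-side sketch (provoking-vertex rotation for triangles omitted; the case labels are illustrative, not the V_028A6C register values):

   /* gl_BaryCoordEXT for the three rasterized primitive types; p1/p2 are the
    * two interpolation weights provided by the hardware. */
   static void bary_coord(int rast_prim, float p1, float p2, float out[3])
   {
      switch (rast_prim) {
      case 0: /* point: the only vertex gets full weight */
         out[0] = 1.0f; out[1] = 0.0f; out[2] = 0.0f;
         break;
      case 1: /* line */
         out[1] = p1 + p2;
         out[0] = 1.0f - out[1];
         out[2] = 0.0f;
         break;
      default: /* triangle */
         out[0] = 1.0f - p1 - p2;
         out[1] = p1;
         out[2] = p2;
         break;
      }
   }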

View file

@ -49,21 +49,21 @@ radv_nir_lower_fs_intrinsics(nir_shader *nir, const struct radv_shader_stage *fs
switch (intrin->intrinsic) {
case nir_intrinsic_load_sample_mask_in: {
nir_ssa_def *sample_coverage = nir_load_vector_arg_amd(&b, 1, .base = args->ac.sample_coverage.arg_index);
nir_def *sample_coverage = nir_load_vector_arg_amd(&b, 1, .base = args->ac.sample_coverage.arg_index);
nir_ssa_def *def = NULL;
nir_def *def = NULL;
if (info->ps.uses_sample_shading || key->ps.sample_shading_enable) {
/* gl_SampleMaskIn[0] = (SampleCoverage & (PsIterMask << gl_SampleID)). */
nir_ssa_def *ps_state = nir_load_scalar_arg_amd(&b, 1, .base = args->ps_state.arg_index);
nir_ssa_def *ps_iter_mask =
nir_def *ps_state = nir_load_scalar_arg_amd(&b, 1, .base = args->ps_state.arg_index);
nir_def *ps_iter_mask =
nir_ubfe_imm(&b, ps_state, PS_STATE_PS_ITER_MASK__SHIFT, util_bitcount(PS_STATE_PS_ITER_MASK__MASK));
nir_ssa_def *sample_id = nir_load_sample_id(&b);
nir_def *sample_id = nir_load_sample_id(&b);
def = nir_iand(&b, sample_coverage, nir_ishl(&b, ps_iter_mask, sample_id));
} else {
def = sample_coverage;
}
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, def);
nir_def_rewrite_uses(&intrin->dest.ssa, def);
nir_instr_remove(instr);
progress = true;
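The replacement built above is a single mask expression; a standalone sketch with illustrative names:

   #include <stdint.h>

   /* With sample-rate shading, an invocation only covers its own sample:
    * gl_SampleMaskIn = SampleCoverage & (ps_iter_mask << gl_SampleID). */
   static uint32_t sample_mask_in(uint32_t sample_coverage, uint32_t ps_iter_mask,
                                  uint32_t sample_id, int sample_shading)
   {
      return sample_shading ? (sample_coverage & (ps_iter_mask << sample_id))
                            : sample_coverage;
   }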
@ -73,35 +73,35 @@ radv_nir_lower_fs_intrinsics(nir_shader *nir, const struct radv_shader_stage *fs
if (!key->adjust_frag_coord_z)
continue;
if (!(nir_ssa_def_components_read(&intrin->dest.ssa) & (1 << 2)))
if (!(nir_def_components_read(&intrin->dest.ssa) & (1 << 2)))
continue;
nir_ssa_def *frag_z = nir_channel(&b, &intrin->dest.ssa, 2);
nir_def *frag_z = nir_channel(&b, &intrin->dest.ssa, 2);
/* adjusted_frag_z = fddx_fine(frag_z) * 0.0625 + frag_z */
nir_ssa_def *adjusted_frag_z = nir_fddx_fine(&b, frag_z);
nir_def *adjusted_frag_z = nir_fddx_fine(&b, frag_z);
adjusted_frag_z = nir_ffma_imm1(&b, adjusted_frag_z, 0.0625f, frag_z);
/* VRS Rate X = Ancillary[2:3] */
nir_ssa_def *ancillary = nir_load_vector_arg_amd(&b, 1, .base = args->ac.ancillary.arg_index);
nir_ssa_def *x_rate = nir_ubfe_imm(&b, ancillary, 2, 2);
nir_def *ancillary = nir_load_vector_arg_amd(&b, 1, .base = args->ac.ancillary.arg_index);
nir_def *x_rate = nir_ubfe_imm(&b, ancillary, 2, 2);
/* xRate = xRate == 0x1 ? adjusted_frag_z : frag_z. */
nir_ssa_def *cond = nir_ieq_imm(&b, x_rate, 1);
nir_def *cond = nir_ieq_imm(&b, x_rate, 1);
frag_z = nir_bcsel(&b, cond, adjusted_frag_z, frag_z);
nir_ssa_def *new_dest = nir_vector_insert_imm(&b, &intrin->dest.ssa, frag_z, 2);
nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, new_dest, new_dest->parent_instr);
nir_def *new_dest = nir_vector_insert_imm(&b, &intrin->dest.ssa, frag_z, 2);
nir_def_rewrite_uses_after(&intrin->dest.ssa, new_dest, new_dest->parent_instr);
progress = true;
break;
}
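   /* Illustrative note (not in the original source): x_rate is taken from
    * ancillary[3:2]; only when it equals 1 is FragCoord.z replaced by
    * ddx_fine(z) * 0.0625 + z, otherwise the original z is kept. */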
case nir_intrinsic_load_barycentric_at_sample: {
nir_ssa_def *num_samples = nir_load_rasterization_samples_amd(&b);
nir_ssa_def *new_dest;
nir_def *num_samples = nir_load_rasterization_samples_amd(&b);
nir_def *new_dest;
if (key->dynamic_rasterization_samples) {
nir_ssa_def *res1, *res2;
nir_def *res1, *res2;
nir_push_if(&b, nir_ieq_imm(&b, num_samples, 1));
{
@ -109,7 +109,7 @@ radv_nir_lower_fs_intrinsics(nir_shader *nir, const struct radv_shader_stage *fs
}
nir_push_else(&b, NULL);
{
nir_ssa_def *sample_pos = nir_load_sample_positions_amd(&b, 32, intrin->src[0].ssa, num_samples);
nir_def *sample_pos = nir_load_sample_positions_amd(&b, 32, intrin->src[0].ssa, num_samples);
/* sample_pos -= 0.5 */
sample_pos = nir_fadd_imm(&b, sample_pos, -0.5f);
@ -124,7 +124,7 @@ radv_nir_lower_fs_intrinsics(nir_shader *nir, const struct radv_shader_stage *fs
if (!key->ps.num_samples) {
new_dest = nir_load_barycentric_pixel(&b, 32, .interp_mode = nir_intrinsic_interp_mode(intrin));
} else {
nir_ssa_def *sample_pos = nir_load_sample_positions_amd(&b, 32, intrin->src[0].ssa, num_samples);
nir_def *sample_pos = nir_load_sample_positions_amd(&b, 32, intrin->src[0].ssa, num_samples);
/* sample_pos -= 0.5 */
sample_pos = nir_fadd_imm(&b, sample_pos, -0.5f);
@ -134,7 +134,7 @@ radv_nir_lower_fs_intrinsics(nir_shader *nir, const struct radv_shader_stage *fs
}
}
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest);
nir_def_rewrite_uses(&intrin->dest.ssa, new_dest);
nir_instr_remove(instr);
progress = true;

View file

@ -43,7 +43,7 @@ radv_nir_lower_intrinsics_early(nir_shader *nir, const struct radv_pipeline_key
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
b.cursor = nir_before_instr(&intrin->instr);
nir_ssa_def *def = NULL;
nir_def *def = NULL;
switch (intrin->intrinsic) {
case nir_intrinsic_is_sparse_texels_resident:
def = nir_ieq_imm(&b, intrin->src[0].ssa, 0);
@ -60,7 +60,7 @@ radv_nir_lower_intrinsics_early(nir_shader *nir, const struct radv_pipeline_key
continue;
}
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, def);
nir_def_rewrite_uses(&intrin->dest.ssa, def);
nir_instr_remove(instr);
progress = true;

View file

@ -51,17 +51,17 @@ radv_nir_lower_primitive_shading_rate(nir_shader *nir, enum amd_gfx_level gfx_le
b.cursor = nir_before_instr(instr);
nir_ssa_def *val = nir_ssa_for_src(&b, intr->src[1], 1);
nir_def *val = nir_ssa_for_src(&b, intr->src[1], 1);
/* x_rate = (shadingRate & (Horizontal2Pixels | Horizontal4Pixels)) ? 0x1 : 0x0; */
nir_ssa_def *x_rate = nir_iand_imm(&b, val, 12);
nir_def *x_rate = nir_iand_imm(&b, val, 12);
x_rate = nir_b2i32(&b, nir_ine_imm(&b, x_rate, 0));
/* y_rate = (shadingRate & (Vertical2Pixels | Vertical4Pixels)) ? 0x1 : 0x0; */
nir_ssa_def *y_rate = nir_iand_imm(&b, val, 3);
nir_def *y_rate = nir_iand_imm(&b, val, 3);
y_rate = nir_b2i32(&b, nir_ine_imm(&b, y_rate, 0));
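   /* Illustrative example (not in the original source): a 2x2 shading rate sets
    * Horizontal2Pixels | Vertical2Pixels = 0x5, so x_rate = (0x5 & 12) != 0 -> 1
    * and y_rate = (0x5 & 3) != 0 -> 1. */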
nir_ssa_def *out = NULL;
nir_def *out = NULL;
/* MS:
* Primitive shading rate is a per-primitive output, it is

View file

@ -58,20 +58,20 @@ rq_variable_create(void *ctx, nir_shader *shader, unsigned array_length, const s
return result;
}
static nir_ssa_def *
nir_load_array(nir_builder *b, nir_variable *array, nir_ssa_def *index)
static nir_def *
nir_load_array(nir_builder *b, nir_variable *array, nir_def *index)
{
return nir_load_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, array), index));
}
static void
nir_store_array(nir_builder *b, nir_variable *array, nir_ssa_def *index, nir_ssa_def *value, unsigned writemask)
nir_store_array(nir_builder *b, nir_variable *array, nir_def *index, nir_def *value, unsigned writemask)
{
nir_store_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, array), index), value, writemask);
}
static nir_deref_instr *
rq_deref_var(nir_builder *b, nir_ssa_def *index, rq_variable *var)
rq_deref_var(nir_builder *b, nir_def *index, rq_variable *var)
{
if (var->array_length == 1)
return nir_build_deref_var(b, var->variable);
@ -79,8 +79,8 @@ rq_deref_var(nir_builder *b, nir_ssa_def *index, rq_variable *var)
return nir_build_deref_array(b, nir_build_deref_var(b, var->variable), index);
}
static nir_ssa_def *
rq_load_var(nir_builder *b, nir_ssa_def *index, rq_variable *var)
static nir_def *
rq_load_var(nir_builder *b, nir_def *index, rq_variable *var)
{
if (var->array_length == 1)
return nir_load_var(b, var->variable);
@ -89,7 +89,7 @@ rq_load_var(nir_builder *b, nir_ssa_def *index, rq_variable *var)
}
static void
rq_store_var(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def *value, unsigned writemask)
rq_store_var(nir_builder *b, nir_def *index, rq_variable *var, nir_def *value, unsigned writemask)
{
if (var->array_length == 1) {
nir_store_var(b, var->variable, value, writemask);
@ -99,13 +99,13 @@ rq_store_var(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def *
}
static void
rq_copy_var(nir_builder *b, nir_ssa_def *index, rq_variable *dst, rq_variable *src, unsigned mask)
rq_copy_var(nir_builder *b, nir_def *index, rq_variable *dst, rq_variable *src, unsigned mask)
{
rq_store_var(b, index, dst, rq_load_var(b, index, src), mask);
}
static nir_ssa_def *
rq_load_array(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def *array_index)
static nir_def *
rq_load_array(nir_builder *b, nir_def *index, rq_variable *var, nir_def *array_index)
{
if (var->array_length == 1)
return nir_load_array(b, var->variable, array_index);
@ -115,7 +115,7 @@ rq_load_array(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def
}
static void
rq_store_array(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def *array_index, nir_ssa_def *value,
rq_store_array(nir_builder *b, nir_def *index, rq_variable *var, nir_def *array_index, nir_def *value,
unsigned writemask)
{
if (var->array_length == 1) {
@ -282,7 +282,7 @@ lower_ray_query(nir_shader *shader, nir_variable *ray_query, struct hash_table *
}
static void
copy_candidate_to_closest(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars)
copy_candidate_to_closest(nir_builder *b, nir_def *index, struct ray_query_vars *vars)
{
rq_copy_var(b, index, vars->closest.barycentrics, vars->candidate.barycentrics, 0x3);
rq_copy_var(b, index, vars->closest.geometry_id_and_flags, vars->candidate.geometry_id_and_flags, 0x1);
@ -296,10 +296,10 @@ copy_candidate_to_closest(nir_builder *b, nir_ssa_def *index, struct ray_query_v
}
static void
insert_terminate_on_first_hit(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars,
insert_terminate_on_first_hit(nir_builder *b, nir_def *index, struct ray_query_vars *vars,
const struct radv_ray_flags *ray_flags, bool break_on_terminate)
{
nir_ssa_def *terminate_on_first_hit;
nir_def *terminate_on_first_hit;
if (ray_flags)
terminate_on_first_hit = ray_flags->terminate_on_first_hit;
else
@ -315,16 +315,14 @@ insert_terminate_on_first_hit(nir_builder *b, nir_ssa_def *index, struct ray_que
}
static void
lower_rq_confirm_intersection(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr,
struct ray_query_vars *vars)
lower_rq_confirm_intersection(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars)
{
copy_candidate_to_closest(b, index, vars);
insert_terminate_on_first_hit(b, index, vars, NULL, false);
}
static void
lower_rq_generate_intersection(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr,
struct ray_query_vars *vars)
lower_rq_generate_intersection(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars)
{
nir_push_if(b, nir_iand(b, nir_fge(b, rq_load_var(b, index, vars->closest.t), instr->src[1].ssa),
nir_fge(b, instr->src[1].ssa, rq_load_var(b, index, vars->tmin))));
@ -339,7 +337,7 @@ lower_rq_generate_intersection(nir_builder *b, nir_ssa_def *index, nir_intrinsic
enum rq_intersection_type { intersection_type_none, intersection_type_triangle, intersection_type_aabb };
static void
lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars,
lower_rq_initialize(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars,
struct radv_instance *instance)
{
rq_store_var(b, index, vars->flags, instr->src[2].ssa, 0x1);
@ -356,12 +354,12 @@ lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *ins
rq_store_var(b, index, vars->closest.t, instr->src[7].ssa, 0x1);
rq_store_var(b, index, vars->closest.intersection_type, nir_imm_int(b, intersection_type_none), 0x1);
nir_ssa_def *accel_struct = instr->src[1].ssa;
nir_def *accel_struct = instr->src[1].ssa;
nir_ssa_def *bvh_offset = nir_build_load_global(
nir_def *bvh_offset = nir_build_load_global(
b, 1, 32, nir_iadd_imm(b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)),
.access = ACCESS_NON_WRITEABLE);
nir_ssa_def *bvh_base = nir_iadd(b, accel_struct, nir_u2u64(b, bvh_offset));
nir_def *bvh_base = nir_iadd(b, accel_struct, nir_u2u64(b, bvh_offset));
bvh_base = build_addr_to_node(b, bvh_base);
rq_store_var(b, index, vars->root_bvh_base, bvh_base, 0x1);
@ -371,7 +369,7 @@ lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *ins
rq_store_var(b, index, vars->trav.stack, nir_imm_int(b, 0), 0x1);
rq_store_var(b, index, vars->trav.stack_low_watermark, nir_imm_int(b, 0), 0x1);
} else {
nir_ssa_def *base_offset = nir_imul_imm(b, nir_load_local_invocation_index(b), sizeof(uint32_t));
nir_def *base_offset = nir_imul_imm(b, nir_load_local_invocation_index(b), sizeof(uint32_t));
base_offset = nir_iadd_imm(b, base_offset, vars->shared_base);
rq_store_var(b, index, vars->trav.stack, base_offset, 0x1);
rq_store_var(b, index, vars->trav.stack_low_watermark, base_offset, 0x1);
@ -387,8 +385,8 @@ lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *ins
rq_store_var(b, index, vars->incomplete, nir_imm_bool(b, !(instance->debug_flags & RADV_DEBUG_NO_RT)), 0x1);
}
static nir_ssa_def *
lower_rq_load(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars)
static nir_def *
lower_rq_load(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars)
{
bool committed = nir_intrinsic_committed(instr);
struct ray_query_intersection_vars *intersection = committed ? &vars->closest : &vars->candidate;
@ -409,7 +407,7 @@ lower_rq_load(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, st
case nir_ray_query_value_intersection_geometry_index:
return nir_iand_imm(b, rq_load_var(b, index, intersection->geometry_id_and_flags), 0xFFFFFF);
case nir_ray_query_value_intersection_instance_custom_index: {
nir_ssa_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
nir_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
return nir_iand_imm(
b,
nir_build_load_global(
@ -418,27 +416,27 @@ lower_rq_load(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, st
0xFFFFFF);
}
case nir_ray_query_value_intersection_instance_id: {
nir_ssa_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
nir_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
return nir_build_load_global(
b, 1, 32, nir_iadd_imm(b, instance_node_addr, offsetof(struct radv_bvh_instance_node, instance_id)));
}
case nir_ray_query_value_intersection_instance_sbt_index:
return nir_iand_imm(b, rq_load_var(b, index, intersection->sbt_offset_and_flags), 0xFFFFFF);
case nir_ray_query_value_intersection_object_ray_direction: {
nir_ssa_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
nir_ssa_def *wto_matrix[3];
nir_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
nir_def *wto_matrix[3];
nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
return nir_build_vec3_mat_mult(b, rq_load_var(b, index, vars->direction), wto_matrix, false);
}
case nir_ray_query_value_intersection_object_ray_origin: {
nir_ssa_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
nir_ssa_def *wto_matrix[3];
nir_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
nir_def *wto_matrix[3];
nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
return nir_build_vec3_mat_mult(b, rq_load_var(b, index, vars->origin), wto_matrix, true);
}
case nir_ray_query_value_intersection_object_to_world: {
nir_ssa_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
nir_ssa_def *rows[3];
nir_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
nir_def *rows[3];
for (unsigned r = 0; r < 3; ++r)
rows[r] = nir_build_load_global(
b, 4, 32,
@ -452,19 +450,19 @@ lower_rq_load(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, st
case nir_ray_query_value_intersection_t:
return rq_load_var(b, index, intersection->t);
case nir_ray_query_value_intersection_type: {
nir_ssa_def *intersection_type = rq_load_var(b, index, intersection->intersection_type);
nir_def *intersection_type = rq_load_var(b, index, intersection->intersection_type);
if (!committed)
intersection_type = nir_iadd_imm(b, intersection_type, -1);
return intersection_type;
}
case nir_ray_query_value_intersection_world_to_object: {
nir_ssa_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
nir_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
nir_ssa_def *wto_matrix[3];
nir_def *wto_matrix[3];
nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
nir_ssa_def *vals[3];
nir_def *vals[3];
for (unsigned i = 0; i < 3; ++i)
vals[i] = nir_channel(b, wto_matrix[i], column);
@ -485,7 +483,7 @@ lower_rq_load(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, st
struct traversal_data {
struct ray_query_vars *vars;
nir_ssa_def *index;
nir_def *index;
};
static void
@ -494,7 +492,7 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
{
struct traversal_data *data = args->data;
struct ray_query_vars *vars = data->vars;
nir_ssa_def *index = data->index;
nir_def *index = data->index;
rq_store_var(b, index, vars->candidate.primitive_id, intersection->primitive_id, 1);
rq_store_var(b, index, vars->candidate.geometry_id_and_flags, intersection->geometry_id_and_flags, 1);
@ -510,7 +508,7 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int
{
struct traversal_data *data = args->data;
struct ray_query_vars *vars = data->vars;
nir_ssa_def *index = data->index;
nir_def *index = data->index;
rq_store_var(b, index, vars->candidate.barycentrics, intersection->barycentrics, 3);
rq_store_var(b, index, vars->candidate.primitive_id, intersection->base.primitive_id, 1);
@ -533,7 +531,7 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int
}
static void
store_stack_entry(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value, const struct radv_ray_traversal_args *args)
store_stack_entry(nir_builder *b, nir_def *index, nir_def *value, const struct radv_ray_traversal_args *args)
{
struct traversal_data *data = args->data;
if (data->vars->stack)
@ -542,8 +540,8 @@ store_stack_entry(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value, const
nir_store_shared(b, value, index, .base = 0, .align_mul = 4);
}
static nir_ssa_def *
load_stack_entry(nir_builder *b, nir_ssa_def *index, const struct radv_ray_traversal_args *args)
static nir_def *
load_stack_entry(nir_builder *b, nir_def *index, const struct radv_ray_traversal_args *args)
{
struct traversal_data *data = args->data;
if (data->vars->stack)
@ -552,8 +550,8 @@ load_stack_entry(nir_builder *b, nir_ssa_def *index, const struct radv_ray_trave
return nir_load_shared(b, 1, 32, index, .base = 0, .align_mul = 4);
}
static nir_ssa_def *
lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars, struct radv_device *device)
static nir_def *
lower_rq_proceed(nir_builder *b, nir_def *index, struct ray_query_vars *vars, struct radv_device *device)
{
nir_variable *inv_dir = nir_local_variable_create(b->impl, glsl_vector_type(GLSL_TYPE_FLOAT, 3), "inv_dir");
nir_store_var(b, inv_dir, nir_frcp(b, rq_load_var(b, index, vars->trav.direction)), 0x7);
@ -608,7 +606,7 @@ lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars
nir_push_if(b, rq_load_var(b, index, vars->incomplete));
{
nir_ssa_def *incomplete = radv_build_ray_traversal(device, b, &args);
nir_def *incomplete = radv_build_ray_traversal(device, b, &args);
rq_store_var(b, index, vars->incomplete, nir_iand(b, rq_load_var(b, index, vars->incomplete), incomplete), 1);
}
nir_pop_if(b, NULL);
@ -617,7 +615,7 @@ lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars
}
static void
lower_rq_terminate(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars)
lower_rq_terminate(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars)
{
rq_store_var(b, index, vars->incomplete, nir_imm_false(b), 0x1);
}
@ -663,7 +661,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
continue;
nir_deref_instr *ray_query_deref = nir_instr_as_deref(intrinsic->src[0].ssa->parent_instr);
nir_ssa_def *index = NULL;
nir_def *index = NULL;
if (ray_query_deref->deref_type == nir_deref_type_array) {
index = ray_query_deref->arr.index.ssa;
@ -677,7 +675,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
builder.cursor = nir_before_instr(instr);
nir_ssa_def *new_dest = NULL;
nir_def *new_dest = NULL;
switch (intrinsic->intrinsic) {
case nir_intrinsic_rq_confirm_intersection:
@ -703,7 +701,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
}
if (new_dest)
nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa, new_dest);
nir_def_rewrite_uses(&intrinsic->dest.ssa, new_dest);
nir_instr_remove(instr);
nir_instr_free(instr);

View file

@ -71,8 +71,8 @@ radv_nir_lower_view_index(nir_shader *nir, bool per_primitive)
layer->data.per_primitive = per_primitive;
b.cursor = nir_before_instr(instr);
nir_ssa_def *def = nir_load_var(&b, layer);
nir_ssa_def_rewrite_uses(&load->dest.ssa, def);
nir_def *def = nir_load_var(&b, layer);
nir_def_rewrite_uses(&load->dest.ssa, def);
/* Update inputs_read to reflect that the pass added a new input. */
nir->info.inputs_read |= VARYING_BIT_LAYER;

View file

@ -51,7 +51,7 @@ radv_nir_lower_viewport_to_zero(nir_shader *nir)
b.cursor = nir_before_instr(instr);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_imm_zero(&b, 1, 32));
nir_def_rewrite_uses(&intr->dest.ssa, nir_imm_zero(&b, 1, 32));
progress = true;
break;
}

View file

@ -37,7 +37,7 @@ typedef struct {
const struct radeon_info *rad_info;
} lower_vs_inputs_state;
static nir_ssa_def *
static nir_def *
lower_load_vs_input_from_prolog(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs_state *s)
{
nir_src *offset_src = nir_get_io_offset_src(intrin);
@ -56,7 +56,7 @@ lower_load_vs_input_from_prolog(nir_builder *b, nir_intrinsic_instr *intrin, low
const unsigned arg_bit_size = MAX2(bit_size, 32);
unsigned num_input_args = 1;
nir_ssa_def *input_args[2] = {ac_nir_load_arg(b, &s->args->ac, s->args->vs_inputs[driver_location]), NULL};
nir_def *input_args[2] = {ac_nir_load_arg(b, &s->args->ac, s->args->vs_inputs[driver_location]), NULL};
if (component * 32 + arg_bit_size * num_components > 128) {
assert(bit_size == 64);
@ -64,8 +64,7 @@ lower_load_vs_input_from_prolog(nir_builder *b, nir_intrinsic_instr *intrin, low
input_args[1] = ac_nir_load_arg(b, &s->args->ac, s->args->vs_inputs[driver_location + 1]);
}
nir_ssa_def *extracted =
nir_extract_bits(b, input_args, num_input_args, component * 32, num_components, arg_bit_size);
nir_def *extracted = nir_extract_bits(b, input_args, num_input_args, component * 32, num_components, arg_bit_size);
if (bit_size < arg_bit_size) {
assert(bit_size == 16);
@ -79,20 +78,20 @@ lower_load_vs_input_from_prolog(nir_builder *b, nir_intrinsic_instr *intrin, low
return extracted;
}
static nir_ssa_def *
static nir_def *
calc_vs_input_index_instance_rate(nir_builder *b, unsigned location, lower_vs_inputs_state *s)
{
const uint32_t divisor = s->pl_key->vs.instance_rate_divisors[location];
nir_ssa_def *start_instance = nir_load_base_instance(b);
nir_def *start_instance = nir_load_base_instance(b);
if (divisor == 0)
return start_instance;
nir_ssa_def *instance_id = nir_udiv_imm(b, nir_load_instance_id(b), divisor);
nir_def *instance_id = nir_udiv_imm(b, nir_load_instance_id(b), divisor);
return nir_iadd(b, start_instance, instance_id);
}
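A scalar sketch of the index computed above (a divisor of 0 means every instance reads the element selected by the base instance, per the vertex attribute divisor rules):

   #include <stdint.h>

   static uint32_t instance_rate_index(uint32_t base_instance, uint32_t instance_id,
                                       uint32_t divisor)
   {
      if (divisor == 0)
         return base_instance; /* all instances fetch the same element */
      return base_instance + instance_id / divisor;
   }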
static nir_ssa_def *
static nir_def *
calc_vs_input_index(nir_builder *b, unsigned location, lower_vs_inputs_state *s)
{
if (s->pl_key->vs.instance_rate_inputs & BITFIELD_BIT(location))
@ -112,7 +111,7 @@ can_use_untyped_load(const struct util_format_description *f, const unsigned bit
return c->size == bit_size && bit_size >= 32;
}
static nir_ssa_def *
static nir_def *
oob_input_load_value(nir_builder *b, const unsigned channel_idx, const unsigned bit_size, const bool is_float)
{
/* 22.1.1. Attribute Location and Component Assignment of Vulkan 1.3 specification:
@ -120,7 +119,7 @@ oob_input_load_value(nir_builder *b, const unsigned channel_idx, const unsigned
* must not use more components than provided by the attribute.
*/
if (bit_size == 64)
return nir_ssa_undef(b, 1, bit_size);
return nir_undef(b, 1, bit_size);
if (channel_idx == 3) {
if (is_float)
@ -175,8 +174,8 @@ first_used_swizzled_channel(const struct util_format_description *f, const unsig
return first_used;
}
static nir_ssa_def *
adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alpha_adjust, nir_ssa_def *alpha)
static nir_def *
adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alpha_adjust, nir_def *alpha)
{
if (alpha_adjust == AC_ALPHA_ADJUST_SSCALED)
alpha = nir_f2u32(b, alpha);
@ -201,7 +200,7 @@ adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alpha_ad
return alpha;
}
static nir_ssa_def *
static nir_def *
lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs_state *s)
{
nir_src *offset_src = nir_get_io_offset_src(intrin);
@ -226,13 +225,13 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs
/* Bitmask of components in bit_size units
* of the current input load that are actually used.
*/
const unsigned dest_use_mask = nir_ssa_def_components_read(&intrin->dest.ssa) << component;
const unsigned dest_use_mask = nir_def_components_read(&intrin->dest.ssa) << component;
/* If the input is entirely unused, just replace it with undef.
* This is just in case we debug this pass without running DCE first.
*/
if (!dest_use_mask)
return nir_ssa_undef(b, dest_num_components, bit_size);
return nir_undef(b, dest_num_components, bit_size);
const uint32_t attrib_binding = s->pl_key->vs.vertex_attribute_bindings[location];
const uint32_t attrib_offset = s->pl_key->vs.vertex_attribute_offsets[location];
@ -244,12 +243,11 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs
const unsigned binding_index = s->info->vs.use_per_attribute_vb_descs ? location : attrib_binding;
const unsigned desc_index = util_bitcount(s->info->vs.vb_desc_usage_mask & u_bit_consecutive(0, binding_index));
nir_ssa_def *vertex_buffers_arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.vertex_buffers);
nir_ssa_def *vertex_buffers =
nir_pack_64_2x32_split(b, vertex_buffers_arg, nir_imm_int(b, s->rad_info->address32_hi));
nir_ssa_def *descriptor = nir_load_smem_amd(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16));
nir_ssa_def *base_index = calc_vs_input_index(b, location, s);
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_def *vertex_buffers_arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.vertex_buffers);
nir_def *vertex_buffers = nir_pack_64_2x32_split(b, vertex_buffers_arg, nir_imm_int(b, s->rad_info->address32_hi));
nir_def *descriptor = nir_load_smem_amd(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16));
nir_def *base_index = calc_vs_input_index(b, location, s);
nir_def *zero = nir_imm_int(b, 0);
/* We currently implement swizzling for all formats in shaders.
* Note, it is possible to specify swizzling in the DST_SEL fields of descriptors,
@ -290,13 +288,13 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs
* This is necessary because the backend can't further roll the const offset
* into the index source of MUBUF / MTBUF instructions.
*/
nir_ssa_def *loads[NIR_MAX_VEC_COMPONENTS] = {0};
nir_def *loads[NIR_MAX_VEC_COMPONENTS] = {0};
unsigned num_loads = 0;
for (unsigned x = 0, channels; x < fetch_num_channels; x += channels) {
channels = fetch_num_channels - x;
const unsigned start = skipped_start + x;
enum pipe_format fetch_format = attrib_format;
nir_ssa_def *index = base_index;
nir_def *index = base_index;
/* Add excess constant offset to the index. */
unsigned const_off = attrib_offset + count_format_bytes(f, 0, start);
@ -339,7 +337,7 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs
}
}
nir_ssa_def *load = loads[0];
nir_def *load = loads[0];
/* Extract the channels we actually need when we couldn't skip starting
* components or had to emit more than one load intrinsic.
@ -357,7 +355,7 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs
* Apply swizzle and alpha adjust according to the format.
*/
const nir_alu_type dst_type = nir_alu_type_get_base_type(nir_intrinsic_dest_type(intrin));
nir_ssa_def *channels[NIR_MAX_VEC_COMPONENTS] = {0};
nir_def *channels[NIR_MAX_VEC_COMPONENTS] = {0};
for (unsigned i = 0; i < dest_num_components; ++i) {
const unsigned c = i + component;
@ -400,7 +398,7 @@ lower_vs_input_instr(nir_builder *b, nir_instr *instr, void *state)
b->cursor = nir_before_instr(instr);
nir_ssa_def *replacement = NULL;
nir_def *replacement = NULL;
if (s->info->vs.dynamic_inputs) {
replacement = lower_load_vs_input_from_prolog(b, intrin, s);
@ -408,7 +406,7 @@ lower_vs_input_instr(nir_builder *b, nir_instr *instr, void *state)
replacement = lower_load_vs_input(b, intrin, s);
}
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, replacement);
nir_def_rewrite_uses(&intrin->dest.ssa, replacement);
nir_instr_remove(instr);
nir_instr_free(instr);

View file

@ -156,15 +156,15 @@ enum {
};
struct dgc_cmdbuf {
nir_ssa_def *descriptor;
nir_def *descriptor;
nir_variable *offset;
};
static void
dgc_emit(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *value)
dgc_emit(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *value)
{
assert(value->bit_size >= 32);
nir_ssa_def *offset = nir_load_var(b, cs->offset);
nir_def *offset = nir_load_var(b, cs->offset);
nir_store_ssbo(b, value, cs->descriptor, offset, .access = ACCESS_NON_READABLE);
nir_store_var(b, cs->offset, nir_iadd_imm(b, offset, value->num_components * value->bit_size / 8), 0x1);
}
@ -188,14 +188,14 @@ dgc_emit(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *value)
nir_pack_64_2x32((b), nir_load_push_constant((b), 2, 32, nir_imm_int((b), 0), \
.base = offsetof(struct radv_dgc_params, field), .range = 8))
static nir_ssa_def *
nir_pkt3(nir_builder *b, unsigned op, nir_ssa_def *len)
static nir_def *
nir_pkt3(nir_builder *b, unsigned op, nir_def *len)
{
len = nir_iand_imm(b, len, 0x3fff);
return nir_ior_imm(b, nir_ishl_imm(b, len, 16), PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op));
}
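The same header encoding in plain C, assuming the usual PM4 type-3 layout (packet type in bits 31:30, dword count in bits 29:16, opcode in bits 15:8):

   #include <stdint.h>

   static uint32_t pkt3_header(uint32_t opcode, uint32_t count)
   {
      return (3u << 30) | ((count & 0x3fffu) << 16) | (opcode << 8);
   }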
static nir_ssa_def *
static nir_def *
dgc_get_nop_packet(nir_builder *b, const struct radv_device *device)
{
if (device->physical_device->rad_info.gfx_ib_pad_with_type2) {
@ -206,18 +206,18 @@ dgc_get_nop_packet(nir_builder *b, const struct radv_device *device)
}
static void
dgc_emit_userdata_vertex(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *vtx_base_sgpr, nir_ssa_def *first_vertex,
nir_ssa_def *first_instance, nir_ssa_def *drawid, const struct radv_device *device)
dgc_emit_userdata_vertex(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *vtx_base_sgpr, nir_def *first_vertex,
nir_def *first_instance, nir_def *drawid, const struct radv_device *device)
{
vtx_base_sgpr = nir_u2u32(b, vtx_base_sgpr);
nir_ssa_def *has_drawid = nir_test_mask(b, vtx_base_sgpr, DGC_USES_DRAWID);
nir_ssa_def *has_baseinstance = nir_test_mask(b, vtx_base_sgpr, DGC_USES_BASEINSTANCE);
nir_def *has_drawid = nir_test_mask(b, vtx_base_sgpr, DGC_USES_DRAWID);
nir_def *has_baseinstance = nir_test_mask(b, vtx_base_sgpr, DGC_USES_BASEINSTANCE);
nir_ssa_def *pkt_cnt = nir_imm_int(b, 1);
nir_def *pkt_cnt = nir_imm_int(b, 1);
pkt_cnt = nir_bcsel(b, has_drawid, nir_iadd_imm(b, pkt_cnt, 1), pkt_cnt);
pkt_cnt = nir_bcsel(b, has_baseinstance, nir_iadd_imm(b, pkt_cnt, 1), pkt_cnt);
nir_ssa_def *values[5] = {
nir_def *values[5] = {
nir_pkt3(b, PKT3_SET_SH_REG, pkt_cnt), nir_iand_imm(b, vtx_base_sgpr, 0x3FFF), first_vertex,
dgc_get_nop_packet(b, device), dgc_get_nop_packet(b, device),
};
@ -230,51 +230,51 @@ dgc_emit_userdata_vertex(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *vtx
}
static void
dgc_emit_instance_count(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *instance_count)
dgc_emit_instance_count(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *instance_count)
{
nir_ssa_def *values[2] = {nir_imm_int(b, PKT3(PKT3_NUM_INSTANCES, 0, false)), instance_count};
nir_def *values[2] = {nir_imm_int(b, PKT3(PKT3_NUM_INSTANCES, 0, false)), instance_count};
dgc_emit(b, cs, nir_vec(b, values, 2));
}
static void
dgc_emit_draw_index_offset_2(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *index_offset, nir_ssa_def *index_count,
nir_ssa_def *max_index_count)
dgc_emit_draw_index_offset_2(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *index_offset, nir_def *index_count,
nir_def *max_index_count)
{
nir_ssa_def *values[5] = {nir_imm_int(b, PKT3(PKT3_DRAW_INDEX_OFFSET_2, 3, false)), max_index_count, index_offset,
index_count, nir_imm_int(b, V_0287F0_DI_SRC_SEL_DMA)};
nir_def *values[5] = {nir_imm_int(b, PKT3(PKT3_DRAW_INDEX_OFFSET_2, 3, false)), max_index_count, index_offset,
index_count, nir_imm_int(b, V_0287F0_DI_SRC_SEL_DMA)};
dgc_emit(b, cs, nir_vec(b, values, 5));
}
static void
dgc_emit_draw_index_auto(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *vertex_count)
dgc_emit_draw_index_auto(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *vertex_count)
{
nir_ssa_def *values[3] = {nir_imm_int(b, PKT3(PKT3_DRAW_INDEX_AUTO, 1, false)), vertex_count,
nir_imm_int(b, V_0287F0_DI_SRC_SEL_AUTO_INDEX)};
nir_def *values[3] = {nir_imm_int(b, PKT3(PKT3_DRAW_INDEX_AUTO, 1, false)), vertex_count,
nir_imm_int(b, V_0287F0_DI_SRC_SEL_AUTO_INDEX)};
dgc_emit(b, cs, nir_vec(b, values, 3));
}
static void
build_dgc_buffer_tail(nir_builder *b, nir_ssa_def *sequence_count, const struct radv_device *device)
build_dgc_buffer_tail(nir_builder *b, nir_def *sequence_count, const struct radv_device *device)
{
nir_ssa_def *global_id = get_global_ids(b, 1);
nir_def *global_id = get_global_ids(b, 1);
nir_ssa_def *cmd_buf_stride = load_param32(b, cmd_buf_stride);
nir_ssa_def *cmd_buf_size = load_param32(b, cmd_buf_size);
nir_def *cmd_buf_stride = load_param32(b, cmd_buf_stride);
nir_def *cmd_buf_size = load_param32(b, cmd_buf_size);
nir_push_if(b, nir_ieq_imm(b, global_id, 0));
{
nir_ssa_def *cmd_buf_tail_start = nir_imul(b, cmd_buf_stride, sequence_count);
nir_def *cmd_buf_tail_start = nir_imul(b, cmd_buf_stride, sequence_count);
nir_variable *offset = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "offset");
nir_store_var(b, offset, cmd_buf_tail_start, 0x1);
nir_ssa_def *dst_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PREPARE);
nir_def *dst_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PREPARE);
nir_push_loop(b);
{
nir_ssa_def *curr_offset = nir_load_var(b, offset);
nir_def *curr_offset = nir_load_var(b, offset);
const unsigned MAX_PACKET_WORDS = 0x3FFC;
nir_push_if(b, nir_ieq(b, curr_offset, cmd_buf_size));
@ -283,7 +283,7 @@ build_dgc_buffer_tail(nir_builder *b, nir_ssa_def *sequence_count, const struct
}
nir_pop_if(b, NULL);
nir_ssa_def *packet, *packet_size;
nir_def *packet, *packet_size;
if (device->physical_device->rad_info.gfx_ib_pad_with_type2) {
packet_size = nir_imm_int(b, 4);
@ -292,7 +292,7 @@ build_dgc_buffer_tail(nir_builder *b, nir_ssa_def *sequence_count, const struct
packet_size = nir_isub(b, cmd_buf_size, curr_offset);
packet_size = nir_umin(b, packet_size, nir_imm_int(b, MAX_PACKET_WORDS * 4));
nir_ssa_def *len = nir_ushr_imm(b, packet_size, 2);
nir_def *len = nir_ushr_imm(b, packet_size, 2);
len = nir_iadd_imm(b, len, -2);
packet = nir_pkt3(b, PKT3_NOP, len);
}
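   /* Illustrative note (not in the original source): a PKT3 count field encodes
    * (total packet dwords - 2), so padding e.g. 32 remaining bytes means
    * packet_size = 32, len = 32 / 4 - 2 = 6 and the filler is PKT3(NOP, 6). */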
@ -309,17 +309,17 @@ build_dgc_buffer_tail(nir_builder *b, nir_ssa_def *sequence_count, const struct
* Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV.
*/
static void
dgc_emit_draw(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base,
nir_ssa_def *draw_params_offset, nir_ssa_def *sequence_id, const struct radv_device *device)
dgc_emit_draw(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
nir_def *draw_params_offset, nir_def *sequence_id, const struct radv_device *device)
{
nir_ssa_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
nir_ssa_def *stream_offset = nir_iadd(b, draw_params_offset, stream_base);
nir_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
nir_def *stream_offset = nir_iadd(b, draw_params_offset, stream_base);
nir_ssa_def *draw_data0 = nir_load_ssbo(b, 4, 32, stream_buf, stream_offset);
nir_ssa_def *vertex_count = nir_channel(b, draw_data0, 0);
nir_ssa_def *instance_count = nir_channel(b, draw_data0, 1);
nir_ssa_def *vertex_offset = nir_channel(b, draw_data0, 2);
nir_ssa_def *first_instance = nir_channel(b, draw_data0, 3);
nir_def *draw_data0 = nir_load_ssbo(b, 4, 32, stream_buf, stream_offset);
nir_def *vertex_count = nir_channel(b, draw_data0, 0);
nir_def *instance_count = nir_channel(b, draw_data0, 1);
nir_def *vertex_offset = nir_channel(b, draw_data0, 2);
nir_def *first_instance = nir_channel(b, draw_data0, 3);
nir_push_if(b, nir_iand(b, nir_ine_imm(b, vertex_count, 0), nir_ine_imm(b, instance_count, 0)));
{
@ -334,20 +334,20 @@ dgc_emit_draw(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, ni
* Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV.
*/
static void
dgc_emit_draw_indexed(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base,
nir_ssa_def *draw_params_offset, nir_ssa_def *sequence_id, nir_ssa_def *max_index_count,
dgc_emit_draw_indexed(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
nir_def *draw_params_offset, nir_def *sequence_id, nir_def *max_index_count,
const struct radv_device *device)
{
nir_ssa_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
nir_ssa_def *stream_offset = nir_iadd(b, draw_params_offset, stream_base);
nir_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
nir_def *stream_offset = nir_iadd(b, draw_params_offset, stream_base);
nir_ssa_def *draw_data0 = nir_load_ssbo(b, 4, 32, stream_buf, stream_offset);
nir_ssa_def *draw_data1 = nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd_imm(b, stream_offset, 16));
nir_ssa_def *index_count = nir_channel(b, draw_data0, 0);
nir_ssa_def *instance_count = nir_channel(b, draw_data0, 1);
nir_ssa_def *first_index = nir_channel(b, draw_data0, 2);
nir_ssa_def *vertex_offset = nir_channel(b, draw_data0, 3);
nir_ssa_def *first_instance = nir_channel(b, draw_data1, 0);
nir_def *draw_data0 = nir_load_ssbo(b, 4, 32, stream_buf, stream_offset);
nir_def *draw_data1 = nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd_imm(b, stream_offset, 16));
nir_def *index_count = nir_channel(b, draw_data0, 0);
nir_def *instance_count = nir_channel(b, draw_data0, 1);
nir_def *first_index = nir_channel(b, draw_data0, 2);
nir_def *vertex_offset = nir_channel(b, draw_data0, 3);
nir_def *first_instance = nir_channel(b, draw_data1, 0);
nir_push_if(b, nir_iand(b, nir_ine_imm(b, index_count, 0), nir_ine_imm(b, instance_count, 0)));
{
@ -362,25 +362,25 @@ dgc_emit_draw_indexed(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream
* Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV.
*/
static void
dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base,
nir_ssa_def *index_buffer_offset, nir_ssa_def *ibo_type_32, nir_ssa_def *ibo_type_8,
dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
nir_def *index_buffer_offset, nir_def *ibo_type_32, nir_def *ibo_type_8,
nir_variable *index_size_var, nir_variable *max_index_count_var, const struct radv_device *device)
{
nir_ssa_def *index_stream_offset = nir_iadd(b, index_buffer_offset, stream_base);
nir_ssa_def *data = nir_load_ssbo(b, 4, 32, stream_buf, index_stream_offset);
nir_def *index_stream_offset = nir_iadd(b, index_buffer_offset, stream_base);
nir_def *data = nir_load_ssbo(b, 4, 32, stream_buf, index_stream_offset);
nir_ssa_def *vk_index_type = nir_channel(b, data, 3);
nir_ssa_def *index_type = nir_bcsel(b, nir_ieq(b, vk_index_type, ibo_type_32), nir_imm_int(b, V_028A7C_VGT_INDEX_32),
nir_imm_int(b, V_028A7C_VGT_INDEX_16));
nir_def *vk_index_type = nir_channel(b, data, 3);
nir_def *index_type = nir_bcsel(b, nir_ieq(b, vk_index_type, ibo_type_32), nir_imm_int(b, V_028A7C_VGT_INDEX_32),
nir_imm_int(b, V_028A7C_VGT_INDEX_16));
index_type = nir_bcsel(b, nir_ieq(b, vk_index_type, ibo_type_8), nir_imm_int(b, V_028A7C_VGT_INDEX_8), index_type);
nir_ssa_def *index_size = nir_iand_imm(b, nir_ushr(b, nir_imm_int(b, 0x142), nir_imul_imm(b, index_type, 4)), 0xf);
nir_def *index_size = nir_iand_imm(b, nir_ushr(b, nir_imm_int(b, 0x142), nir_imul_imm(b, index_type, 4)), 0xf);
nir_store_var(b, index_size_var, index_size, 0x1);
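   /* Illustrative note (not in the original source): 0x142 acts as a nibble lookup
    * table indexed by index_type (VGT_INDEX_16/32/8 = 0/1/2):
    * (0x142 >> 0) & 0xf = 2 bytes, (0x142 >> 4) & 0xf = 4 bytes,
    * (0x142 >> 8) & 0xf = 1 byte. */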
nir_ssa_def *max_index_count = nir_udiv(b, nir_channel(b, data, 2), index_size);
nir_def *max_index_count = nir_udiv(b, nir_channel(b, data, 2), index_size);
nir_store_var(b, max_index_count_var, max_index_count, 0x1);
nir_ssa_def *cmd_values[3 + 2 + 3];
nir_def *cmd_values[3 + 2 + 3];
if (device->physical_device->rad_info.gfx_level >= GFX9) {
unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
@ -396,7 +396,7 @@ dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream
cmd_values[2] = dgc_get_nop_packet(b, device);
}
nir_ssa_def *addr_upper = nir_channel(b, data, 1);
nir_def *addr_upper = nir_channel(b, data, 1);
addr_upper = nir_ishr_imm(b, nir_ishl_imm(b, addr_upper, 16), 16);
cmd_values[3] = nir_imm_int(b, PKT3(PKT3_INDEX_BASE, 1, 0));
@ -412,26 +412,26 @@ dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream
* Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV.
*/
static void
dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base,
nir_ssa_def *push_const_mask, nir_variable *upload_offset)
dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
nir_def *push_const_mask, nir_variable *upload_offset)
{
nir_ssa_def *vbo_cnt = load_param8(b, vbo_cnt);
nir_ssa_def *const_copy = nir_ine_imm(b, load_param8(b, const_copy), 0);
nir_ssa_def *const_copy_size = load_param16(b, const_copy_size);
nir_ssa_def *const_copy_words = nir_ushr_imm(b, const_copy_size, 2);
nir_def *vbo_cnt = load_param8(b, vbo_cnt);
nir_def *const_copy = nir_ine_imm(b, load_param8(b, const_copy), 0);
nir_def *const_copy_size = load_param16(b, const_copy_size);
nir_def *const_copy_words = nir_ushr_imm(b, const_copy_size, 2);
const_copy_words = nir_bcsel(b, const_copy, const_copy_words, nir_imm_int(b, 0));
nir_variable *idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "const_copy_idx");
nir_store_var(b, idx, nir_imm_int(b, 0), 0x1);
nir_ssa_def *param_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PARAMS);
nir_ssa_def *param_offset = nir_imul_imm(b, vbo_cnt, 24);
nir_ssa_def *param_offset_offset = nir_iadd_imm(b, param_offset, MESA_VULKAN_SHADER_STAGES * 12);
nir_ssa_def *param_const_offset =
nir_def *param_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PARAMS);
nir_def *param_offset = nir_imul_imm(b, vbo_cnt, 24);
nir_def *param_offset_offset = nir_iadd_imm(b, param_offset, MESA_VULKAN_SHADER_STAGES * 12);
nir_def *param_const_offset =
nir_iadd_imm(b, param_offset, MAX_PUSH_CONSTANTS_SIZE + MESA_VULKAN_SHADER_STAGES * 12);
nir_push_loop(b);
{
nir_ssa_def *cur_idx = nir_load_var(b, idx);
nir_def *cur_idx = nir_load_var(b, idx);
nir_push_if(b, nir_uge(b, cur_idx, const_copy_words));
{
nir_jump(b, nir_jump_break);
@ -440,14 +440,14 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
nir_variable *data = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "copy_data");
nir_ssa_def *update = nir_iand(b, push_const_mask, nir_ishl(b, nir_imm_int64(b, 1), cur_idx));
nir_def *update = nir_iand(b, push_const_mask, nir_ishl(b, nir_imm_int64(b, 1), cur_idx));
update = nir_bcsel(b, nir_ult_imm(b, cur_idx, 64 /* bits in push_const_mask */), update, nir_imm_int64(b, 0));
nir_push_if(b, nir_ine_imm(b, update, 0));
{
nir_ssa_def *stream_offset =
nir_def *stream_offset =
nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_offset_offset, nir_ishl_imm(b, cur_idx, 2)));
nir_ssa_def *new_data = nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd(b, stream_base, stream_offset));
nir_def *new_data = nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd(b, stream_base, stream_offset));
nir_store_var(b, data, new_data, 0x1);
}
nir_push_else(b, NULL);
@ -468,27 +468,27 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
nir_variable *shader_idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "shader_idx");
nir_store_var(b, shader_idx, nir_imm_int(b, 0), 0x1);
nir_ssa_def *shader_cnt = load_param16(b, push_constant_shader_cnt);
nir_def *shader_cnt = load_param16(b, push_constant_shader_cnt);
nir_push_loop(b);
{
nir_ssa_def *cur_shader_idx = nir_load_var(b, shader_idx);
nir_def *cur_shader_idx = nir_load_var(b, shader_idx);
nir_push_if(b, nir_uge(b, cur_shader_idx, shader_cnt));
{
nir_jump(b, nir_jump_break);
}
nir_pop_if(b, NULL);
nir_ssa_def *reg_info =
nir_def *reg_info =
nir_load_ssbo(b, 3, 32, param_buf, nir_iadd(b, param_offset, nir_imul_imm(b, cur_shader_idx, 12)));
nir_ssa_def *upload_sgpr = nir_ubfe_imm(b, nir_channel(b, reg_info, 0), 0, 16);
nir_ssa_def *inline_sgpr = nir_ubfe_imm(b, nir_channel(b, reg_info, 0), 16, 16);
nir_ssa_def *inline_mask = nir_pack_64_2x32(b, nir_channels(b, reg_info, 0x6));
nir_def *upload_sgpr = nir_ubfe_imm(b, nir_channel(b, reg_info, 0), 0, 16);
nir_def *inline_sgpr = nir_ubfe_imm(b, nir_channel(b, reg_info, 0), 16, 16);
nir_def *inline_mask = nir_pack_64_2x32(b, nir_channels(b, reg_info, 0x6));
nir_push_if(b, nir_ine_imm(b, upload_sgpr, 0));
{
nir_ssa_def *pkt[3] = {nir_imm_int(b, PKT3(PKT3_SET_SH_REG, 1, 0)), upload_sgpr,
nir_iadd(b, load_param32(b, upload_addr), nir_load_var(b, upload_offset))};
nir_def *pkt[3] = {nir_imm_int(b, PKT3(PKT3_SET_SH_REG, 1, 0)), upload_sgpr,
nir_iadd(b, load_param32(b, upload_addr), nir_load_var(b, upload_offset))};
dgc_emit(b, cs, nir_vec(b, pkt, 3));
}
@ -496,23 +496,23 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
nir_push_if(b, nir_ine_imm(b, inline_sgpr, 0));
{
nir_ssa_def *inline_len = nir_bit_count(b, inline_mask);
nir_def *inline_len = nir_bit_count(b, inline_mask);
nir_store_var(b, idx, nir_imm_int(b, 0), 0x1);
nir_ssa_def *pkt[2] = {nir_pkt3(b, PKT3_SET_SH_REG, inline_len), inline_sgpr};
nir_def *pkt[2] = {nir_pkt3(b, PKT3_SET_SH_REG, inline_len), inline_sgpr};
dgc_emit(b, cs, nir_vec(b, pkt, 2));
nir_push_loop(b);
{
nir_ssa_def *cur_idx = nir_load_var(b, idx);
nir_def *cur_idx = nir_load_var(b, idx);
nir_push_if(b, nir_uge_imm(b, cur_idx, 64 /* bits in inline_mask */));
{
nir_jump(b, nir_jump_break);
}
nir_pop_if(b, NULL);
nir_ssa_def *l = nir_ishl(b, nir_imm_int64(b, 1), cur_idx);
nir_def *l = nir_ishl(b, nir_imm_int64(b, 1), cur_idx);
nir_push_if(b, nir_ieq_imm(b, nir_iand(b, l, inline_mask), 0));
{
nir_store_var(b, idx, nir_iadd_imm(b, cur_idx, 1), 0x1);
@ -522,15 +522,15 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
nir_variable *data = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "copy_data");
nir_ssa_def *update = nir_iand(b, push_const_mask, nir_ishl(b, nir_imm_int64(b, 1), cur_idx));
nir_def *update = nir_iand(b, push_const_mask, nir_ishl(b, nir_imm_int64(b, 1), cur_idx));
update =
nir_bcsel(b, nir_ult_imm(b, cur_idx, 64 /* bits in push_const_mask */), update, nir_imm_int64(b, 0));
nir_push_if(b, nir_ine_imm(b, update, 0));
{
nir_ssa_def *stream_offset =
nir_def *stream_offset =
nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_offset_offset, nir_ishl_imm(b, cur_idx, 2)));
nir_ssa_def *new_data = nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd(b, stream_base, stream_offset));
nir_def *new_data = nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd(b, stream_base, stream_offset));
nir_store_var(b, data, new_data, 0x1);
}
nir_push_else(b, NULL);
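As a sketch of the mask test emitted above (assuming cur_idx and push_const_mask are the nir_defs from the surrounding loop): bit i of the 64-bit push_const_mask selects whether push-constant dword i is sourced from the indirect command stream, and the nir_ult_imm guard zeroes the result whenever the index would shift past bit 63.

nir_def *bit    = nir_ishl(b, nir_imm_int64(b, 1), cur_idx);
nir_def *update = nir_iand(b, push_const_mask, bit);
update = nir_bcsel(b, nir_ult_imm(b, cur_idx, 64), update, nir_imm_int64(b, 0));
nir_def *from_stream = nir_ine_imm(b, update, 0);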
@ -558,10 +558,10 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
* For emitting VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV.
*/
static void
dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base,
nir_ssa_def *vbo_bind_mask, nir_variable *upload_offset, const struct radv_device *device)
dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
nir_def *vbo_bind_mask, nir_variable *upload_offset, const struct radv_device *device)
{
nir_ssa_def *vbo_cnt = load_param8(b, vbo_cnt);
nir_def *vbo_cnt = load_param8(b, vbo_cnt);
nir_variable *vbo_idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "vbo_idx");
nir_store_var(b, vbo_idx, nir_imm_int(b, 0), 0x1);
@ -573,40 +573,39 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
}
nir_pop_if(b, NULL);
nir_ssa_def *vbo_offset = nir_imul_imm(b, nir_load_var(b, vbo_idx), 16);
nir_def *vbo_offset = nir_imul_imm(b, nir_load_var(b, vbo_idx), 16);
nir_variable *vbo_data = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uvec4_type(), "vbo_data");
nir_ssa_def *param_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PARAMS);
nir_def *param_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PARAMS);
nir_store_var(b, vbo_data, nir_load_ssbo(b, 4, 32, param_buf, vbo_offset), 0xf);
nir_ssa_def *vbo_override =
nir_def *vbo_override =
nir_ine_imm(b, nir_iand(b, vbo_bind_mask, nir_ishl(b, nir_imm_int(b, 1), nir_load_var(b, vbo_idx))), 0);
nir_push_if(b, vbo_override);
{
nir_ssa_def *vbo_offset_offset =
nir_def *vbo_offset_offset =
nir_iadd(b, nir_imul_imm(b, vbo_cnt, 16), nir_imul_imm(b, nir_load_var(b, vbo_idx), 8));
nir_ssa_def *vbo_over_data = nir_load_ssbo(b, 2, 32, param_buf, vbo_offset_offset);
nir_ssa_def *stream_offset =
nir_iadd(b, stream_base, nir_iand_imm(b, nir_channel(b, vbo_over_data, 0), 0x7FFF));
nir_ssa_def *stream_data = nir_load_ssbo(b, 4, 32, stream_buf, stream_offset);
nir_def *vbo_over_data = nir_load_ssbo(b, 2, 32, param_buf, vbo_offset_offset);
nir_def *stream_offset = nir_iadd(b, stream_base, nir_iand_imm(b, nir_channel(b, vbo_over_data, 0), 0x7FFF));
nir_def *stream_data = nir_load_ssbo(b, 4, 32, stream_buf, stream_offset);
nir_ssa_def *va = nir_pack_64_2x32(b, nir_trim_vector(b, stream_data, 2));
nir_ssa_def *size = nir_channel(b, stream_data, 2);
nir_ssa_def *stride = nir_channel(b, stream_data, 3);
nir_def *va = nir_pack_64_2x32(b, nir_trim_vector(b, stream_data, 2));
nir_def *size = nir_channel(b, stream_data, 2);
nir_def *stride = nir_channel(b, stream_data, 3);
nir_ssa_def *dyn_stride = nir_test_mask(b, nir_channel(b, vbo_over_data, 0), DGC_DYNAMIC_STRIDE);
nir_ssa_def *old_stride = nir_ubfe_imm(b, nir_channel(b, nir_load_var(b, vbo_data), 1), 16, 14);
nir_def *dyn_stride = nir_test_mask(b, nir_channel(b, vbo_over_data, 0), DGC_DYNAMIC_STRIDE);
nir_def *old_stride = nir_ubfe_imm(b, nir_channel(b, nir_load_var(b, vbo_data), 1), 16, 14);
stride = nir_bcsel(b, dyn_stride, stride, old_stride);
nir_ssa_def *use_per_attribute_vb_descs = nir_test_mask(b, nir_channel(b, vbo_over_data, 0), 1u << 31);
nir_def *use_per_attribute_vb_descs = nir_test_mask(b, nir_channel(b, vbo_over_data, 0), 1u << 31);
nir_variable *num_records =
nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "num_records");
nir_store_var(b, num_records, size, 0x1);
nir_push_if(b, use_per_attribute_vb_descs);
{
nir_ssa_def *attrib_end = nir_ubfe_imm(b, nir_channel(b, vbo_over_data, 1), 16, 16);
nir_ssa_def *attrib_index_offset = nir_ubfe_imm(b, nir_channel(b, vbo_over_data, 1), 0, 16);
nir_def *attrib_end = nir_ubfe_imm(b, nir_channel(b, vbo_over_data, 1), 16, 16);
nir_def *attrib_index_offset = nir_ubfe_imm(b, nir_channel(b, vbo_over_data, 1), 0, 16);
nir_push_if(b, nir_ult(b, nir_load_var(b, num_records), attrib_end));
{
@ -619,7 +618,7 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
}
nir_push_else(b, NULL);
{
nir_ssa_def *r = nir_iadd(
nir_def *r = nir_iadd(
b, nir_iadd_imm(b, nir_udiv(b, nir_isub(b, nir_load_var(b, num_records), attrib_end), stride), 1),
attrib_index_offset);
nir_store_var(b, num_records, r, 0x1);
@ -627,13 +626,13 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
nir_pop_if(b, NULL);
nir_pop_if(b, NULL);
nir_ssa_def *convert_cond = nir_ine_imm(b, nir_load_var(b, num_records), 0);
nir_def *convert_cond = nir_ine_imm(b, nir_load_var(b, num_records), 0);
if (device->physical_device->rad_info.gfx_level == GFX9)
convert_cond = nir_imm_false(b);
else if (device->physical_device->rad_info.gfx_level != GFX8)
convert_cond = nir_iand(b, convert_cond, nir_ieq_imm(b, stride, 0));
nir_ssa_def *new_records =
nir_def *new_records =
nir_iadd(b, nir_imul(b, nir_iadd_imm(b, nir_load_var(b, num_records), -1), stride), attrib_end);
new_records = nir_bcsel(b, convert_cond, new_records, nir_load_var(b, num_records));
nir_store_var(b, num_records, new_records, 0x1);
@ -643,7 +642,7 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
if (device->physical_device->rad_info.gfx_level != GFX8) {
nir_push_if(b, nir_ine_imm(b, stride, 0));
{
nir_ssa_def *r = nir_iadd(b, nir_load_var(b, num_records), nir_iadd_imm(b, stride, -1));
nir_def *r = nir_iadd(b, nir_load_var(b, num_records), nir_iadd_imm(b, stride, -1));
nir_store_var(b, num_records, nir_udiv(b, r, stride), 0x1);
}
nir_pop_if(b, NULL);
@ -651,19 +650,18 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
}
nir_pop_if(b, NULL);
nir_ssa_def *rsrc_word3 = nir_channel(b, nir_load_var(b, vbo_data), 3);
nir_def *rsrc_word3 = nir_channel(b, nir_load_var(b, vbo_data), 3);
if (device->physical_device->rad_info.gfx_level >= GFX10) {
nir_ssa_def *oob_select = nir_bcsel(b, nir_ieq_imm(b, stride, 0), nir_imm_int(b, V_008F0C_OOB_SELECT_RAW),
nir_imm_int(b, V_008F0C_OOB_SELECT_STRUCTURED));
nir_def *oob_select = nir_bcsel(b, nir_ieq_imm(b, stride, 0), nir_imm_int(b, V_008F0C_OOB_SELECT_RAW),
nir_imm_int(b, V_008F0C_OOB_SELECT_STRUCTURED));
rsrc_word3 = nir_iand_imm(b, rsrc_word3, C_008F0C_OOB_SELECT);
rsrc_word3 = nir_ior(b, rsrc_word3, nir_ishl_imm(b, oob_select, 28));
}
nir_ssa_def *va_hi = nir_iand_imm(b, nir_unpack_64_2x32_split_y(b, va), 0xFFFF);
nir_def *va_hi = nir_iand_imm(b, nir_unpack_64_2x32_split_y(b, va), 0xFFFF);
stride = nir_iand_imm(b, stride, 0x3FFF);
nir_ssa_def *new_vbo_data[4] = {nir_unpack_64_2x32_split_x(b, va),
nir_ior(b, nir_ishl_imm(b, stride, 16), va_hi), nir_load_var(b, num_records),
rsrc_word3};
nir_def *new_vbo_data[4] = {nir_unpack_64_2x32_split_x(b, va), nir_ior(b, nir_ishl_imm(b, stride, 16), va_hi),
nir_load_var(b, num_records), rsrc_word3};
nir_store_var(b, vbo_data, nir_vec(b, new_vbo_data, 4), 0xf);
}
nir_pop_if(b, NULL);
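A compact restatement of the descriptor packing just above, as a sketch (va and stride are assumed to be the nir_defs computed earlier in the block): the second dword of the buffer descriptor carries the 14-bit stride in bits 16..29 and address bits 32..47 in its low half.

static nir_def *
pack_vbo_dword1_sketch(nir_builder *b, nir_def *va, nir_def *stride)
{
   nir_def *va_hi    = nir_iand_imm(b, nir_unpack_64_2x32_split_y(b, va), 0xFFFF);
   nir_def *stride14 = nir_iand_imm(b, stride, 0x3FFF);
   return nir_ior(b, nir_ishl_imm(b, stride14, 16), va_hi);
}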
@ -672,23 +670,23 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
* num_records and stride are zero. This doesn't seem necessary on GFX8, GFX10 and
* GFX10.3 but it doesn't hurt.
*/
nir_ssa_def *num_records = nir_channel(b, nir_load_var(b, vbo_data), 2);
nir_ssa_def *buf_va =
nir_def *num_records = nir_channel(b, nir_load_var(b, vbo_data), 2);
nir_def *buf_va =
nir_iand_imm(b, nir_pack_64_2x32(b, nir_trim_vector(b, nir_load_var(b, vbo_data), 2)), (1ull << 48) - 1ull);
nir_push_if(b, nir_ior(b, nir_ieq_imm(b, num_records, 0), nir_ieq_imm(b, buf_va, 0)));
{
nir_ssa_def *new_vbo_data[4] = {nir_imm_int(b, 0), nir_imm_int(b, 0), nir_imm_int(b, 0), nir_imm_int(b, 0)};
nir_def *new_vbo_data[4] = {nir_imm_int(b, 0), nir_imm_int(b, 0), nir_imm_int(b, 0), nir_imm_int(b, 0)};
nir_store_var(b, vbo_data, nir_vec(b, new_vbo_data, 4), 0xf);
}
nir_pop_if(b, NULL);
nir_ssa_def *upload_off = nir_iadd(b, nir_load_var(b, upload_offset), vbo_offset);
nir_def *upload_off = nir_iadd(b, nir_load_var(b, upload_offset), vbo_offset);
nir_store_ssbo(b, nir_load_var(b, vbo_data), cs->descriptor, upload_off, .access = ACCESS_NON_READABLE);
nir_store_var(b, vbo_idx, nir_iadd_imm(b, nir_load_var(b, vbo_idx), 1), 0x1);
}
nir_pop_loop(b, NULL);
nir_ssa_def *packet[3] = {nir_imm_int(b, PKT3(PKT3_SET_SH_REG, 1, 0)), load_param16(b, vbo_reg),
nir_iadd(b, load_param32(b, upload_addr), nir_load_var(b, upload_offset))};
nir_def *packet[3] = {nir_imm_int(b, PKT3(PKT3_SET_SH_REG, 1, 0)), load_param16(b, vbo_reg),
nir_iadd(b, load_param32(b, upload_addr), nir_load_var(b, upload_offset))};
dgc_emit(b, cs, nir_vec(b, packet, 3));
@ -701,15 +699,15 @@ build_dgc_prepare_shader(struct radv_device *dev)
nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_dgc_prepare");
b.shader->info.workgroup_size[0] = 64;
nir_ssa_def *global_id = get_global_ids(&b, 1);
nir_def *global_id = get_global_ids(&b, 1);
nir_ssa_def *sequence_id = global_id;
nir_def *sequence_id = global_id;
nir_ssa_def *cmd_buf_stride = load_param32(&b, cmd_buf_stride);
nir_ssa_def *sequence_count = load_param32(&b, sequence_count);
nir_ssa_def *stream_stride = load_param32(&b, stream_stride);
nir_def *cmd_buf_stride = load_param32(&b, cmd_buf_stride);
nir_def *sequence_count = load_param32(&b, sequence_count);
nir_def *stream_stride = load_param32(&b, stream_stride);
nir_ssa_def *use_count = nir_iand_imm(&b, sequence_count, 1u << 31);
nir_def *use_count = nir_iand_imm(&b, sequence_count, 1u << 31);
sequence_count = nir_iand_imm(&b, sequence_count, UINT32_MAX >> 1);
/* The effective number of draws is
@ -720,8 +718,8 @@ build_dgc_prepare_shader(struct radv_device *dev)
nir_push_if(&b, nir_ine_imm(&b, use_count, 0));
{
nir_ssa_def *count_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_COUNT);
nir_ssa_def *cnt = nir_load_ssbo(&b, 1, 32, count_buf, nir_imm_int(&b, 0));
nir_def *count_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_COUNT);
nir_def *cnt = nir_load_ssbo(&b, 1, 32, count_buf, nir_imm_int(&b, 0));
/* Must clamp count against the API count explicitly.
* The workgroup potentially contains more threads than maxSequencesCount from API,
* and we have to ensure these threads write NOP packets to pad out the IB. */
@ -739,10 +737,10 @@ build_dgc_prepare_shader(struct radv_device *dev)
.offset = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "cmd_buf_offset"),
};
nir_store_var(&b, cmd_buf.offset, nir_imul(&b, global_id, cmd_buf_stride), 1);
nir_ssa_def *cmd_buf_end = nir_iadd(&b, nir_load_var(&b, cmd_buf.offset), cmd_buf_stride);
nir_def *cmd_buf_end = nir_iadd(&b, nir_load_var(&b, cmd_buf.offset), cmd_buf_stride);
nir_ssa_def *stream_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_STREAM);
nir_ssa_def *stream_base = nir_imul(&b, sequence_id, stream_stride);
nir_def *stream_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_STREAM);
nir_def *stream_base = nir_imul(&b, sequence_id, stream_stride);
nir_variable *upload_offset =
nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "upload_offset");
@ -750,14 +748,14 @@ build_dgc_prepare_shader(struct radv_device *dev)
&b, upload_offset,
nir_iadd(&b, load_param32(&b, cmd_buf_size), nir_imul(&b, load_param32(&b, upload_stride), sequence_id)), 0x1);
nir_ssa_def *vbo_bind_mask = load_param32(&b, vbo_bind_mask);
nir_def *vbo_bind_mask = load_param32(&b, vbo_bind_mask);
nir_push_if(&b, nir_ine_imm(&b, vbo_bind_mask, 0));
{
dgc_emit_vertex_buffer(&b, &cmd_buf, stream_buf, stream_base, vbo_bind_mask, upload_offset, dev);
}
nir_pop_if(&b, NULL);
nir_ssa_def *push_const_mask = load_param64(&b, push_constant_mask);
nir_def *push_const_mask = load_param64(&b, push_constant_mask);
nir_push_if(&b, nir_ine_imm(&b, push_const_mask, 0));
{
dgc_emit_push_constant(&b, &cmd_buf, stream_buf, stream_base, push_const_mask, upload_offset);
@ -777,7 +775,7 @@ build_dgc_prepare_shader(struct radv_device *dev)
nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "max_index_count");
nir_store_var(&b, max_index_count_var, load_param32(&b, max_index_count), 0x1);
nir_ssa_def *bind_index_buffer = nir_ieq_imm(&b, nir_load_var(&b, index_size_var), 0);
nir_def *bind_index_buffer = nir_ieq_imm(&b, nir_load_var(&b, index_size_var), 0);
nir_push_if(&b, bind_index_buffer);
{
dgc_emit_index_buffer(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, index_buffer_offset),
@ -786,8 +784,8 @@ build_dgc_prepare_shader(struct radv_device *dev)
}
nir_pop_if(&b, NULL);
nir_ssa_def *index_size = nir_load_var(&b, index_size_var);
nir_ssa_def *max_index_count = nir_load_var(&b, max_index_count_var);
nir_def *index_size = nir_load_var(&b, index_size_var);
nir_def *max_index_count = nir_load_var(&b, max_index_count_var);
index_size = nir_bcsel(&b, bind_index_buffer, nir_load_var(&b, index_size_var), index_size);
max_index_count = nir_bcsel(&b, bind_index_buffer, nir_load_var(&b, max_index_count_var), max_index_count);
@ -803,7 +801,7 @@ build_dgc_prepare_shader(struct radv_device *dev)
if (dev->physical_device->rad_info.gfx_ib_pad_with_type2) {
nir_push_loop(&b);
{
nir_ssa_def *curr_offset = nir_load_var(&b, cmd_buf.offset);
nir_def *curr_offset = nir_load_var(&b, cmd_buf.offset);
nir_push_if(&b, nir_ieq(&b, curr_offset, cmd_buf_end));
{
@ -811,16 +809,16 @@ build_dgc_prepare_shader(struct radv_device *dev)
}
nir_pop_if(&b, NULL);
nir_ssa_def *pkt = nir_imm_int(&b, PKT2_NOP_PAD);
nir_def *pkt = nir_imm_int(&b, PKT2_NOP_PAD);
dgc_emit(&b, &cmd_buf, pkt);
}
nir_pop_loop(&b, NULL);
} else {
nir_ssa_def *cnt = nir_isub(&b, cmd_buf_end, nir_load_var(&b, cmd_buf.offset));
nir_def *cnt = nir_isub(&b, cmd_buf_end, nir_load_var(&b, cmd_buf.offset));
cnt = nir_ushr_imm(&b, cnt, 2);
cnt = nir_iadd_imm(&b, cnt, -2);
nir_ssa_def *pkt = nir_pkt3(&b, PKT3_NOP, cnt);
nir_def *pkt = nir_pkt3(&b, PKT3_NOP, cnt);
dgc_emit(&b, &cmd_buf, pkt);
}
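To make the count arithmetic above concrete (under the usual PM4 assumption that a PKT3 header occupies one dword and its count field encodes the number of payload dwords minus one): if R bytes remain between the current offset and cmd_buf_end, the shader has R/4 dwords to fill, one of which is the NOP header itself, so the encoded count is R/4 - 2. With 32 bytes left, for example, that is 8 dwords and a count of 6.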


@ -58,8 +58,7 @@ radv_get_pipelinestat_query_size(struct radv_device *device)
}
static void
radv_store_availability(nir_builder *b, nir_ssa_def *flags, nir_ssa_def *dst_buf, nir_ssa_def *offset,
nir_ssa_def *value32)
radv_store_availability(nir_builder *b, nir_def *flags, nir_def *dst_buf, nir_def *offset, nir_def *value32)
{
nir_push_if(b, nir_test_mask(b, flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT));
@ -129,23 +128,23 @@ build_occlusion_query_shader(struct radv_device *device)
uint64_t enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask;
unsigned db_count = device->physical_device->rad_info.max_render_backends;
nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
nir_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
nir_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
nir_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
nir_ssa_def *global_id = get_global_ids(&b, 1);
nir_def *global_id = get_global_ids(&b, 1);
nir_ssa_def *input_stride = nir_imm_int(&b, db_count * 16);
nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 8);
nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
nir_def *input_stride = nir_imm_int(&b, db_count * 16);
nir_def *input_base = nir_imul(&b, input_stride, global_id);
nir_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 8);
nir_def *output_base = nir_imul(&b, output_stride, global_id);
nir_store_var(&b, result, nir_imm_int64(&b, 0), 0x1);
nir_store_var(&b, outer_counter, nir_imm_int(&b, 0), 0x1);
nir_store_var(&b, available, nir_imm_true(&b), 0x1);
nir_ssa_def *query_result_wait = nir_test_mask(&b, flags, VK_QUERY_RESULT_WAIT_BIT);
nir_def *query_result_wait = nir_test_mask(&b, flags, VK_QUERY_RESULT_WAIT_BIT);
nir_push_if(&b, query_result_wait);
{
/* Wait on the upper word of the last DB entry. */
@ -156,8 +155,8 @@ build_occlusion_query_shader(struct radv_device *device)
/* Prevent the SSBO load to be moved out of the loop. */
nir_scoped_memory_barrier(&b, SCOPE_INVOCATION, NIR_MEMORY_ACQUIRE, nir_var_mem_ssbo);
nir_ssa_def *load_offset = nir_iadd_imm(&b, input_base, rb_avail_offset);
nir_ssa_def *load = nir_load_ssbo(&b, 1, 32, src_buf, load_offset, .align_mul = 4, .access = ACCESS_COHERENT);
nir_def *load_offset = nir_iadd_imm(&b, input_base, rb_avail_offset);
nir_def *load = nir_load_ssbo(&b, 1, 32, src_buf, load_offset, .align_mul = 4, .access = ACCESS_COHERENT);
nir_push_if(&b, nir_ige_imm(&b, load, 0x80000000));
{
@ -171,24 +170,23 @@ build_occlusion_query_shader(struct radv_device *device)
nir_push_loop(&b);
nir_ssa_def *current_outer_count = nir_load_var(&b, outer_counter);
nir_def *current_outer_count = nir_load_var(&b, outer_counter);
radv_break_on_count(&b, outer_counter, nir_imm_int(&b, db_count));
nir_ssa_def *enabled_cond =
nir_iand_imm(&b, nir_ishl(&b, nir_imm_int64(&b, 1), current_outer_count), enabled_rb_mask);
nir_def *enabled_cond = nir_iand_imm(&b, nir_ishl(&b, nir_imm_int64(&b, 1), current_outer_count), enabled_rb_mask);
nir_push_if(&b, nir_i2b(&b, enabled_cond));
nir_ssa_def *load_offset = nir_imul_imm(&b, current_outer_count, 16);
nir_def *load_offset = nir_imul_imm(&b, current_outer_count, 16);
load_offset = nir_iadd(&b, input_base, load_offset);
nir_ssa_def *load = nir_load_ssbo(&b, 2, 64, src_buf, load_offset, .align_mul = 16);
nir_def *load = nir_load_ssbo(&b, 2, 64, src_buf, load_offset, .align_mul = 16);
nir_store_var(&b, start, nir_channel(&b, load, 0), 0x1);
nir_store_var(&b, end, nir_channel(&b, load, 1), 0x1);
nir_ssa_def *start_done = nir_ilt_imm(&b, nir_load_var(&b, start), 0);
nir_ssa_def *end_done = nir_ilt_imm(&b, nir_load_var(&b, end), 0);
nir_def *start_done = nir_ilt_imm(&b, nir_load_var(&b, start), 0);
nir_def *end_done = nir_ilt_imm(&b, nir_load_var(&b, end), 0);
nir_push_if(&b, nir_iand(&b, start_done, end_done));
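A sketch of the readiness test above, assuming load is the 2x64-bit {begin, end} pair for one render backend: the top bit of each 64-bit counter doubles as an availability flag (the wait loop earlier polls the same bit via the upper dword), so a slot is ready only when both values are negative as signed integers, and the per-backend delta end - begin can then be accumulated.

nir_def *begin = nir_channel(b, load, 0);
nir_def *end   = nir_channel(b, load, 1);
nir_def *ready = nir_iand(b, nir_ilt_imm(b, begin, 0), nir_ilt_imm(b, end, 0));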
@ -206,8 +204,8 @@ build_occlusion_query_shader(struct radv_device *device)
/* Store the result if complete or if partial results have been requested. */
nir_ssa_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
nir_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
nir_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), nir_load_var(&b, available)));
nir_push_if(&b, result_is_64bit);
@ -280,27 +278,27 @@ build_pipeline_statistics_query_shader(struct radv_device *device)
nir_variable *output_offset = nir_local_variable_create(b.impl, glsl_int_type(), "output_offset");
nir_variable *result = nir_local_variable_create(b.impl, glsl_int64_t_type(), "result");
nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
nir_ssa_def *stats_mask = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 12);
nir_ssa_def *avail_offset = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
nir_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
nir_def *stats_mask = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 12);
nir_def *avail_offset = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
nir_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
nir_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
nir_ssa_def *global_id = get_global_ids(&b, 1);
nir_def *global_id = get_global_ids(&b, 1);
nir_ssa_def *input_stride = nir_imm_int(&b, pipelinestat_block_size * 2);
nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 8);
nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
nir_def *input_stride = nir_imm_int(&b, pipelinestat_block_size * 2);
nir_def *input_base = nir_imul(&b, input_stride, global_id);
nir_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 8);
nir_def *output_base = nir_imul(&b, output_stride, global_id);
avail_offset = nir_iadd(&b, avail_offset, nir_imul_imm(&b, global_id, 4));
nir_ssa_def *available32 = nir_load_ssbo(&b, 1, 32, src_buf, avail_offset);
nir_def *available32 = nir_load_ssbo(&b, 1, 32, src_buf, avail_offset);
nir_ssa_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
nir_ssa_def *elem_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
nir_ssa_def *elem_count = nir_ushr_imm(&b, stats_mask, 16);
nir_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
nir_def *elem_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
nir_def *elem_count = nir_ushr_imm(&b, stats_mask, 16);
radv_store_availability(&b, flags, dst_buf, nir_iadd(&b, output_base, nir_imul(&b, elem_count, elem_size)),
available32);
@ -311,12 +309,11 @@ build_pipeline_statistics_query_shader(struct radv_device *device)
for (int i = 0; i < ARRAY_SIZE(pipeline_statistics_indices); ++i) {
nir_push_if(&b, nir_test_mask(&b, stats_mask, BITFIELD64_BIT(i)));
nir_ssa_def *start_offset = nir_iadd_imm(&b, input_base, pipeline_statistics_indices[i] * 8);
nir_ssa_def *start = nir_load_ssbo(&b, 1, 64, src_buf, start_offset);
nir_def *start_offset = nir_iadd_imm(&b, input_base, pipeline_statistics_indices[i] * 8);
nir_def *start = nir_load_ssbo(&b, 1, 64, src_buf, start_offset);
nir_ssa_def *end_offset =
nir_iadd_imm(&b, input_base, pipeline_statistics_indices[i] * 8 + pipelinestat_block_size);
nir_ssa_def *end = nir_load_ssbo(&b, 1, 64, src_buf, end_offset);
nir_def *end_offset = nir_iadd_imm(&b, input_base, pipeline_statistics_indices[i] * 8 + pipelinestat_block_size);
nir_def *end = nir_load_ssbo(&b, 1, 64, src_buf, end_offset);
nir_store_var(&b, result, nir_isub(&b, end, start), 0x1);
@ -347,10 +344,10 @@ build_pipeline_statistics_query_shader(struct radv_device *device)
nir_loop *loop = nir_push_loop(&b);
nir_ssa_def *current_counter = nir_load_var(&b, counter);
nir_def *current_counter = nir_load_var(&b, counter);
radv_break_on_count(&b, counter, elem_count);
nir_ssa_def *output_elem = nir_iadd(&b, output_base, nir_imul(&b, elem_size, current_counter));
nir_def *output_elem = nir_iadd(&b, output_base, nir_imul(&b, elem_size, current_counter));
nir_push_if(&b, result_is_64bit);
nir_store_ssbo(&b, nir_imm_int64(&b, 0), dst_buf, output_elem);
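The store this hunk is entering follows the same shape as the other resolve shaders in this file; a sketch, with every name assumed rather than taken from the hidden context: each result element lands at output_base + i * elem_size and is written as 64 or 32 bits depending on VK_QUERY_RESULT_64_BIT.

nir_def *elem_offset = nir_iadd(b, output_base, nir_imul(b, elem_size, elem_index));
nir_push_if(b, result_is_64bit);
{
   nir_store_ssbo(b, value64, dst_buf, elem_offset);
}
nir_push_else(b, NULL);
{
   nir_store_ssbo(b, nir_u2u32(b, value64), dst_buf, elem_offset);
}
nir_pop_if(b, NULL);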
@ -415,44 +412,44 @@ build_tfb_query_shader(struct radv_device *device)
nir_store_var(&b, result, nir_replicate(&b, nir_imm_int64(&b, 0), 2), 0x3);
nir_store_var(&b, available, nir_imm_false(&b), 0x1);
nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
nir_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
/* Load resources. */
nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
nir_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
nir_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
/* Compute global ID. */
nir_ssa_def *global_id = get_global_ids(&b, 1);
nir_def *global_id = get_global_ids(&b, 1);
/* Compute src/dst strides. */
nir_ssa_def *input_stride = nir_imm_int(&b, 32);
nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 8);
nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
nir_def *input_stride = nir_imm_int(&b, 32);
nir_def *input_base = nir_imul(&b, input_stride, global_id);
nir_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 8);
nir_def *output_base = nir_imul(&b, output_stride, global_id);
/* Load data from the query pool. */
nir_ssa_def *load1 = nir_load_ssbo(&b, 4, 32, src_buf, input_base, .align_mul = 32);
nir_ssa_def *load2 = nir_load_ssbo(&b, 4, 32, src_buf, nir_iadd_imm(&b, input_base, 16), .align_mul = 16);
nir_def *load1 = nir_load_ssbo(&b, 4, 32, src_buf, input_base, .align_mul = 32);
nir_def *load2 = nir_load_ssbo(&b, 4, 32, src_buf, nir_iadd_imm(&b, input_base, 16), .align_mul = 16);
/* Check if result is available. */
nir_ssa_def *avails[2];
nir_def *avails[2];
avails[0] = nir_iand(&b, nir_channel(&b, load1, 1), nir_channel(&b, load1, 3));
avails[1] = nir_iand(&b, nir_channel(&b, load2, 1), nir_channel(&b, load2, 3));
nir_ssa_def *result_is_available = nir_test_mask(&b, nir_iand(&b, avails[0], avails[1]), 0x80000000);
nir_def *result_is_available = nir_test_mask(&b, nir_iand(&b, avails[0], avails[1]), 0x80000000);
/* Only compute result if available. */
nir_push_if(&b, result_is_available);
/* Pack values. */
nir_ssa_def *packed64[4];
nir_def *packed64[4];
packed64[0] = nir_pack_64_2x32(&b, nir_trim_vector(&b, load1, 2));
packed64[1] = nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load1, 2), nir_channel(&b, load1, 3)));
packed64[2] = nir_pack_64_2x32(&b, nir_trim_vector(&b, load2, 2));
packed64[3] = nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load2, 2), nir_channel(&b, load2, 3)));
/* Compute result. */
nir_ssa_def *num_primitive_written = nir_isub(&b, packed64[3], packed64[1]);
nir_ssa_def *primitive_storage_needed = nir_isub(&b, packed64[2], packed64[0]);
nir_def *num_primitive_written = nir_isub(&b, packed64[3], packed64[1]);
nir_def *primitive_storage_needed = nir_isub(&b, packed64[2], packed64[0]);
nir_store_var(&b, result, nir_vec2(&b, num_primitive_written, primitive_storage_needed), 0x3);
nir_store_var(&b, available, nir_imm_true(&b), 0x1);
@ -460,8 +457,8 @@ build_tfb_query_shader(struct radv_device *device)
nir_pop_if(&b, NULL);
/* Determine if result is 64 or 32 bit. */
nir_ssa_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16), nir_imm_int(&b, 8));
nir_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
nir_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16), nir_imm_int(&b, 8));
/* Store the result if complete or partial results have been requested. */
nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), nir_load_var(&b, available)));
@ -527,30 +524,30 @@ build_timestamp_query_shader(struct radv_device *device)
nir_store_var(&b, result, nir_imm_int64(&b, 0), 0x1);
nir_store_var(&b, available, nir_imm_false(&b), 0x1);
nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
nir_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
/* Load resources. */
nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
nir_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
nir_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
/* Compute global ID. */
nir_ssa_def *global_id = get_global_ids(&b, 1);
nir_def *global_id = get_global_ids(&b, 1);
/* Compute src/dst strides. */
nir_ssa_def *input_stride = nir_imm_int(&b, 8);
nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 8);
nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
nir_def *input_stride = nir_imm_int(&b, 8);
nir_def *input_base = nir_imul(&b, input_stride, global_id);
nir_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 8);
nir_def *output_base = nir_imul(&b, output_stride, global_id);
/* Load data from the query pool. */
nir_ssa_def *load = nir_load_ssbo(&b, 2, 32, src_buf, input_base, .align_mul = 8);
nir_def *load = nir_load_ssbo(&b, 2, 32, src_buf, input_base, .align_mul = 8);
/* Pack the timestamp. */
nir_ssa_def *timestamp;
nir_def *timestamp;
timestamp = nir_pack_64_2x32(&b, nir_trim_vector(&b, load, 2));
/* Check if result is available. */
nir_ssa_def *result_is_available = nir_i2b(&b, nir_ine_imm(&b, timestamp, TIMESTAMP_NOT_READY));
nir_def *result_is_available = nir_i2b(&b, nir_ine_imm(&b, timestamp, TIMESTAMP_NOT_READY));
/* Only store result if available. */
nir_push_if(&b, result_is_available);
@ -561,8 +558,8 @@ build_timestamp_query_shader(struct radv_device *device)
nir_pop_if(&b, NULL);
/* Determine if result is 64 or 32 bit. */
nir_ssa_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
nir_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
nir_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
/* Store the result if complete or partial results have been requested. */
nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), nir_load_var(&b, available)));
@ -639,58 +636,57 @@ build_pg_query_shader(struct radv_device *device)
nir_store_var(&b, result, nir_imm_int64(&b, 0), 0x1);
nir_store_var(&b, available, nir_imm_false(&b), 0x1);
nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 16);
nir_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 16);
/* Load resources. */
nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
nir_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
nir_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
/* Compute global ID. */
nir_ssa_def *global_id = get_global_ids(&b, 1);
nir_def *global_id = get_global_ids(&b, 1);
/* Determine if the query pool uses GDS for NGG. */
nir_ssa_def *uses_gds = nir_i2b(&b, nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20));
nir_def *uses_gds = nir_i2b(&b, nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20));
/* Compute src/dst strides. */
nir_ssa_def *input_stride =
nir_def *input_stride =
nir_bcsel(&b, uses_gds, nir_imm_int(&b, RADV_PGQ_STRIDE_GDS), nir_imm_int(&b, RADV_PGQ_STRIDE));
nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 16);
nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
nir_def *input_base = nir_imul(&b, input_stride, global_id);
nir_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 16);
nir_def *output_base = nir_imul(&b, output_stride, global_id);
/* Load data from the query pool. */
nir_ssa_def *load1 = nir_load_ssbo(&b, 2, 32, src_buf, input_base, .align_mul = 32);
nir_ssa_def *load2 =
nir_load_ssbo(&b, 2, 32, src_buf, nir_iadd(&b, input_base, nir_imm_int(&b, 16)), .align_mul = 16);
nir_def *load1 = nir_load_ssbo(&b, 2, 32, src_buf, input_base, .align_mul = 32);
nir_def *load2 = nir_load_ssbo(&b, 2, 32, src_buf, nir_iadd(&b, input_base, nir_imm_int(&b, 16)), .align_mul = 16);
/* Check if result is available. */
nir_ssa_def *avails[2];
nir_def *avails[2];
avails[0] = nir_channel(&b, load1, 1);
avails[1] = nir_channel(&b, load2, 1);
nir_ssa_def *result_is_available =
nir_def *result_is_available =
nir_i2b(&b, nir_iand(&b, nir_iand(&b, avails[0], avails[1]), nir_imm_int(&b, 0x80000000)));
/* Only compute result if available. */
nir_push_if(&b, result_is_available);
/* Pack values. */
nir_ssa_def *packed64[2];
nir_def *packed64[2];
packed64[0] = nir_pack_64_2x32(&b, nir_trim_vector(&b, load1, 2));
packed64[1] = nir_pack_64_2x32(&b, nir_trim_vector(&b, load2, 2));
/* Compute result. */
nir_ssa_def *primitive_storage_needed = nir_isub(&b, packed64[1], packed64[0]);
nir_def *primitive_storage_needed = nir_isub(&b, packed64[1], packed64[0]);
nir_store_var(&b, result, primitive_storage_needed, 0x1);
nir_push_if(&b, uses_gds);
{
nir_ssa_def *gds_start =
nir_def *gds_start =
nir_load_ssbo(&b, 1, 32, src_buf, nir_iadd(&b, input_base, nir_imm_int(&b, 32)), .align_mul = 4);
nir_ssa_def *gds_end =
nir_def *gds_end =
nir_load_ssbo(&b, 1, 32, src_buf, nir_iadd(&b, input_base, nir_imm_int(&b, 36)), .align_mul = 4);
nir_ssa_def *ngg_gds_result = nir_isub(&b, gds_end, gds_start);
nir_def *ngg_gds_result = nir_isub(&b, gds_end, gds_start);
nir_store_var(&b, result, nir_iadd(&b, nir_load_var(&b, result), nir_u2u64(&b, ngg_gds_result)), 0x1);
}
@ -701,8 +697,8 @@ build_pg_query_shader(struct radv_device *device)
nir_pop_if(&b, NULL);
/* Determine if result is 64 or 32 bit. */
nir_ssa_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
nir_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
nir_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
/* Store the result if complete or partial results have been requested. */
nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), nir_load_var(&b, available)));


@ -29,8 +29,7 @@
#include <llvm/Config/llvm-config.h>
#endif
static nir_ssa_def *build_node_to_addr(struct radv_device *device, nir_builder *b, nir_ssa_def *node,
bool skip_type_and);
static nir_def *build_node_to_addr(struct radv_device *device, nir_builder *b, nir_def *node, bool skip_type_and);
bool
radv_enable_rt(const struct radv_physical_device *pdevice, bool rt_pipelines)
@ -54,16 +53,14 @@ void
nir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var_indices, uint32_t chan_1,
uint32_t chan_2)
{
nir_ssa_def *ssa_distances = nir_load_var(b, var_distances);
nir_ssa_def *ssa_indices = nir_load_var(b, var_indices);
nir_def *ssa_distances = nir_load_var(b, var_distances);
nir_def *ssa_indices = nir_load_var(b, var_indices);
/* if (distances[chan_2] < distances[chan_1]) { */
nir_push_if(b, nir_flt(b, nir_channel(b, ssa_distances, chan_2), nir_channel(b, ssa_distances, chan_1)));
{
/* swap(distances[chan_2], distances[chan_1]); */
nir_ssa_def *new_distances[4] = {nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32),
nir_ssa_undef(b, 1, 32)};
nir_ssa_def *new_indices[4] = {nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32),
nir_ssa_undef(b, 1, 32)};
nir_def *new_distances[4] = {nir_undef(b, 1, 32), nir_undef(b, 1, 32), nir_undef(b, 1, 32), nir_undef(b, 1, 32)};
nir_def *new_indices[4] = {nir_undef(b, 1, 32), nir_undef(b, 1, 32), nir_undef(b, 1, 32), nir_undef(b, 1, 32)};
new_distances[chan_2] = nir_channel(b, ssa_distances, chan_1);
new_distances[chan_1] = nir_channel(b, ssa_distances, chan_2);
new_indices[chan_2] = nir_channel(b, ssa_indices, chan_1);
@ -75,9 +72,9 @@ nir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var
nir_pop_if(b, NULL);
}
nir_ssa_def *
intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node, nir_ssa_def *ray_tmax,
nir_ssa_def *origin, nir_ssa_def *dir, nir_ssa_def *inv_dir)
nir_def *
intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_def *bvh_node, nir_def *ray_tmax,
nir_def *origin, nir_def *dir, nir_def *inv_dir)
{
const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4);
const struct glsl_type *uvec4_type = glsl_vector_type(GLSL_TYPE_UINT, 4);
@ -85,7 +82,7 @@ intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_s
bool old_exact = b->exact;
b->exact = true;
nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node, false);
nir_def *node_addr = build_node_to_addr(device, b, bvh_node, false);
/* vec4 distances = vec4(INF, INF, INF, INF); */
nir_variable *distances = nir_variable_create(b->shader, nir_var_shader_temp, vec4_type, "distances");
@ -108,10 +105,10 @@ intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_s
};
/* node->children[i] -> uint */
nir_ssa_def *child_index = nir_build_load_global(b, 1, 32, nir_iadd_imm(b, node_addr, child_offset),
.align_mul = 64, .align_offset = child_offset % 64);
nir_def *child_index = nir_build_load_global(b, 1, 32, nir_iadd_imm(b, node_addr, child_offset), .align_mul = 64,
.align_offset = child_offset % 64);
/* node->coords[i][0], node->coords[i][1] -> vec3 */
nir_ssa_def *node_coords[2] = {
nir_def *node_coords[2] = {
nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[0]), .align_mul = 64,
.align_offset = coord_offsets[0] % 64),
nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[1]), .align_mul = 64,
@ -122,27 +119,27 @@ intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_s
* We don't need to care about any other components being NaN as that is UB.
* https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap36.html#VkAabbPositionsKHR
*/
nir_ssa_def *min_x = nir_channel(b, node_coords[0], 0);
nir_ssa_def *min_x_is_not_nan = nir_inot(b, nir_fneu(b, min_x, min_x)); /* NaN != NaN -> true */
nir_def *min_x = nir_channel(b, node_coords[0], 0);
nir_def *min_x_is_not_nan = nir_inot(b, nir_fneu(b, min_x, min_x)); /* NaN != NaN -> true */
/* vec3 bound0 = (node->coords[i][0] - origin) * inv_dir; */
nir_ssa_def *bound0 = nir_fmul(b, nir_fsub(b, node_coords[0], origin), inv_dir);
nir_def *bound0 = nir_fmul(b, nir_fsub(b, node_coords[0], origin), inv_dir);
/* vec3 bound1 = (node->coords[i][1] - origin) * inv_dir; */
nir_ssa_def *bound1 = nir_fmul(b, nir_fsub(b, node_coords[1], origin), inv_dir);
nir_def *bound1 = nir_fmul(b, nir_fsub(b, node_coords[1], origin), inv_dir);
/* float tmin = max(max(min(bound0.x, bound1.x), min(bound0.y, bound1.y)), min(bound0.z,
* bound1.z)); */
nir_ssa_def *tmin = nir_fmax(b,
nir_fmax(b, nir_fmin(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
nir_fmin(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
nir_fmin(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));
nir_def *tmin = nir_fmax(b,
nir_fmax(b, nir_fmin(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
nir_fmin(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
nir_fmin(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));
/* float tmax = min(min(max(bound0.x, bound1.x), max(bound0.y, bound1.y)), max(bound0.z,
* bound1.z)); */
nir_ssa_def *tmax = nir_fmin(b,
nir_fmin(b, nir_fmax(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
nir_fmax(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
nir_fmax(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));
nir_def *tmax = nir_fmin(b,
nir_fmin(b, nir_fmax(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
nir_fmax(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
nir_fmax(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));
/* if (!isnan(node->coords[i][0].x) && tmax >= max(0.0f, tmin) && tmin < ray_tmax) { */
nir_push_if(b, nir_iand(b, min_x_is_not_nan,
@ -150,11 +147,11 @@ intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_s
nir_flt(b, tmin, ray_tmax))));
{
/* child_indices[i] = node->children[i]; */
nir_ssa_def *new_child_indices[4] = {child_index, child_index, child_index, child_index};
nir_def *new_child_indices[4] = {child_index, child_index, child_index, child_index};
nir_store_var(b, child_indices, nir_vec(b, new_child_indices, 4), 1u << i);
/* distances[i] = tmin; */
nir_ssa_def *new_distances[4] = {tmin, tmin, tmin, tmin};
nir_def *new_distances[4] = {tmin, tmin, tmin, tmin};
nir_store_var(b, distances, nir_vec(b, new_distances, 4), 1u << i);
}
/* } */
@ -172,16 +169,16 @@ intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_s
return nir_load_var(b, child_indices);
}
nir_ssa_def *
intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node, nir_ssa_def *ray_tmax,
nir_ssa_def *origin, nir_ssa_def *dir, nir_ssa_def *inv_dir)
nir_def *
intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_def *bvh_node, nir_def *ray_tmax,
nir_def *origin, nir_def *dir, nir_def *inv_dir)
{
const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4);
bool old_exact = b->exact;
b->exact = true;
nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node, false);
nir_def *node_addr = build_node_to_addr(device, b, bvh_node, false);
const uint32_t coord_offsets[3] = {
offsetof(struct radv_bvh_triangle_node, coords[0]),
@ -190,7 +187,7 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_s
};
/* node->coords[0], node->coords[1], node->coords[2] -> vec3 */
nir_ssa_def *node_coords[3] = {
nir_def *node_coords[3] = {
nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[0]), .align_mul = 64,
.align_offset = coord_offsets[0] % 64),
nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[1]), .align_mul = 64,
@ -206,22 +203,21 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_s
* http://jcgt.org/published/0002/01/05/paper.pdf */
/* Calculate the dimension where the ray direction is largest */
nir_ssa_def *abs_dir = nir_fabs(b, dir);
nir_def *abs_dir = nir_fabs(b, dir);
nir_ssa_def *abs_dirs[3] = {
nir_def *abs_dirs[3] = {
nir_channel(b, abs_dir, 0),
nir_channel(b, abs_dir, 1),
nir_channel(b, abs_dir, 2),
};
/* Find index of greatest value of abs_dir and put that as kz. */
nir_ssa_def *kz =
nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[1]),
nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[2]), nir_imm_int(b, 0), nir_imm_int(b, 2)),
nir_bcsel(b, nir_fge(b, abs_dirs[1], abs_dirs[2]), nir_imm_int(b, 1), nir_imm_int(b, 2)));
nir_ssa_def *kx = nir_imod_imm(b, nir_iadd_imm(b, kz, 1), 3);
nir_ssa_def *ky = nir_imod_imm(b, nir_iadd_imm(b, kx, 1), 3);
nir_ssa_def *k_indices[3] = {kx, ky, kz};
nir_ssa_def *k = nir_vec(b, k_indices, 3);
nir_def *kz = nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[1]),
nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[2]), nir_imm_int(b, 0), nir_imm_int(b, 2)),
nir_bcsel(b, nir_fge(b, abs_dirs[1], abs_dirs[2]), nir_imm_int(b, 1), nir_imm_int(b, 2)));
nir_def *kx = nir_imod_imm(b, nir_iadd_imm(b, kz, 1), 3);
nir_def *ky = nir_imod_imm(b, nir_iadd_imm(b, kx, 1), 3);
nir_def *k_indices[3] = {kx, ky, kz};
nir_def *k = nir_vec(b, k_indices, 3);
/* Swap kx and ky dimensions to preserve winding order */
unsigned swap_xy_swizzle[4] = {1, 0, 2, 3};
@ -232,35 +228,35 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_s
kz = nir_channel(b, k, 2);
/* Calculate shear constants */
nir_ssa_def *sz = nir_frcp(b, nir_vector_extract(b, dir, kz));
nir_ssa_def *sx = nir_fmul(b, nir_vector_extract(b, dir, kx), sz);
nir_ssa_def *sy = nir_fmul(b, nir_vector_extract(b, dir, ky), sz);
nir_def *sz = nir_frcp(b, nir_vector_extract(b, dir, kz));
nir_def *sx = nir_fmul(b, nir_vector_extract(b, dir, kx), sz);
nir_def *sy = nir_fmul(b, nir_vector_extract(b, dir, ky), sz);
/* Calculate vertices relative to ray origin */
nir_ssa_def *v_a = nir_fsub(b, node_coords[0], origin);
nir_ssa_def *v_b = nir_fsub(b, node_coords[1], origin);
nir_ssa_def *v_c = nir_fsub(b, node_coords[2], origin);
nir_def *v_a = nir_fsub(b, node_coords[0], origin);
nir_def *v_b = nir_fsub(b, node_coords[1], origin);
nir_def *v_c = nir_fsub(b, node_coords[2], origin);
/* Perform shear and scale */
nir_ssa_def *ax = nir_fsub(b, nir_vector_extract(b, v_a, kx), nir_fmul(b, sx, nir_vector_extract(b, v_a, kz)));
nir_ssa_def *ay = nir_fsub(b, nir_vector_extract(b, v_a, ky), nir_fmul(b, sy, nir_vector_extract(b, v_a, kz)));
nir_ssa_def *bx = nir_fsub(b, nir_vector_extract(b, v_b, kx), nir_fmul(b, sx, nir_vector_extract(b, v_b, kz)));
nir_ssa_def *by = nir_fsub(b, nir_vector_extract(b, v_b, ky), nir_fmul(b, sy, nir_vector_extract(b, v_b, kz)));
nir_ssa_def *cx = nir_fsub(b, nir_vector_extract(b, v_c, kx), nir_fmul(b, sx, nir_vector_extract(b, v_c, kz)));
nir_ssa_def *cy = nir_fsub(b, nir_vector_extract(b, v_c, ky), nir_fmul(b, sy, nir_vector_extract(b, v_c, kz)));
nir_def *ax = nir_fsub(b, nir_vector_extract(b, v_a, kx), nir_fmul(b, sx, nir_vector_extract(b, v_a, kz)));
nir_def *ay = nir_fsub(b, nir_vector_extract(b, v_a, ky), nir_fmul(b, sy, nir_vector_extract(b, v_a, kz)));
nir_def *bx = nir_fsub(b, nir_vector_extract(b, v_b, kx), nir_fmul(b, sx, nir_vector_extract(b, v_b, kz)));
nir_def *by = nir_fsub(b, nir_vector_extract(b, v_b, ky), nir_fmul(b, sy, nir_vector_extract(b, v_b, kz)));
nir_def *cx = nir_fsub(b, nir_vector_extract(b, v_c, kx), nir_fmul(b, sx, nir_vector_extract(b, v_c, kz)));
nir_def *cy = nir_fsub(b, nir_vector_extract(b, v_c, ky), nir_fmul(b, sy, nir_vector_extract(b, v_c, kz)));
nir_ssa_def *u = nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx));
nir_ssa_def *v = nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx));
nir_ssa_def *w = nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax));
nir_def *u = nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx));
nir_def *v = nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx));
nir_def *w = nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax));
/* Perform edge tests. */
nir_ssa_def *cond_back =
nir_def *cond_back =
nir_ior(b, nir_ior(b, nir_flt_imm(b, u, 0.0f), nir_flt_imm(b, v, 0.0f)), nir_flt_imm(b, w, 0.0f));
nir_ssa_def *cond_front =
nir_def *cond_front =
nir_ior(b, nir_ior(b, nir_fgt_imm(b, u, 0.0f), nir_fgt_imm(b, v, 0.0f)), nir_fgt_imm(b, w, 0.0f));
nir_ssa_def *cond = nir_inot(b, nir_iand(b, cond_back, cond_front));
nir_def *cond = nir_inot(b, nir_iand(b, cond_back, cond_front));
/* If the ray is exactly on the edge where v is 0, consider it a miss.
* This seems to correspond to what the hardware is doing.
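The builder calls above encode the edge-function test of the watertight algorithm; in plain scalar C (a sketch, with v_a/v_b/v_c the ray-relative vertices and kx/ky/kz, sx/sy/sz the permutation and shear constants from above) it reads:

float ax = v_a[kx] - sx * v_a[kz], ay = v_a[ky] - sy * v_a[kz];
float bx = v_b[kx] - sx * v_b[kz], by = v_b[ky] - sy * v_b[kz];
float cx = v_c[kx] - sx * v_c[kz], cy = v_c[ky] - sy * v_c[kz];

float u = cx * by - cy * bx;   /* signed areas of the sheared edges */
float v = ax * cy - ay * cx;
float w = bx * ay - by * ax;

/* The ray passes inside the triangle only if u, v and w all share a sign
 * (or are zero); it is a miss when some are negative and some positive. */
bool miss = (u < 0.0f || v < 0.0f || w < 0.0f) &&
            (u > 0.0f || v > 0.0f || w > 0.0f);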
@ -271,21 +267,21 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_s
nir_push_if(b, cond);
{
nir_ssa_def *det = nir_fadd(b, u, nir_fadd(b, v, w));
nir_def *det = nir_fadd(b, u, nir_fadd(b, v, w));
nir_ssa_def *az = nir_fmul(b, sz, nir_vector_extract(b, v_a, kz));
nir_ssa_def *bz = nir_fmul(b, sz, nir_vector_extract(b, v_b, kz));
nir_ssa_def *cz = nir_fmul(b, sz, nir_vector_extract(b, v_c, kz));
nir_def *az = nir_fmul(b, sz, nir_vector_extract(b, v_a, kz));
nir_def *bz = nir_fmul(b, sz, nir_vector_extract(b, v_b, kz));
nir_def *cz = nir_fmul(b, sz, nir_vector_extract(b, v_c, kz));
nir_ssa_def *t = nir_fadd(b, nir_fadd(b, nir_fmul(b, u, az), nir_fmul(b, v, bz)), nir_fmul(b, w, cz));
nir_def *t = nir_fadd(b, nir_fadd(b, nir_fmul(b, u, az), nir_fmul(b, v, bz)), nir_fmul(b, w, cz));
nir_ssa_def *t_signed = nir_fmul(b, nir_fsign(b, det), t);
nir_def *t_signed = nir_fmul(b, nir_fsign(b, det), t);
nir_ssa_def *det_cond_front = nir_inot(b, nir_flt_imm(b, t_signed, 0.0f));
nir_def *det_cond_front = nir_inot(b, nir_flt_imm(b, t_signed, 0.0f));
nir_push_if(b, det_cond_front);
{
nir_ssa_def *indices[4] = {t, det, v, w};
nir_def *indices[4] = {t, det, v, w};
nir_store_var(b, result, nir_vec(b, indices, 4), 0xf);
}
nir_pop_if(b, NULL);
@ -296,35 +292,35 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_s
return nir_load_var(b, result);
}
nir_ssa_def *
build_addr_to_node(nir_builder *b, nir_ssa_def *addr)
nir_def *
build_addr_to_node(nir_builder *b, nir_def *addr)
{
const uint64_t bvh_size = 1ull << 42;
nir_ssa_def *node = nir_ushr_imm(b, addr, 3);
nir_def *node = nir_ushr_imm(b, addr, 3);
return nir_iand_imm(b, node, (bvh_size - 1) << 3);
}
static nir_ssa_def *
build_node_to_addr(struct radv_device *device, nir_builder *b, nir_ssa_def *node, bool skip_type_and)
static nir_def *
build_node_to_addr(struct radv_device *device, nir_builder *b, nir_def *node, bool skip_type_and)
{
nir_ssa_def *addr = skip_type_and ? node : nir_iand_imm(b, node, ~7ull);
nir_def *addr = skip_type_and ? node : nir_iand_imm(b, node, ~7ull);
addr = nir_ishl_imm(b, addr, 3);
/* Assumes everything is in the top half of address space, which is true in
* GFX9+ for now. */
return device->physical_device->rad_info.gfx_level >= GFX9 ? nir_ior_imm(b, addr, 0xffffull << 48) : addr;
}
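In plain integer arithmetic, the two helpers above implement the following packing (a sketch; it assumes 8-byte-granular node ids with the node type kept in the low three bits and a 2^42-byte BVH address window):

uint64_t addr_to_node(uint64_t addr)
{
   return (addr >> 3) & (((1ull << 42) - 1) << 3);
}

uint64_t node_to_addr(uint64_t node)
{
   uint64_t addr = (node & ~7ull) << 3;
   return addr | (0xffffull << 48);   /* GFX9+: force a canonical high-half address */
}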
nir_ssa_def *
nir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[], bool translation)
nir_def *
nir_build_vec3_mat_mult(nir_builder *b, nir_def *vec, nir_def *matrix[], bool translation)
{
nir_ssa_def *result_components[3] = {
nir_def *result_components[3] = {
nir_channel(b, matrix[0], 3),
nir_channel(b, matrix[1], 3),
nir_channel(b, matrix[2], 3),
};
for (unsigned i = 0; i < 3; ++i) {
for (unsigned j = 0; j < 3; ++j) {
nir_ssa_def *v = nir_fmul(b, nir_channels(b, vec, 1 << j), nir_channels(b, matrix[i], 1 << j));
nir_def *v = nir_fmul(b, nir_channels(b, vec, 1 << j), nir_channels(b, matrix[i], 1 << j));
result_components[i] = (translation || j) ? nir_fadd(b, result_components[i], v) : v;
}
}
@ -332,7 +328,7 @@ nir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[],
}
void
nir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_def **out)
nir_build_wto_matrix_load(nir_builder *b, nir_def *instance_addr, nir_def **out)
{
unsigned offset = offsetof(struct radv_bvh_instance_node, wto_matrix);
for (unsigned i = 0; i < 3; ++i) {
@ -343,18 +339,18 @@ nir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_de
/* When a hit is opaque the any_hit shader is skipped for this hit and the hit
* is assumed to be an actual hit. */
static nir_ssa_def *
hit_is_opaque(nir_builder *b, nir_ssa_def *sbt_offset_and_flags, const struct radv_ray_flags *ray_flags,
nir_ssa_def *geometry_id_and_flags)
static nir_def *
hit_is_opaque(nir_builder *b, nir_def *sbt_offset_and_flags, const struct radv_ray_flags *ray_flags,
nir_def *geometry_id_and_flags)
{
nir_ssa_def *opaque = nir_uge_imm(b, nir_ior(b, geometry_id_and_flags, sbt_offset_and_flags),
RADV_INSTANCE_FORCE_OPAQUE | RADV_INSTANCE_NO_FORCE_NOT_OPAQUE);
nir_def *opaque = nir_uge_imm(b, nir_ior(b, geometry_id_and_flags, sbt_offset_and_flags),
RADV_INSTANCE_FORCE_OPAQUE | RADV_INSTANCE_NO_FORCE_NOT_OPAQUE);
opaque = nir_bcsel(b, ray_flags->force_opaque, nir_imm_true(b), opaque);
opaque = nir_bcsel(b, ray_flags->force_not_opaque, nir_imm_false(b), opaque);
return opaque;
}
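The selection order above gives the per-ray flags the last word; as a plain sketch with invented names:

bool opaque = bvh_flags_force_opaque;   /* default taken from the instance/geometry flags */
if (ray_force_opaque)
   opaque = true;
if (ray_force_not_opaque)
   opaque = false;                      /* applied last, so it wins */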
nir_ssa_def *
nir_def *
create_bvh_descriptor(nir_builder *b)
{
/* We create a BVH descriptor that covers the entire memory range. That way we can always
@ -367,25 +363,25 @@ create_bvh_descriptor(nir_builder *b)
static void
insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args,
const struct radv_ray_flags *ray_flags, nir_ssa_def *result, nir_ssa_def *bvh_node)
const struct radv_ray_flags *ray_flags, nir_def *result, nir_def *bvh_node)
{
if (!args->triangle_cb)
return;
struct radv_triangle_intersection intersection;
intersection.t = nir_channel(b, result, 0);
nir_ssa_def *div = nir_channel(b, result, 1);
nir_def *div = nir_channel(b, result, 1);
intersection.t = nir_fdiv(b, intersection.t, div);
nir_push_if(b, nir_flt(b, intersection.t, nir_load_deref(b, args->vars.tmax)));
{
intersection.frontface = nir_fgt_imm(b, div, 0);
nir_ssa_def *switch_ccw =
nir_def *switch_ccw =
nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), RADV_INSTANCE_TRIANGLE_FLIP_FACING);
intersection.frontface = nir_ixor(b, intersection.frontface, switch_ccw);
nir_ssa_def *not_cull = ray_flags->no_skip_triangles;
nir_ssa_def *not_facing_cull =
nir_def *not_cull = ray_flags->no_skip_triangles;
nir_def *not_facing_cull =
nir_bcsel(b, intersection.frontface, ray_flags->no_cull_front, ray_flags->no_cull_back);
not_cull = nir_iand(b, not_cull,
@ -398,7 +394,7 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const
nir_flt(b, args->tmin, intersection.t), not_cull));
{
intersection.base.node_addr = build_node_to_addr(device, b, bvh_node, false);
nir_ssa_def *triangle_info = nir_build_load_global(
nir_def *triangle_info = nir_build_load_global(
b, 2, 32,
nir_iadd_imm(b, intersection.base.node_addr, offsetof(struct radv_bvh_triangle_node, triangle_id)));
intersection.base.primitive_id = nir_channel(b, triangle_info, 0);
@ -409,7 +405,7 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const
not_cull = nir_bcsel(b, intersection.base.opaque, ray_flags->no_cull_opaque, ray_flags->no_cull_no_opaque);
nir_push_if(b, not_cull);
{
nir_ssa_def *divs[2] = {div, div};
nir_def *divs[2] = {div, div};
intersection.barycentrics = nir_fdiv(b, nir_channels(b, result, 0xc), nir_vec(b, divs, 2));
args->triangle_cb(b, &intersection, args, ray_flags);
@ -423,21 +419,21 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const
static void
insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args,
const struct radv_ray_flags *ray_flags, nir_ssa_def *bvh_node)
const struct radv_ray_flags *ray_flags, nir_def *bvh_node)
{
if (!args->aabb_cb)
return;
struct radv_leaf_intersection intersection;
intersection.node_addr = build_node_to_addr(device, b, bvh_node, false);
nir_ssa_def *triangle_info = nir_build_load_global(
nir_def *triangle_info = nir_build_load_global(
b, 2, 32, nir_iadd_imm(b, intersection.node_addr, offsetof(struct radv_bvh_aabb_node, primitive_id)));
intersection.primitive_id = nir_channel(b, triangle_info, 0);
intersection.geometry_id_and_flags = nir_channel(b, triangle_info, 1);
intersection.opaque = hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), ray_flags,
intersection.geometry_id_and_flags);
nir_ssa_def *not_cull = nir_bcsel(b, intersection.opaque, ray_flags->no_cull_opaque, ray_flags->no_cull_no_opaque);
nir_def *not_cull = nir_bcsel(b, intersection.opaque, ray_flags->no_cull_opaque, ray_flags->no_cull_no_opaque);
not_cull = nir_iand(b, not_cull, ray_flags->no_skip_aabbs);
nir_push_if(b, not_cull);
{
@ -446,22 +442,22 @@ insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, const str
nir_pop_if(b, NULL);
}
static nir_ssa_def *
fetch_parent_node(nir_builder *b, nir_ssa_def *bvh, nir_ssa_def *node)
static nir_def *
fetch_parent_node(nir_builder *b, nir_def *bvh, nir_def *node)
{
nir_ssa_def *offset = nir_iadd_imm(b, nir_imul_imm(b, nir_udiv_imm(b, node, 8), 4), 4);
nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, nir_udiv_imm(b, node, 8), 4), 4);
return nir_build_load_global(b, 1, 32, nir_isub(b, bvh, nir_u2u64(b, offset)), .align_mul = 4);
}
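
The arithmetic in fetch_parent_node() above encodes where the parent links live: node ids carry the node type in their low three bits, and one 32-bit parent entry per node is assumed to sit immediately below the BVH base address. A standalone sketch of that addressing (the concrete base address is invented for illustration):

   /* Sketch of the parent-link addressing used by fetch_parent_node():
    * id / 8 strips the 3 type bits to get the node index, each parent
    * entry is 4 bytes, and entry 0 is assumed to live at base - 4. */
   #include <stdint.h>
   #include <stdio.h>

   static uint64_t
   parent_entry_addr(uint64_t bvh_base, uint32_t node_id)
   {
      uint32_t node_index = node_id / 8;     /* strip the 3 type bits */
      uint32_t offset = node_index * 4 + 4;  /* 4 bytes per entry */
      return bvh_base - offset;
   }

   int main(void)
   {
      uint64_t bvh_base = 0x100000; /* hypothetical base, illustration only */
      printf("parent of node 0x%x stored at 0x%llx\n", 16u,
             (unsigned long long)parent_entry_addr(bvh_base, 16));
      return 0;
   }
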
nir_ssa_def *
nir_def *
radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args)
{
nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete");
nir_store_var(b, incomplete, nir_imm_true(b), 0x1);
nir_ssa_def *desc = create_bvh_descriptor(b);
nir_ssa_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0);
nir_def *desc = create_bvh_descriptor(b);
nir_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0);
struct radv_ray_flags ray_flags = {
.force_opaque = nir_test_mask(b, args->flags, SpvRayFlagsOpaqueKHRMask),
@ -487,9 +483,9 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
}
nir_pop_if(b, NULL);
nir_ssa_def *stack_instance_exit =
nir_def *stack_instance_exit =
nir_ige(b, nir_load_deref(b, args->vars.top_stack), nir_load_deref(b, args->vars.stack));
nir_ssa_def *root_instance_exit =
nir_def *root_instance_exit =
nir_ieq(b, nir_load_deref(b, args->vars.previous_node), nir_load_deref(b, args->vars.instance_bottom_node));
nir_if *instance_exit = nir_push_if(b, nir_ior(b, stack_instance_exit, root_instance_exit));
instance_exit->control = nir_selection_control_dont_flatten;
@ -508,10 +504,10 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
nir_push_if(
b, nir_ige(b, nir_load_deref(b, args->vars.stack_low_watermark), nir_load_deref(b, args->vars.stack)));
{
nir_ssa_def *prev = nir_load_deref(b, args->vars.previous_node);
nir_ssa_def *bvh_addr = build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true);
nir_def *prev = nir_load_deref(b, args->vars.previous_node);
nir_def *bvh_addr = build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true);
nir_ssa_def *parent = fetch_parent_node(b, bvh_addr, prev);
nir_def *parent = fetch_parent_node(b, bvh_addr, prev);
nir_push_if(b, nir_ieq_imm(b, parent, RADV_BVH_INVALID_NODE));
{
nir_store_var(b, incomplete, nir_imm_false(b), 0x1);
@ -525,9 +521,9 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
nir_store_deref(b, args->vars.stack,
nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), -args->stack_stride), 1);
nir_ssa_def *stack_ptr =
nir_def *stack_ptr =
nir_umod_imm(b, nir_load_deref(b, args->vars.stack), args->stack_stride * args->stack_entries);
nir_ssa_def *bvh_node = args->stack_load_cb(b, stack_ptr, args);
nir_def *bvh_node = args->stack_load_cb(b, stack_ptr, args);
nir_store_deref(b, args->vars.current_node, bvh_node, 0x1);
nir_store_deref(b, args->vars.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1);
}
@ -539,15 +535,15 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
}
nir_pop_if(b, NULL);
nir_ssa_def *bvh_node = nir_load_deref(b, args->vars.current_node);
nir_def *bvh_node = nir_load_deref(b, args->vars.current_node);
nir_ssa_def *prev_node = nir_load_deref(b, args->vars.previous_node);
nir_def *prev_node = nir_load_deref(b, args->vars.previous_node);
nir_store_deref(b, args->vars.previous_node, bvh_node, 0x1);
nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1);
nir_ssa_def *global_bvh_node = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node));
nir_def *global_bvh_node = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node));
nir_ssa_def *intrinsic_result = NULL;
nir_def *intrinsic_result = NULL;
if (!radv_emulate_rt(device->physical_device)) {
intrinsic_result =
nir_bvh64_intersect_ray_amd(b, 32, desc, nir_unpack_64_2x32(b, global_bvh_node),
@ -555,7 +551,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
nir_load_deref(b, args->vars.dir), nir_load_deref(b, args->vars.inv_dir));
}
nir_ssa_def *node_type = nir_iand_imm(b, bvh_node, 7);
nir_def *node_type = nir_iand_imm(b, bvh_node, 7);
nir_push_if(b, nir_uge_imm(b, node_type, radv_bvh_node_box16));
{
nir_push_if(b, nir_uge_imm(b, node_type, radv_bvh_node_instance));
@ -567,18 +563,18 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
nir_push_else(b, NULL);
{
/* instance */
nir_ssa_def *instance_node_addr = build_node_to_addr(device, b, global_bvh_node, false);
nir_def *instance_node_addr = build_node_to_addr(device, b, global_bvh_node, false);
nir_store_deref(b, args->vars.instance_addr, instance_node_addr, 1);
nir_ssa_def *instance_data =
nir_def *instance_data =
nir_build_load_global(b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset = 0);
nir_ssa_def *wto_matrix[3];
nir_def *wto_matrix[3];
nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3), 1);
nir_ssa_def *instance_and_mask = nir_channel(b, instance_data, 2);
nir_def *instance_and_mask = nir_channel(b, instance_data, 2);
nir_push_if(b, nir_ult(b, nir_iand(b, instance_and_mask, args->cull_mask), nir_imm_int(b, 1 << 24)));
{
nir_jump(b, nir_jump_continue);
@ -602,7 +598,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
}
nir_push_else(b, NULL);
{
nir_ssa_def *result = intrinsic_result;
nir_def *result = intrinsic_result;
if (!result) {
/* If we didn't run the intrinsic because the hardware didn't support it,
 * emulate ray/box intersection here */
@ -614,7 +610,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
/* box */
nir_push_if(b, nir_ieq_imm(b, prev_node, RADV_BVH_INVALID_NODE));
{
nir_ssa_def *new_nodes[4];
nir_def *new_nodes[4];
for (unsigned i = 0; i < 4; ++i)
new_nodes[i] = nir_channel(b, result, i);
@ -622,13 +618,13 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
nir_push_if(b, nir_ine_imm(b, new_nodes[i], RADV_BVH_INVALID_NODE));
for (unsigned i = 4; i-- > 1;) {
nir_ssa_def *stack = nir_load_deref(b, args->vars.stack);
nir_ssa_def *stack_ptr = nir_umod_imm(b, stack, args->stack_entries * args->stack_stride);
nir_def *stack = nir_load_deref(b, args->vars.stack);
nir_def *stack_ptr = nir_umod_imm(b, stack, args->stack_entries * args->stack_stride);
args->stack_store_cb(b, stack_ptr, new_nodes[i], args);
nir_store_deref(b, args->vars.stack, nir_iadd_imm(b, stack, args->stack_stride), 1);
if (i == 1) {
nir_ssa_def *new_watermark =
nir_def *new_watermark =
nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), -args->stack_entries * args->stack_stride);
new_watermark = nir_imax(b, nir_load_deref(b, args->vars.stack_low_watermark), new_watermark);
nir_store_deref(b, args->vars.stack_low_watermark, new_watermark, 0x1);
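
The stack manipulation above implements a bounded "short stack": entries live in a ring of stack_entries slots addressed modulo the ring size, and the low watermark records how far back entries are still valid, so that once the pointer drops to it traversal falls back to the parent-pointer path shown earlier. A host-side sketch of that bookkeeping, with invented ring dimensions:

   /* Minimal model of the short-stack ring and watermark above; the real
    * pass does the same arithmetic with NIR derefs in shared memory. */
   #include <stdio.h>

   int main(void)
   {
      const int stride = 4, entries = 8;   /* hypothetical ring dimensions */
      int stack = 0;
      int watermark = -stride * entries;   /* nothing valid below this yet */

      for (int i = 0; i < 10; i++) {
         int slot = stack % (stride * entries); /* where this push lands */
         stack += stride;
         int wm = stack - stride * entries;     /* oldest byte still valid */
         if (wm > watermark)
            watermark = wm;
         printf("push %d -> slot %d, watermark %d\n", i, slot, watermark);
      }

      /* Only entries above the watermark survived the wrap-around. */
      printf("entries still popable: %d\n", (stack - watermark) / stride);
      return 0;
   }
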
@ -640,7 +636,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
}
nir_push_else(b, NULL);
{
nir_ssa_def *next = nir_imm_int(b, RADV_BVH_INVALID_NODE);
nir_def *next = nir_imm_int(b, RADV_BVH_INVALID_NODE);
for (unsigned i = 0; i < 3; ++i) {
next = nir_bcsel(b, nir_ieq(b, prev_node, nir_channel(b, result, i)), nir_channel(b, result, i + 1),
next);
@ -653,7 +649,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
}
nir_push_else(b, NULL);
{
nir_ssa_def *result = intrinsic_result;
nir_def *result = intrinsic_result;
if (!result) {
/* If we didn't run the intrinsic because the hardware didn't support it,
 * emulate ray/tri intersection here */

View file

@ -35,41 +35,39 @@
void nir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var_indices, uint32_t chan_1,
uint32_t chan_2);
nir_ssa_def *intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node,
nir_ssa_def *ray_tmax, nir_ssa_def *origin, nir_ssa_def *dir,
nir_ssa_def *inv_dir);
nir_def *intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_def *bvh_node,
nir_def *ray_tmax, nir_def *origin, nir_def *dir, nir_def *inv_dir);
nir_ssa_def *intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node,
nir_ssa_def *ray_tmax, nir_ssa_def *origin, nir_ssa_def *dir,
nir_ssa_def *inv_dir);
nir_def *intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_def *bvh_node,
nir_def *ray_tmax, nir_def *origin, nir_def *dir, nir_def *inv_dir);
nir_ssa_def *build_addr_to_node(nir_builder *b, nir_ssa_def *addr);
nir_def *build_addr_to_node(nir_builder *b, nir_def *addr);
nir_ssa_def *nir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[], bool translation);
nir_def *nir_build_vec3_mat_mult(nir_builder *b, nir_def *vec, nir_def *matrix[], bool translation);
void nir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_def **out);
void nir_build_wto_matrix_load(nir_builder *b, nir_def *instance_addr, nir_def **out);
nir_ssa_def *create_bvh_descriptor(nir_builder *b);
nir_def *create_bvh_descriptor(nir_builder *b);
struct radv_ray_traversal_args;
struct radv_ray_flags {
nir_ssa_def *force_opaque;
nir_ssa_def *force_not_opaque;
nir_ssa_def *terminate_on_first_hit;
nir_ssa_def *no_cull_front;
nir_ssa_def *no_cull_back;
nir_ssa_def *no_cull_opaque;
nir_ssa_def *no_cull_no_opaque;
nir_ssa_def *no_skip_triangles;
nir_ssa_def *no_skip_aabbs;
nir_def *force_opaque;
nir_def *force_not_opaque;
nir_def *terminate_on_first_hit;
nir_def *no_cull_front;
nir_def *no_cull_back;
nir_def *no_cull_opaque;
nir_def *no_cull_no_opaque;
nir_def *no_skip_triangles;
nir_def *no_skip_aabbs;
};
struct radv_leaf_intersection {
nir_ssa_def *node_addr;
nir_ssa_def *primitive_id;
nir_ssa_def *geometry_id_and_flags;
nir_ssa_def *opaque;
nir_def *node_addr;
nir_def *primitive_id;
nir_def *geometry_id_and_flags;
nir_def *opaque;
};
typedef void (*radv_aabb_intersection_cb)(nir_builder *b, struct radv_leaf_intersection *intersection,
@ -78,20 +76,19 @@ typedef void (*radv_aabb_intersection_cb)(nir_builder *b, struct radv_leaf_inter
struct radv_triangle_intersection {
struct radv_leaf_intersection base;
nir_ssa_def *t;
nir_ssa_def *frontface;
nir_ssa_def *barycentrics;
nir_def *t;
nir_def *frontface;
nir_def *barycentrics;
};
typedef void (*radv_triangle_intersection_cb)(nir_builder *b, struct radv_triangle_intersection *intersection,
const struct radv_ray_traversal_args *args,
const struct radv_ray_flags *ray_flags);
typedef void (*radv_rt_stack_store_cb)(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value,
typedef void (*radv_rt_stack_store_cb)(nir_builder *b, nir_def *index, nir_def *value,
const struct radv_ray_traversal_args *args);
typedef nir_ssa_def *(*radv_rt_stack_load_cb)(nir_builder *b, nir_ssa_def *index,
const struct radv_ray_traversal_args *args);
typedef nir_def *(*radv_rt_stack_load_cb)(nir_builder *b, nir_def *index, const struct radv_ray_traversal_args *args);
struct radv_ray_traversal_vars {
/* For each accepted hit, tmax will be set to the t value. This allows for automatic intersection
@ -132,12 +129,12 @@ struct radv_ray_traversal_vars {
};
struct radv_ray_traversal_args {
nir_ssa_def *root_bvh_base;
nir_ssa_def *flags;
nir_ssa_def *cull_mask;
nir_ssa_def *origin;
nir_ssa_def *tmin;
nir_ssa_def *dir;
nir_def *root_bvh_base;
nir_def *flags;
nir_def *cull_mask;
nir_def *origin;
nir_def *tmin;
nir_def *dir;
struct radv_ray_traversal_vars vars;
@ -164,7 +161,7 @@ struct radv_ray_traversal_args {
 * rayQueryProceedEXT. Traversal will only be considered incomplete if one of the specified
* callbacks breaks out of the traversal loop.
*/
nir_ssa_def *radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
const struct radv_ray_traversal_args *args);
nir_def *radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
const struct radv_ray_traversal_args *args);
#endif

View file

@ -44,7 +44,7 @@ lower_rt_derefs(nir_shader *shader)
nir_builder b = nir_builder_at(nir_before_cf_list(&impl->body));
nir_ssa_def *arg_offset = nir_load_rt_arg_scratch_offset_amd(&b);
nir_def *arg_offset = nir_load_rt_arg_scratch_offset_amd(&b);
nir_foreach_block (block, impl) {
nir_foreach_instr_safe (instr, block) {
@ -62,7 +62,7 @@ lower_rt_derefs(nir_shader *shader)
b.cursor = nir_before_instr(&deref->instr);
nir_deref_instr *replacement =
nir_build_deref_cast(&b, arg_offset, nir_var_function_temp, deref->var->type, 0);
nir_ssa_def_rewrite_uses(&deref->dest.ssa, &replacement->dest.ssa);
nir_def_rewrite_uses(&deref->dest.ssa, &replacement->dest.ssa);
nir_instr_remove(&deref->instr);
}
}
@ -239,25 +239,25 @@ enum sbt_entry {
SBT_ANY_HIT_IDX = offsetof(struct radv_pipeline_group_handle, any_hit_index),
};
static nir_ssa_def *
get_sbt_ptr(nir_builder *b, nir_ssa_def *idx, enum sbt_type binding)
static nir_def *
get_sbt_ptr(nir_builder *b, nir_def *idx, enum sbt_type binding)
{
nir_ssa_def *desc_base_addr = nir_load_sbt_base_amd(b);
nir_def *desc_base_addr = nir_load_sbt_base_amd(b);
nir_ssa_def *desc = nir_pack_64_2x32(b, nir_load_smem_amd(b, 2, desc_base_addr, nir_imm_int(b, binding)));
nir_def *desc = nir_pack_64_2x32(b, nir_load_smem_amd(b, 2, desc_base_addr, nir_imm_int(b, binding)));
nir_ssa_def *stride_offset = nir_imm_int(b, binding + (binding == SBT_RAYGEN ? 8 : 16));
nir_ssa_def *stride = nir_pack_64_2x32(b, nir_load_smem_amd(b, 2, desc_base_addr, stride_offset));
nir_def *stride_offset = nir_imm_int(b, binding + (binding == SBT_RAYGEN ? 8 : 16));
nir_def *stride = nir_pack_64_2x32(b, nir_load_smem_amd(b, 2, desc_base_addr, stride_offset));
return nir_iadd(b, desc, nir_imul(b, nir_u2u64(b, idx), stride));
}
static void
load_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_ssa_def *idx, enum sbt_type binding,
load_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_def *idx, enum sbt_type binding,
enum sbt_entry offset)
{
nir_ssa_def *addr = get_sbt_ptr(b, idx, binding);
nir_ssa_def *load_addr = nir_iadd_imm(b, addr, offset);
nir_def *addr = get_sbt_ptr(b, idx, binding);
nir_def *load_addr = nir_iadd_imm(b, addr, offset);
if (offset == SBT_RECURSIVE_PTR) {
nir_store_var(b, vars->shader_va, nir_build_load_global(b, 1, 64, load_addr), 1);
@ -265,7 +265,7 @@ load_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_ssa_def *idx
nir_store_var(b, vars->idx, nir_build_load_global(b, 1, 32, load_addr), 1);
}
nir_ssa_def *record_addr = nir_iadd_imm(b, addr, RADV_RT_HANDLE_SIZE);
nir_def *record_addr = nir_iadd_imm(b, addr, RADV_RT_HANDLE_SIZE);
nir_store_var(b, vars->shader_record_ptr, record_addr, 1);
}
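
get_sbt_ptr() and load_sbt_entry() above boil down to simple table addressing: entry = table base + index * stride, with the shader record following the handle at the start of the entry. A CPU-side sketch of that arithmetic; the 32-byte handle size and all concrete values are assumptions standing in for the real constants:

   /* Sketch of the SBT addressing above: entry address and record pointer
    * for a given index.  HANDLE_SIZE stands in for RADV_RT_HANDLE_SIZE. */
   #include <stdint.h>
   #include <stdio.h>

   #define HANDLE_SIZE 32 /* assumed handle size, illustration only */

   int main(void)
   {
      uint64_t sbt_base = 0x8000;  /* hypothetical table base  */
      uint64_t stride   = 64;      /* hypothetical entry stride */
      uint64_t idx      = 3;       /* hit-group index from the trace call */

      uint64_t entry  = sbt_base + idx * stride;  /* start of the SBT entry */
      uint64_t record = entry + HANDLE_SIZE;      /* shader record pointer  */

      printf("entry 0x%llx, record 0x%llx\n",
             (unsigned long long)entry, (unsigned long long)record);
      return 0;
   }
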
@ -282,12 +282,12 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
case nir_instr_type_intrinsic: {
b_shader.cursor = nir_before_instr(instr);
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
nir_ssa_def *ret = NULL;
nir_def *ret = NULL;
switch (intr->intrinsic) {
case nir_intrinsic_rt_execute_callable: {
uint32_t size = align(nir_intrinsic_stack_size(intr), 16);
nir_ssa_def *ret_ptr = nir_load_resume_shader_address_amd(&b_shader, nir_intrinsic_call_idx(intr));
nir_def *ret_ptr = nir_load_resume_shader_address_amd(&b_shader, nir_intrinsic_call_idx(intr));
ret_ptr = nir_ior_imm(&b_shader, ret_ptr, radv_get_rt_priority(shader->info.stage));
nir_store_var(&b_shader, vars->stack_ptr,
@ -305,7 +305,7 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
}
case nir_intrinsic_rt_trace_ray: {
uint32_t size = align(nir_intrinsic_stack_size(intr), 16);
nir_ssa_def *ret_ptr = nir_load_resume_shader_address_amd(&b_shader, nir_intrinsic_call_idx(intr));
nir_def *ret_ptr = nir_load_resume_shader_address_amd(&b_shader, nir_intrinsic_call_idx(intr));
ret_ptr = nir_ior_imm(&b_shader, ret_ptr, radv_get_rt_priority(shader->info.stage));
nir_store_var(&b_shader, vars->stack_ptr,
@ -385,8 +385,8 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
break;
}
case nir_intrinsic_load_ray_instance_custom_index: {
nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_ssa_def *custom_instance_and_mask = nir_build_load_global(
nir_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_def *custom_instance_and_mask = nir_build_load_global(
&b_shader, 1, 32,
nir_iadd_imm(&b_shader, instance_node_addr,
offsetof(struct radv_bvh_instance_node, custom_instance_and_mask)));
@ -403,7 +403,7 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
break;
}
case nir_intrinsic_load_instance_id: {
nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
ret = nir_build_load_global(
&b_shader, 1, 32,
nir_iadd_imm(&b_shader, instance_node_addr, offsetof(struct radv_bvh_instance_node, instance_id)));
@ -419,11 +419,11 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
}
case nir_intrinsic_load_ray_world_to_object: {
unsigned c = nir_intrinsic_column(intr);
nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_ssa_def *wto_matrix[3];
nir_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_def *wto_matrix[3];
nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);
nir_ssa_def *vals[3];
nir_def *vals[3];
for (unsigned i = 0; i < 3; ++i)
vals[i] = nir_channel(&b_shader, wto_matrix[i], c);
@ -432,8 +432,8 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
}
case nir_intrinsic_load_ray_object_to_world: {
unsigned c = nir_intrinsic_column(intr);
nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_ssa_def *rows[3];
nir_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_def *rows[3];
for (unsigned r = 0; r < 3; ++r)
rows[r] =
nir_build_load_global(&b_shader, 4, 32,
@ -444,15 +444,15 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
break;
}
case nir_intrinsic_load_ray_object_origin: {
nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_ssa_def *wto_matrix[3];
nir_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_def *wto_matrix[3];
nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);
ret = nir_build_vec3_mat_mult(&b_shader, nir_load_var(&b_shader, vars->origin), wto_matrix, true);
break;
}
case nir_intrinsic_load_ray_object_direction: {
nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_ssa_def *wto_matrix[3];
nir_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_def *wto_matrix[3];
nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);
ret = nir_build_vec3_mat_mult(&b_shader, nir_load_var(&b_shader, vars->direction), wto_matrix, false);
break;
@ -521,8 +521,8 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
nir_store_var(&b_shader, vars->hit_kind, intr->src[5].ssa, 0x1);
load_sbt_entry(&b_shader, vars, intr->src[0].ssa, SBT_HIT, SBT_RECURSIVE_PTR);
nir_ssa_def *should_return = nir_test_mask(&b_shader, nir_load_var(&b_shader, vars->cull_mask_and_flags),
SpvRayFlagsSkipClosestHitShaderKHRMask);
nir_def *should_return = nir_test_mask(&b_shader, nir_load_var(&b_shader, vars->cull_mask_and_flags),
SpvRayFlagsSkipClosestHitShaderKHRMask);
if (!(vars->flags & VK_PIPELINE_CREATE_2_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR)) {
should_return = nir_ior(&b_shader, should_return,
@ -538,12 +538,12 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
}
case nir_intrinsic_execute_miss_amd: {
nir_store_var(&b_shader, vars->tmax, intr->src[0].ssa, 0x1);
nir_ssa_def *undef = nir_ssa_undef(&b_shader, 1, 32);
nir_def *undef = nir_undef(&b_shader, 1, 32);
nir_store_var(&b_shader, vars->primitive_id, undef, 0x1);
nir_store_var(&b_shader, vars->instance_addr, nir_ssa_undef(&b_shader, 1, 64), 0x1);
nir_store_var(&b_shader, vars->instance_addr, nir_undef(&b_shader, 1, 64), 0x1);
nir_store_var(&b_shader, vars->geometry_id_and_flags, undef, 0x1);
nir_store_var(&b_shader, vars->hit_kind, undef, 0x1);
nir_ssa_def *miss_index = nir_load_var(&b_shader, vars->miss_index);
nir_def *miss_index = nir_load_var(&b_shader, vars->miss_index);
load_sbt_entry(&b_shader, vars, miss_index, SBT_MISS, SBT_RECURSIVE_PTR);
if (!(vars->flags & VK_PIPELINE_CREATE_2_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR)) {
@ -560,7 +560,7 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
}
if (ret)
nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
nir_def_rewrite_uses(&intr->dest.ssa, ret);
nir_instr_remove(instr);
break;
}
@ -603,7 +603,7 @@ lower_hit_attrib_deref(nir_builder *b, nir_instr *instr, void *data)
uint32_t num_components = intrin->dest.ssa.num_components;
uint32_t bit_size = intrin->dest.ssa.bit_size;
nir_ssa_def *components[NIR_MAX_VEC_COMPONENTS];
nir_def *components[NIR_MAX_VEC_COMPONENTS];
for (uint32_t comp = 0; comp < num_components; comp++) {
uint32_t offset = deref->var->data.driver_location + comp * bit_size / 8;
@ -626,9 +626,9 @@ lower_hit_attrib_deref(nir_builder *b, nir_instr *instr, void *data)
}
}
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_vec(b, components, num_components));
nir_def_rewrite_uses(&intrin->dest.ssa, nir_vec(b, components, num_components));
} else {
nir_ssa_def *value = intrin->src[1].ssa;
nir_def *value = intrin->src[1].ssa;
uint32_t num_components = value->num_components;
uint32_t bit_size = value->bit_size;
@ -637,7 +637,7 @@ lower_hit_attrib_deref(nir_builder *b, nir_instr *instr, void *data)
uint32_t base = offset / 4;
uint32_t comp_offset = offset % 4;
nir_ssa_def *component = nir_channel(b, value, comp);
nir_def *component = nir_channel(b, value, comp);
if (bit_size == 64) {
nir_store_hit_attrib_amd(b, nir_unpack_64_2x32_split_x(b, component), .base = base);
@ -645,14 +645,14 @@ lower_hit_attrib_deref(nir_builder *b, nir_instr *instr, void *data)
} else if (bit_size == 32) {
nir_store_hit_attrib_amd(b, component, .base = base);
} else if (bit_size == 16) {
nir_ssa_def *prev = nir_unpack_32_2x16(b, nir_load_hit_attrib_amd(b, .base = base));
nir_ssa_def *components[2];
nir_def *prev = nir_unpack_32_2x16(b, nir_load_hit_attrib_amd(b, .base = base));
nir_def *components[2];
for (uint32_t word = 0; word < 2; word++)
components[word] = (word == comp_offset / 2) ? nir_channel(b, value, comp) : nir_channel(b, prev, word);
nir_store_hit_attrib_amd(b, nir_pack_32_2x16(b, nir_vec(b, components, 2)), .base = base);
} else if (bit_size == 8) {
nir_ssa_def *prev = nir_unpack_bits(b, nir_load_hit_attrib_amd(b, .base = base), 8);
nir_ssa_def *components[4];
nir_def *prev = nir_unpack_bits(b, nir_load_hit_attrib_amd(b, .base = base), 8);
nir_def *components[4];
for (uint32_t byte = 0; byte < 4; byte++)
components[byte] = (byte == comp_offset) ? nir_channel(b, value, comp) : nir_channel(b, prev, byte);
nir_store_hit_attrib_amd(b, nir_pack_32_4x8(b, nir_vec(b, components, 4)), .base = base);
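
The hit-attribute lowering above packs everything into 32-bit registers: a scalar at byte offset driver_location + component * bit_size / 8 lands in dword offset / 4, in half-word (offset % 4) / 2 for 16-bit values or byte offset % 4 for 8-bit values. A small sketch of that slot arithmetic with made-up inputs:

   /* Slot arithmetic for hit attributes, mirroring the lowering above. */
   #include <stdint.h>
   #include <stdio.h>

   int main(void)
   {
      uint32_t driver_location = 8; /* hypothetical variable location */
      uint32_t component = 1;
      uint32_t bit_size = 16;

      uint32_t offset = driver_location + component * bit_size / 8;
      uint32_t base = offset / 4;  /* which 32-bit hit-attrib register */
      uint32_t sub  = offset % 4;  /* byte offset inside that register */

      printf("base=%u, 16-bit word=%u, byte=%u\n", base, sub / 2, sub);
      return 0;
   }
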
@ -703,19 +703,19 @@ lower_hit_attribs(nir_shader *shader, nir_variable **hit_attribs, uint32_t workg
b.cursor = nir_after_instr(instr);
nir_ssa_def *offset;
nir_def *offset;
if (!hit_attribs)
offset = nir_imul_imm(
&b, nir_iadd_imm(&b, nir_load_local_invocation_index(&b), nir_intrinsic_base(intrin) * workgroup_size),
sizeof(uint32_t));
if (intrin->intrinsic == nir_intrinsic_load_hit_attrib_amd) {
nir_ssa_def *ret;
nir_def *ret;
if (hit_attribs)
ret = nir_load_var(&b, hit_attribs[nir_intrinsic_base(intrin)]);
else
ret = nir_load_shared(&b, 1, 32, offset, .base = 0, .align_mul = 4);
nir_ssa_def_rewrite_uses(nir_instr_ssa_def(instr), ret);
nir_def_rewrite_uses(nir_instr_ssa_def(instr), ret);
} else {
if (hit_attribs)
nir_store_var(&b, hit_attribs[nir_intrinsic_base(intrin)], intrin->src->ssa, 0x1);
@ -772,7 +772,7 @@ inline_constants(nir_shader *dst, nir_shader *src)
}
static void
insert_rt_case(nir_builder *b, nir_shader *shader, struct rt_variables *vars, nir_ssa_def *idx, uint32_t call_idx_base,
insert_rt_case(nir_builder *b, nir_shader *shader, struct rt_variables *vars, nir_def *idx, uint32_t call_idx_base,
uint32_t call_idx, unsigned stage_idx, struct radv_ray_tracing_stage *stages)
{
uint32_t workgroup_size =
@ -880,10 +880,10 @@ lower_any_hit_for_intersection(nir_shader *any_hit)
nir_builder build = nir_builder_at(nir_before_cf_list(&impl->body));
nir_builder *b = &build;
nir_ssa_def *commit_ptr = nir_load_param(b, 0);
nir_ssa_def *hit_t = nir_load_param(b, 1);
nir_ssa_def *hit_kind = nir_load_param(b, 2);
nir_ssa_def *scratch_offset = nir_load_param(b, 3);
nir_def *commit_ptr = nir_load_param(b, 0);
nir_def *hit_t = nir_load_param(b, 1);
nir_def *hit_kind = nir_load_param(b, 2);
nir_def *scratch_offset = nir_load_param(b, 3);
nir_deref_instr *commit = nir_build_deref_cast(b, commit_ptr, nir_var_function_temp, glsl_bool_type(), 0);
@ -913,12 +913,12 @@ lower_any_hit_for_intersection(nir_shader *any_hit)
break;
case nir_intrinsic_load_ray_t_max:
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, hit_t);
nir_def_rewrite_uses(&intrin->dest.ssa, hit_t);
nir_instr_remove(&intrin->instr);
break;
case nir_intrinsic_load_ray_hit_kind:
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, hit_kind);
nir_def_rewrite_uses(&intrin->dest.ssa, hit_kind);
nir_instr_remove(&intrin->instr);
break;
@ -939,8 +939,8 @@ lower_any_hit_for_intersection(nir_shader *any_hit)
break;
case nir_intrinsic_load_rt_arg_scratch_offset_amd:
b->cursor = nir_after_instr(instr);
nir_ssa_def *arg_offset = nir_isub(b, &intrin->dest.ssa, scratch_offset);
nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, arg_offset, arg_offset->parent_instr);
nir_def *arg_offset = nir_isub(b, &intrin->dest.ssa, scratch_offset);
nir_def_rewrite_uses_after(&intrin->dest.ssa, arg_offset, arg_offset->parent_instr);
break;
default:
@ -1012,10 +1012,10 @@ nir_lower_intersection_shader(nir_shader *intersection, nir_shader *any_hit)
continue;
b->cursor = nir_instr_remove(&intrin->instr);
nir_ssa_def *hit_t = nir_ssa_for_src(b, intrin->src[0], 1);
nir_ssa_def *hit_kind = nir_ssa_for_src(b, intrin->src[1], 1);
nir_ssa_def *min_t = nir_load_ray_t_min(b);
nir_ssa_def *max_t = nir_load_ray_t_max(b);
nir_def *hit_t = nir_ssa_for_src(b, intrin->src[0], 1);
nir_def *hit_kind = nir_ssa_for_src(b, intrin->src[1], 1);
nir_def *min_t = nir_load_ray_t_min(b);
nir_def *max_t = nir_load_ray_t_max(b);
/* bool commit_tmp = false; */
nir_variable *commit_tmp = nir_local_variable_create(impl, glsl_bool_type(), "commit_tmp");
@ -1029,7 +1029,7 @@ nir_lower_intersection_shader(nir_shader *intersection, nir_shader *any_hit)
if (any_hit_impl != NULL) {
nir_push_if(b, nir_inot(b, nir_load_intersection_opaque_amd(b)));
{
nir_ssa_def *params[] = {
nir_def *params[] = {
&nir_build_deref_var(b, commit_tmp)->dest.ssa,
hit_t,
hit_kind,
@ -1048,8 +1048,8 @@ nir_lower_intersection_shader(nir_shader *intersection, nir_shader *any_hit)
}
nir_pop_if(b, NULL);
nir_ssa_def *accepted = nir_load_var(b, commit_tmp);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, accepted);
nir_def *accepted = nir_load_var(b, commit_tmp);
nir_def_rewrite_uses(&intrin->dest.ssa, accepted);
}
}
nir_metadata_preserve(impl, nir_metadata_none);
@ -1124,7 +1124,7 @@ static void
visit_any_hit_shaders(struct radv_device *device, nir_builder *b, struct traversal_data *data,
struct rt_variables *vars)
{
nir_ssa_def *sbt_idx = nir_load_var(b, vars->idx);
nir_def *sbt_idx = nir_load_var(b, vars->idx);
if (!(vars->flags & VK_PIPELINE_CREATE_2_RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR))
nir_push_if(b, nir_ine_imm(b, sbt_idx, 0));
@ -1170,16 +1170,16 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int
{
struct traversal_data *data = args->data;
nir_ssa_def *geometry_id = nir_iand_imm(b, intersection->base.geometry_id_and_flags, 0xfffffff);
nir_ssa_def *sbt_idx =
nir_def *geometry_id = nir_iand_imm(b, intersection->base.geometry_id_and_flags, 0xfffffff);
nir_def *sbt_idx =
nir_iadd(b,
nir_iadd(b, nir_load_var(b, data->vars->sbt_offset),
nir_iand_imm(b, nir_load_var(b, data->trav_vars->sbt_offset_and_flags), 0xffffff)),
nir_imul(b, nir_load_var(b, data->vars->sbt_stride), geometry_id));
nir_ssa_def *hit_kind = nir_bcsel(b, intersection->frontface, nir_imm_int(b, 0xFE), nir_imm_int(b, 0xFF));
nir_def *hit_kind = nir_bcsel(b, intersection->frontface, nir_imm_int(b, 0xFE), nir_imm_int(b, 0xFF));
nir_ssa_def *prev_barycentrics = nir_load_var(b, data->barycentrics);
nir_def *prev_barycentrics = nir_load_var(b, data->barycentrics);
nir_store_var(b, data->barycentrics, intersection->barycentrics, 0x3);
nir_store_var(b, data->vars->ahit_accept, nir_imm_true(b), 0x1);
@ -1217,7 +1217,7 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int
nir_store_var(b, data->vars->idx, sbt_idx, 1);
nir_store_var(b, data->trav_vars->hit, nir_imm_true(b), 1);
nir_ssa_def *ray_terminated = nir_load_var(b, data->vars->ahit_terminate);
nir_def *ray_terminated = nir_load_var(b, data->vars->ahit_terminate);
nir_push_if(b, nir_ior(b, ray_flags->terminate_on_first_hit, ray_terminated));
{
nir_jump(b, nir_jump_break);
@ -1231,8 +1231,8 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
{
struct traversal_data *data = args->data;
nir_ssa_def *geometry_id = nir_iand_imm(b, intersection->geometry_id_and_flags, 0xfffffff);
nir_ssa_def *sbt_idx =
nir_def *geometry_id = nir_iand_imm(b, intersection->geometry_id_and_flags, 0xfffffff);
nir_def *sbt_idx =
nir_iadd(b,
nir_iadd(b, nir_load_var(b, data->vars->sbt_offset),
nir_iand_imm(b, nir_load_var(b, data->trav_vars->sbt_offset_and_flags), 0xffffff)),
@ -1317,8 +1317,8 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
nir_store_var(b, data->vars->idx, sbt_idx, 1);
nir_store_var(b, data->trav_vars->hit, nir_imm_true(b), 1);
nir_ssa_def *terminate_on_first_hit = nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask);
nir_ssa_def *ray_terminated = nir_load_var(b, data->vars->ahit_terminate);
nir_def *terminate_on_first_hit = nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask);
nir_def *ray_terminated = nir_load_var(b, data->vars->ahit_terminate);
nir_push_if(b, nir_ior(b, terminate_on_first_hit, ray_terminated));
{
nir_jump(b, nir_jump_break);
@ -1329,13 +1329,13 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
}
static void
store_stack_entry(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value, const struct radv_ray_traversal_args *args)
store_stack_entry(nir_builder *b, nir_def *index, nir_def *value, const struct radv_ray_traversal_args *args)
{
nir_store_shared(b, value, index, .base = 0, .align_mul = 4);
}
static nir_ssa_def *
load_stack_entry(nir_builder *b, nir_ssa_def *index, const struct radv_ray_traversal_args *args)
static nir_def *
load_stack_entry(nir_builder *b, nir_def *index, const struct radv_ray_traversal_args *args)
{
return nir_load_shared(b, 1, 32, index, .base = 0, .align_mul = 4);
}
@ -1366,8 +1366,8 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_
barycentrics->data.driver_location = 0;
/* initialize trace_ray arguments */
nir_ssa_def *accel_struct = nir_load_accel_struct_amd(&b);
nir_ssa_def *cull_mask_and_flags = nir_load_cull_mask_and_flags_amd(&b);
nir_def *accel_struct = nir_load_accel_struct_amd(&b);
nir_def *cull_mask_and_flags = nir_load_cull_mask_and_flags_amd(&b);
nir_store_var(&b, vars.cull_mask_and_flags, cull_mask_and_flags, 0x1);
nir_store_var(&b, vars.sbt_offset, nir_load_sbt_offset_amd(&b), 0x1);
nir_store_var(&b, vars.sbt_stride, nir_load_sbt_stride_amd(&b), 0x1);
@ -1382,15 +1382,15 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_
nir_store_var(&b, trav_vars.hit, nir_imm_false(&b), 1);
nir_ssa_def *bvh_offset = nir_build_load_global(
nir_def *bvh_offset = nir_build_load_global(
&b, 1, 32, nir_iadd_imm(&b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)),
.access = ACCESS_NON_WRITEABLE);
nir_ssa_def *root_bvh_base = nir_iadd(&b, accel_struct, nir_u2u64(&b, bvh_offset));
nir_def *root_bvh_base = nir_iadd(&b, accel_struct, nir_u2u64(&b, bvh_offset));
root_bvh_base = build_addr_to_node(&b, root_bvh_base);
nir_store_var(&b, trav_vars.bvh_base, root_bvh_base, 1);
nir_ssa_def *vec3ones = nir_imm_vec3(&b, 1.0, 1.0, 1.0);
nir_def *vec3ones = nir_imm_vec3(&b, 1.0, 1.0, 1.0);
nir_store_var(&b, trav_vars.origin, nir_load_var(&b, vars.origin), 7);
nir_store_var(&b, trav_vars.dir, nir_load_var(&b, vars.direction), 7);
@ -1504,15 +1504,15 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_
* CHit / Miss : Callable > Chit / Miss > Traversal > Raygen
* Callable : Callable > Chit / Miss > > Raygen
*/
static nir_ssa_def *
select_next_shader(nir_builder *b, nir_ssa_def *shader_va, unsigned wave_size)
static nir_def *
select_next_shader(nir_builder *b, nir_def *shader_va, unsigned wave_size)
{
gl_shader_stage stage = b->shader->info.stage;
nir_ssa_def *prio = nir_iand_imm(b, shader_va, radv_rt_priority_mask);
nir_ssa_def *ballot = nir_ballot(b, 1, wave_size, nir_imm_bool(b, true));
nir_ssa_def *ballot_traversal = nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_traversal));
nir_ssa_def *ballot_hit_miss = nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_hit_miss));
nir_ssa_def *ballot_callable = nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_callable));
nir_def *prio = nir_iand_imm(b, shader_va, radv_rt_priority_mask);
nir_def *ballot = nir_ballot(b, 1, wave_size, nir_imm_bool(b, true));
nir_def *ballot_traversal = nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_traversal));
nir_def *ballot_hit_miss = nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_hit_miss));
nir_def *ballot_callable = nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_callable));
if (stage != MESA_SHADER_CALLABLE && stage != MESA_SHADER_INTERSECTION)
ballot = nir_bcsel(b, nir_ine_imm(b, ballot_traversal, 0), ballot_traversal, ballot);
@ -1521,8 +1521,8 @@ select_next_shader(nir_builder *b, nir_ssa_def *shader_va, unsigned wave_size)
if (stage != MESA_SHADER_INTERSECTION)
ballot = nir_bcsel(b, nir_ine_imm(b, ballot_callable, 0), ballot_callable, ballot);
nir_ssa_def *lsb = nir_find_lsb(b, ballot);
nir_ssa_def *next = nir_read_invocation(b, shader_va, lsb);
nir_def *lsb = nir_find_lsb(b, ballot);
nir_def *next = nir_read_invocation(b, shader_va, lsb);
return nir_iand_imm(b, next, ~radv_rt_priority_mask);
}
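
select_next_shader() above picks, per wave, the highest-priority shader class that any lane wants to run next and makes every lane jump to the address of the first such lane. A host-side sketch of the same idea, ignoring the per-stage exclusions the real pass applies; the 2-bit tag values are assumptions standing in for radv_rt_priority_*:

   /* Model of the wave-wide next-shader selection: each lane's VA carries a
    * small priority tag in its low bits; the wave picks the highest class
    * present and takes the VA of the first lane in that class (the NIR
    * version uses ballot + find_lsb + read_invocation for this). */
   #include <stdint.h>
   #include <stdio.h>

   #define PRIO_MASK 0x3ull
   enum { PRIO_RAYGEN = 0, PRIO_CALLABLE = 1, PRIO_HIT_MISS = 2, PRIO_TRAVERSAL = 3 };

   static uint64_t
   select_next(const uint64_t *shader_va, unsigned lanes)
   {
      unsigned best = 0;
      for (unsigned i = 0; i < lanes; i++)
         if ((shader_va[i] & PRIO_MASK) > best)
            best = shader_va[i] & PRIO_MASK;

      for (unsigned i = 0; i < lanes; i++)
         if ((shader_va[i] & PRIO_MASK) == best)
            return shader_va[i] & ~PRIO_MASK;
      return 0;
   }

   int main(void)
   {
      uint64_t lanes[4] = {
         0x1000 | PRIO_CALLABLE, 0x2000 | PRIO_HIT_MISS,
         0x3000 | PRIO_HIT_MISS, 0x4000 | PRIO_RAYGEN,
      };
      printf("next shader VA: 0x%llx\n",
             (unsigned long long)select_next(lanes, 4));
      return 0;
   }
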
@ -1552,17 +1552,17 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
/* initialize variables */
nir_builder b = nir_builder_at(nir_before_cf_list(&impl->body));
nir_ssa_def *traversal_addr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.traversal_shader);
nir_def *traversal_addr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.traversal_shader);
nir_store_var(&b, vars.traversal_addr, nir_pack_64_2x32(&b, traversal_addr), 1);
nir_ssa_def *shader_va = ac_nir_load_arg(&b, &args->ac, args->ac.rt.next_shader);
nir_def *shader_va = ac_nir_load_arg(&b, &args->ac, args->ac.rt.next_shader);
shader_va = nir_pack_64_2x32(&b, shader_va);
nir_store_var(&b, vars.shader_va, shader_va, 1);
nir_store_var(&b, vars.stack_ptr, ac_nir_load_arg(&b, &args->ac, args->ac.rt.dynamic_callable_stack_base), 1);
nir_ssa_def *record_ptr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.shader_record);
nir_def *record_ptr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.shader_record);
nir_store_var(&b, vars.shader_record_ptr, nir_pack_64_2x32(&b, record_ptr), 1);
nir_store_var(&b, vars.arg, ac_nir_load_arg(&b, &args->ac, args->ac.rt.payload_offset), 1);
nir_ssa_def *accel_struct = ac_nir_load_arg(&b, &args->ac, args->ac.rt.accel_struct);
nir_def *accel_struct = ac_nir_load_arg(&b, &args->ac, args->ac.rt.accel_struct);
nir_store_var(&b, vars.accel_struct, nir_pack_64_2x32(&b, accel_struct), 1);
nir_store_var(&b, vars.cull_mask_and_flags, ac_nir_load_arg(&b, &args->ac, args->ac.rt.cull_mask_and_flags), 1);
nir_store_var(&b, vars.sbt_offset, ac_nir_load_arg(&b, &args->ac, args->ac.rt.sbt_offset), 1);
@ -1574,7 +1574,7 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
nir_store_var(&b, vars.tmax, ac_nir_load_arg(&b, &args->ac, args->ac.rt.ray_tmax), 1);
nir_store_var(&b, vars.primitive_id, ac_nir_load_arg(&b, &args->ac, args->ac.rt.primitive_id), 1);
nir_ssa_def *instance_addr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.instance_addr);
nir_def *instance_addr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.instance_addr);
nir_store_var(&b, vars.instance_addr, nir_pack_64_2x32(&b, instance_addr), 1);
nir_store_var(&b, vars.geometry_id_and_flags, ac_nir_load_arg(&b, &args->ac, args->ac.rt.geometry_id_and_flags), 1);
nir_store_var(&b, vars.hit_kind, ac_nir_load_arg(&b, &args->ac, args->ac.rt.hit_kind), 1);
@ -1582,7 +1582,7 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
/* guard the shader, so that only the correct invocations execute it */
nir_if *shader_guard = NULL;
if (shader->info.stage != MESA_SHADER_RAYGEN || resume_shader) {
nir_ssa_def *shader_pc = ac_nir_load_arg(&b, &args->ac, args->ac.rt.shader_pc);
nir_def *shader_pc = ac_nir_load_arg(&b, &args->ac, args->ac.rt.shader_pc);
shader_pc = nir_pack_64_2x32(&b, shader_pc);
shader_pc = nir_ior_imm(&b, shader_pc, radv_get_rt_priority(shader->info.stage));
@ -1598,7 +1598,7 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
/* select next shader */
b.cursor = nir_after_cf_list(&impl->body);
shader_va = nir_load_var(&b, vars.shader_va);
nir_ssa_def *next = select_next_shader(&b, shader_va, info->wave_size);
nir_def *next = select_next_shader(&b, shader_va, info->wave_size);
ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_pc, next);
/* store back all variables to registers */

View file

@ -40,7 +40,7 @@ gather_intrinsic_load_input_info(const nir_shader *nir, const nir_intrinsic_inst
case MESA_SHADER_VERTEX: {
unsigned idx = nir_intrinsic_io_semantics(instr).location;
unsigned component = nir_intrinsic_component(instr);
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
unsigned mask = nir_def_components_read(&instr->dest.ssa);
mask = (instr->dest.ssa.bit_size == 64 ? util_widen_mask(mask, 2) : mask) << component;
info->vs.input_usage_mask[idx] |= mask & 0xf;
@ -95,11 +95,11 @@ gather_intrinsic_store_output_info(const nir_shader *nir, const nir_intrinsic_in
unsigned pos_w_chan = 3 - component;
if (write_mask & BITFIELD_BIT(pos_w_chan)) {
nir_ssa_scalar pos_w = nir_ssa_scalar_resolved(instr->src[0].ssa, pos_w_chan);
nir_scalar pos_w = nir_scalar_resolved(instr->src[0].ssa, pos_w_chan);
/* Use coarse shading if the value of Pos.W can't be determined or if its value is != 1
* (typical for non-GUI elements).
*/
if (!nir_ssa_scalar_is_const(pos_w) || nir_ssa_scalar_as_uint(pos_w) != 0x3f800000u)
if (!nir_scalar_is_const(pos_w) || nir_scalar_as_uint(pos_w) != 0x3f800000u)
info->force_vrs_per_vertex = true;
}
}
@ -179,7 +179,7 @@ gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr, s
break;
case nir_intrinsic_load_local_invocation_id:
case nir_intrinsic_load_workgroup_id: {
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
unsigned mask = nir_def_components_read(&instr->dest.ssa);
while (mask) {
unsigned i = u_bit_scan(&mask);
@ -191,10 +191,10 @@ gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr, s
break;
}
case nir_intrinsic_load_frag_coord:
info->ps.reads_frag_coord_mask |= nir_ssa_def_components_read(&instr->dest.ssa);
info->ps.reads_frag_coord_mask |= nir_def_components_read(&instr->dest.ssa);
break;
case nir_intrinsic_load_sample_pos:
info->ps.reads_sample_pos_mask |= nir_ssa_def_components_read(&instr->dest.ssa);
info->ps.reads_sample_pos_mask |= nir_def_components_read(&instr->dest.ssa);
break;
case nir_intrinsic_load_push_constant:
gather_push_constant_info(nir, instr, info);

View file

@ -409,7 +409,7 @@ agx_emit_load_vary(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
nir_src *offset = nir_get_io_offset_src(instr);
assert(nir_src_is_const(*offset) && "no indirects");
assert(nir_ssa_def_components_read(&instr->dest.ssa) ==
assert(nir_def_components_read(&instr->dest.ssa) ==
nir_component_mask(components) &&
"iter does not handle write-after-write hazards");
@ -771,10 +771,10 @@ agx_emit_local_store(agx_builder *b, nir_intrinsic_instr *instr)
static agx_index
agx_translate_bindless_handle(agx_builder *b, nir_src *handle, agx_index *base)
{
nir_ssa_scalar base_scalar = nir_ssa_scalar_resolved(handle->ssa, 0);
assert(nir_ssa_scalar_is_const(base_scalar) && "base must be constant");
nir_scalar base_scalar = nir_scalar_resolved(handle->ssa, 0);
assert(nir_scalar_is_const(base_scalar) && "base must be constant");
unsigned base_uint = nir_ssa_scalar_as_uint(base_scalar);
unsigned base_uint = nir_scalar_as_uint(base_scalar);
*base = agx_uniform(base_uint, AGX_SIZE_64);
return agx_emit_extract(b, agx_src_index(handle), 1);
@ -801,7 +801,7 @@ static unsigned
agx_expand_tex_to(agx_builder *b, nir_dest *dest, agx_index src, bool masked)
{
unsigned nr_channels = nir_dest_num_components(*dest);
nir_component_mask_t mask = nir_ssa_def_components_read(&dest->ssa);
nir_component_mask_t mask = nir_def_components_read(&dest->ssa);
if (!masked)
mask = (nir_component_mask_t)BITFIELD_MASK(nr_channels);
@ -1798,7 +1798,7 @@ agx_emit_phis_deferred(agx_context *ctx)
}
static void
agx_emit_undef(agx_builder *b, nir_ssa_undef_instr *instr)
agx_emit_undef(agx_builder *b, nir_undef_instr *instr)
{
/* For now, just lower undefs to zero. This doesn't matter too much, since
* the lowering happens in NIR and this just allows for late lowering passes
@ -2095,17 +2095,17 @@ agx_lower_sincos_filter(const nir_instr *instr, UNUSED const void *_)
* implemented by shifting by one quadrant: cos(x) = sin(x + tau/4).
*/
static nir_ssa_def *
static nir_def *
agx_lower_sincos_impl(struct nir_builder *b, nir_instr *instr, UNUSED void *_)
{
nir_alu_instr *alu = nir_instr_as_alu(instr);
nir_ssa_def *x = nir_mov_alu(b, alu->src[0], 1);
nir_ssa_def *turns = nir_fmul_imm(b, x, M_1_PI * 0.5f);
nir_def *x = nir_mov_alu(b, alu->src[0], 1);
nir_def *turns = nir_fmul_imm(b, x, M_1_PI * 0.5f);
if (alu->op == nir_op_fcos)
turns = nir_fadd_imm(b, turns, 0.25f);
nir_ssa_def *quadrants = nir_fmul_imm(b, nir_ffract(b, turns), 4.0);
nir_def *quadrants = nir_fmul_imm(b, nir_ffract(b, turns), 4.0);
return nir_fsin_agx(b, quadrants);
}
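
The lowering above reduces the argument to quadrants: turns = x / tau, cosine adds a quarter turn, and fsin_agx is fed fract(turns) * 4. A reference model of that reduction, under the assumption (implied by the pass) that fsin_agx takes its argument in units of pi/2:

   /* libm model of agx_lower_sincos_impl(): sin_agx_model stands in for the
    * hardware op and is assumed to take quadrants (units of pi/2). */
   #include <math.h>
   #include <stdio.h>

   static double sin_agx_model(double quadrants)
   {
      return sin(quadrants * M_PI_2);
   }

   static double lowered_sin(double x)
   {
      double turns = x * (1.0 / (2.0 * M_PI));
      double quadrants = (turns - floor(turns)) * 4.0; /* ffract * 4 */
      return sin_agx_model(quadrants);
   }

   static double lowered_cos(double x)
   {
      double turns = x * (1.0 / (2.0 * M_PI)) + 0.25; /* cos(x) = sin(x + tau/4) */
      double quadrants = (turns - floor(turns)) * 4.0;
      return sin_agx_model(quadrants);
   }

   int main(void)
   {
      double x = 2.5;
      printf("sin: %f vs %f\n", sin(x), lowered_sin(x));
      printf("cos: %f vs %f\n", cos(x), lowered_cos(x));
      return 0;
   }
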
@ -2126,11 +2126,11 @@ agx_lower_front_face(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
if (intr->intrinsic != nir_intrinsic_load_front_face)
return false;
nir_ssa_def *def = &intr->dest.ssa;
nir_def *def = &intr->dest.ssa;
assert(def->bit_size == 1);
b->cursor = nir_before_instr(&intr->instr);
nir_ssa_def_rewrite_uses(def, nir_inot(b, nir_load_back_face_agx(b, 1)));
nir_def_rewrite_uses(def, nir_inot(b, nir_load_back_face_agx(b, 1)));
return true;
}
@ -2347,8 +2347,8 @@ agx_gather_texcoords(nir_builder *b, nir_instr *instr, void *data)
return false;
nir_src src = tex->src[coord_idx].src;
nir_ssa_scalar x = nir_ssa_scalar_resolved(src.ssa, 0);
nir_ssa_scalar y = nir_ssa_scalar_resolved(src.ssa, 1);
nir_scalar x = nir_scalar_resolved(src.ssa, 0);
nir_scalar y = nir_scalar_resolved(src.ssa, 1);
if (x.def != y.def)
return false;

View file

@ -454,7 +454,7 @@ agx_size_for_bits(unsigned bits)
}
static inline agx_index
agx_nir_ssa_index(nir_ssa_def *ssa)
agx_nir_ssa_index(nir_def *ssa)
{
return agx_get_index(ssa->index, agx_size_for_bits(ssa->bit_size));
}

View file

@ -8,7 +8,7 @@
/* Results of pattern matching */
struct match {
nir_ssa_scalar base, offset;
nir_scalar base, offset;
bool has_offset;
bool sign_extend;
@ -25,18 +25,18 @@ struct match {
* variables. Otherwise, returns false.
*/
static bool
match_imul_imm(nir_ssa_scalar scalar, nir_ssa_scalar *variable, uint32_t *imm)
match_imul_imm(nir_scalar scalar, nir_scalar *variable, uint32_t *imm)
{
if (!nir_ssa_scalar_is_alu(scalar))
if (!nir_scalar_is_alu(scalar))
return false;
nir_op op = nir_ssa_scalar_alu_op(scalar);
nir_op op = nir_scalar_alu_op(scalar);
if (op != nir_op_imul && op != nir_op_ishl)
return false;
nir_ssa_scalar inputs[] = {
nir_ssa_scalar_chase_alu_src(scalar, 0),
nir_ssa_scalar_chase_alu_src(scalar, 1),
nir_scalar inputs[] = {
nir_scalar_chase_alu_src(scalar, 0),
nir_scalar_chase_alu_src(scalar, 1),
};
/* For imul check both operands for an immediate, since imul is commutative.
@ -45,12 +45,12 @@ match_imul_imm(nir_ssa_scalar scalar, nir_ssa_scalar *variable, uint32_t *imm)
bool commutes = (op == nir_op_imul);
for (unsigned i = commutes ? 0 : 1; i < ARRAY_SIZE(inputs); ++i) {
if (!nir_ssa_scalar_is_const(inputs[i]))
if (!nir_scalar_is_const(inputs[i]))
continue;
*variable = inputs[1 - i];
uint32_t value = nir_ssa_scalar_as_uint(inputs[i]);
uint32_t value = nir_scalar_as_uint(inputs[i]);
if (op == nir_op_imul)
*imm = value;
@ -75,17 +75,17 @@ match_imul_imm(nir_ssa_scalar scalar, nir_ssa_scalar *variable, uint32_t *imm)
static bool
match_soa(nir_builder *b, struct match *match, unsigned format_shift)
{
if (!nir_ssa_scalar_is_alu(match->offset) ||
nir_ssa_scalar_alu_op(match->offset) != nir_op_iadd)
if (!nir_scalar_is_alu(match->offset) ||
nir_scalar_alu_op(match->offset) != nir_op_iadd)
return false;
nir_ssa_scalar summands[] = {
nir_ssa_scalar_chase_alu_src(match->offset, 0),
nir_ssa_scalar_chase_alu_src(match->offset, 1),
nir_scalar summands[] = {
nir_scalar_chase_alu_src(match->offset, 0),
nir_scalar_chase_alu_src(match->offset, 1),
};
for (unsigned i = 0; i < ARRAY_SIZE(summands); ++i) {
if (!nir_ssa_scalar_is_const(summands[i]))
if (!nir_scalar_is_const(summands[i]))
continue;
/* Note: This is treated as signed regardless of the sign of the match.
@ -104,8 +104,8 @@ match_soa(nir_builder *b, struct match *match, unsigned format_shift)
* TODO: We need to confirm how the hardware handles 32-bit overflow when
* applying the format shift, which might need rework here again.
*/
int offset = nir_ssa_scalar_as_int(summands[i]);
nir_ssa_scalar variable;
int offset = nir_scalar_as_int(summands[i]);
nir_scalar variable;
uint32_t multiplier;
/* The other operand must multiply */
@ -123,9 +123,9 @@ match_soa(nir_builder *b, struct match *match, unsigned format_shift)
return false;
/* Otherwise, rewrite! */
nir_ssa_def *unmultiplied = nir_vec_scalars(b, &variable, 1);
nir_def *unmultiplied = nir_vec_scalars(b, &variable, 1);
nir_ssa_def *rewrite = nir_iadd_imm(
nir_def *rewrite = nir_iadd_imm(
b, nir_imul_imm(b, unmultiplied, multiplier_shifted), offset_shifted);
match->offset = nir_get_ssa_scalar(rewrite, 0);
@ -138,27 +138,26 @@ match_soa(nir_builder *b, struct match *match, unsigned format_shift)
/* Try to pattern match address calculation */
static struct match
match_address(nir_builder *b, nir_ssa_scalar base, int8_t format_shift)
match_address(nir_builder *b, nir_scalar base, int8_t format_shift)
{
struct match match = {.base = base};
/* All address calculations are iadd at the root */
if (!nir_ssa_scalar_is_alu(base) ||
nir_ssa_scalar_alu_op(base) != nir_op_iadd)
if (!nir_scalar_is_alu(base) || nir_scalar_alu_op(base) != nir_op_iadd)
return match;
/* Only 64+32 addition is supported, look for an extension */
nir_ssa_scalar summands[] = {
nir_ssa_scalar_chase_alu_src(base, 0),
nir_ssa_scalar_chase_alu_src(base, 1),
nir_scalar summands[] = {
nir_scalar_chase_alu_src(base, 0),
nir_scalar_chase_alu_src(base, 1),
};
for (unsigned i = 0; i < ARRAY_SIZE(summands); ++i) {
/* We can add a small constant to the 64-bit base for free */
if (nir_ssa_scalar_is_const(summands[i]) &&
nir_ssa_scalar_as_uint(summands[i]) < (1ull << 32)) {
if (nir_scalar_is_const(summands[i]) &&
nir_scalar_as_uint(summands[i]) < (1ull << 32)) {
uint32_t value = nir_ssa_scalar_as_uint(summands[i]);
uint32_t value = nir_scalar_as_uint(summands[i]);
return (struct match){
.base = summands[1 - i],
@ -169,17 +168,17 @@ match_address(nir_builder *b, nir_ssa_scalar base, int8_t format_shift)
}
/* Otherwise, we can only add an offset extended from 32-bits */
if (!nir_ssa_scalar_is_alu(summands[i]))
if (!nir_scalar_is_alu(summands[i]))
continue;
nir_op op = nir_ssa_scalar_alu_op(summands[i]);
nir_op op = nir_scalar_alu_op(summands[i]);
if (op != nir_op_u2u64 && op != nir_op_i2i64)
continue;
/* We've found a summand, commit to it */
match.base = summands[1 - i];
match.offset = nir_ssa_scalar_chase_alu_src(summands[i], 0);
match.offset = nir_scalar_chase_alu_src(summands[i], 0);
match.sign_extend = (op == nir_op_i2i64);
/* Undo the implicit shift from using as offset */
@ -192,7 +191,7 @@ match_address(nir_builder *b, nir_ssa_scalar base, int8_t format_shift)
return match;
/* But if we did, we can try to fold in a multiply */
nir_ssa_scalar multiplied;
nir_scalar multiplied;
uint32_t multiplier;
if (match_imul_imm(match.offset, &multiplied, &multiplier)) {
@ -211,7 +210,7 @@ match_address(nir_builder *b, nir_ssa_scalar base, int8_t format_shift)
return match;
}
nir_ssa_def *multiplied_ssa = nir_vec_scalars(b, &multiplied, 1);
nir_def *multiplied_ssa = nir_vec_scalars(b, &multiplied, 1);
/* Only fold in if we wouldn't overflow the lsl field */
if (new_shift <= 2) {
@ -224,7 +223,7 @@ match_address(nir_builder *b, nir_ssa_scalar base, int8_t format_shift)
*/
assert(new_shift >= 3);
nir_ssa_def *rewrite =
nir_def *rewrite =
nir_imul_imm(b, multiplied_ssa, multiplier << new_shift);
match.offset = nir_get_ssa_scalar(rewrite, 0);
@ -276,13 +275,12 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
unsigned format_shift = util_logbase2(util_format_get_blocksize(format));
nir_src *orig_offset = nir_get_io_offset_src(intr);
nir_ssa_scalar base = nir_ssa_scalar_resolved(orig_offset->ssa, 0);
nir_scalar base = nir_scalar_resolved(orig_offset->ssa, 0);
struct match match = match_address(b, base, format_shift);
nir_ssa_def *offset =
match.offset.def != NULL
? nir_channel(b, match.offset.def, match.offset.comp)
: nir_imm_int(b, 0);
nir_def *offset = match.offset.def != NULL
? nir_channel(b, match.offset.def, match.offset.comp)
: nir_imm_int(b, 0);
/* If we were unable to fold in the shift, insert a right-shift now to undo
* the implicit left shift of the instruction.
@ -309,9 +307,9 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
}
assert(match.shift >= 0);
nir_ssa_def *new_base = nir_channel(b, match.base.def, match.base.comp);
nir_def *new_base = nir_channel(b, match.base.def, match.base.comp);
nir_ssa_def *repl = NULL;
nir_def *repl = NULL;
bool has_dest = (intr->intrinsic != nir_intrinsic_store_global);
unsigned num_components = has_dest ? nir_dest_num_components(intr->dest) : 0;
unsigned bit_size = has_dest ? nir_dest_bit_size(intr->dest) : 0;
@ -346,7 +344,7 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
}
if (repl)
nir_ssa_def_rewrite_uses(&intr->dest.ssa, repl);
nir_def_rewrite_uses(&intr->dest.ssa, repl);
nir_instr_remove(instr);
return true;
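
The pass above recognizes addresses of the form base + extend(offset32 * imm) and folds the multiply into the implicit offset << format_shift applied by the AGX load/store itself. A tiny sketch of the fold with invented numbers:

   /* The folded instruction scales its 32-bit offset by the format shift,
    * so an index scaled by the block size can be passed through unscaled. */
   #include <stdint.h>
   #include <stdio.h>

   int main(void)
   {
      uint64_t base = 0x10000;
      uint32_t index = 7;
      unsigned format_shift = 2; /* log2 of the 4-byte block size */

      /* Address as NIR computes it before the pass runs. */
      uint64_t nir_addr = base + (uint64_t)index * 4;

      /* Address as the hardware computes it after the fold: base plus the
       * 32-bit offset shifted implicitly by the instruction. */
      uint64_t hw_addr = base + ((uint64_t)index << format_shift);

      printf("nir 0x%llx, hw 0x%llx\n",
             (unsigned long long)nir_addr, (unsigned long long)hw_addr);
      return 0;
   }
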

View file

@ -33,7 +33,7 @@ lower_zs_emit(nir_block *block)
nir_builder b = nir_builder_at(nir_before_instr(instr));
nir_ssa_def *value = intr->src[0].ssa;
nir_def *value = intr->src[0].ssa;
bool z = (sem.location == FRAG_RESULT_DEPTH);
unsigned src_idx = z ? 1 : 2;
@ -51,10 +51,10 @@ lower_zs_emit(nir_block *block)
/* Multisampling will get lowered later if needed, default to
* broadcast
*/
nir_ssa_def *sample_mask = nir_imm_intN_t(&b, ALL_SAMPLES, 16);
zs_emit = nir_store_zs_agx(&b, sample_mask,
nir_ssa_undef(&b, 1, 32) /* depth */,
nir_ssa_undef(&b, 1, 16) /* stencil */);
nir_def *sample_mask = nir_imm_intN_t(&b, ALL_SAMPLES, 16);
zs_emit =
nir_store_zs_agx(&b, sample_mask, nir_undef(&b, 1, 32) /* depth */,
nir_undef(&b, 1, 16) /* stencil */);
}
assert((nir_intrinsic_base(zs_emit) & base) == 0 &&
@ -83,9 +83,9 @@ lower_discard(nir_builder *b, nir_instr *instr, UNUSED void *data)
b->cursor = nir_before_instr(instr);
nir_ssa_def *all_samples = nir_imm_intN_t(b, ALL_SAMPLES, 16);
nir_ssa_def *no_samples = nir_imm_intN_t(b, 0, 16);
nir_ssa_def *killed_samples = all_samples;
nir_def *all_samples = nir_imm_intN_t(b, ALL_SAMPLES, 16);
nir_def *no_samples = nir_imm_intN_t(b, 0, 16);
nir_def *killed_samples = all_samples;
if (intr->intrinsic == nir_intrinsic_discard_if)
killed_samples = nir_bcsel(b, intr->src[0].ssa, all_samples, no_samples);

View file

@ -22,7 +22,7 @@
static void
insert_z_write(nir_builder *b)
{
nir_ssa_def *z = nir_load_frag_coord_zw(b, .component = 2);
nir_def *z = nir_load_frag_coord_zw(b, .component = 2);
nir_store_output(b, z, nir_imm_int(b, 0),
.io_semantics.location = FRAG_RESULT_DEPTH,

View file

@ -24,49 +24,48 @@
*/
/* XXX: It's not clear what this is for, but seems necessary */
static nir_ssa_def *
cf_valid(nir_builder *b, nir_ssa_def *cf)
static nir_def *
cf_valid(nir_builder *b, nir_def *cf)
{
nir_ssa_def *bit =
nir_ieq_imm(b, nir_iand_imm(b, nir_channel(b, cf, 0), 1), 0);
nir_def *bit = nir_ieq_imm(b, nir_iand_imm(b, nir_channel(b, cf, 0), 1), 0);
/* XXX: Apple's compiler actually checks that the significand is nonzero and
* the exponent is 0 or 1. This is probably a typo -- it doesn't make any
* logical sense. Presumably they just meant to check for denorms, so let's
* do that. Either way the tests pass.
*/
nir_ssa_def *cf01 = nir_trim_vector(b, cf, 2);
nir_def *cf01 = nir_trim_vector(b, cf, 2);
return nir_ior(b, bit, nir_fisnormal(b, cf01));
}
static nir_ssa_def *
interpolate_at_offset(nir_builder *b, nir_ssa_def *cf, nir_ssa_def *offset,
static nir_def *
interpolate_at_offset(nir_builder *b, nir_def *cf, nir_def *offset,
bool perspective)
{
/* Get the coordinate of the pixel within the tile */
nir_ssa_def *pixel_coords = nir_load_pixel_coord(b);
nir_ssa_def *tile_offs = nir_umod_imm(b, pixel_coords, 32);
nir_def *pixel_coords = nir_load_pixel_coord(b);
nir_def *tile_offs = nir_umod_imm(b, pixel_coords, 32);
/* Convert to float, getting the center of the pixel */
nir_ssa_def *center = nir_fadd_imm(b, nir_u2f32(b, tile_offs), 0.5);
nir_def *center = nir_fadd_imm(b, nir_u2f32(b, tile_offs), 0.5);
/* Calculate the location to interpolate. offset is defined relative to the
* center of the pixel and is a float.
*/
nir_ssa_def *pos = nir_fadd(b, center, nir_f2f32(b, offset));
nir_def *pos = nir_fadd(b, center, nir_f2f32(b, offset));
/* Interpolate with the given coefficients */
nir_ssa_def *interp = nir_ffma(b, nir_channel(b, pos, 1),
nir_channel(b, cf, 1), nir_channel(b, cf, 2));
nir_def *interp = nir_ffma(b, nir_channel(b, pos, 1), nir_channel(b, cf, 1),
nir_channel(b, cf, 2));
interp = nir_ffma(b, nir_channel(b, pos, 0), nir_channel(b, cf, 0), interp);
/* Divide by RHW. This load will be lowered recursively. */
if (perspective) {
nir_ssa_def *bary = nir_load_barycentric_at_offset(
nir_def *bary = nir_load_barycentric_at_offset(
b, 32, offset, .interp_mode = INTERP_MODE_NOPERSPECTIVE);
nir_ssa_def *rhw = nir_load_interpolated_input(
nir_def *rhw = nir_load_interpolated_input(
b, 1, 32, bary, nir_imm_int(b, 0), .component = 3,
.io_semantics = {
.location = VARYING_SLOT_POS,
@ -80,8 +79,8 @@ interpolate_at_offset(nir_builder *b, nir_ssa_def *cf, nir_ssa_def *offset,
return nir_bcsel(b, cf_valid(b, cf), interp, nir_channel(b, cf, 2));
}
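
interpolate_at_offset() and interpolate_flat() above evaluate a per-channel coefficient plane: a varying is reconstructed at a tile-relative position (x, y) as cf[0]*x + cf[1]*y + cf[2], flat inputs just take the constant term, and perspective inputs are additionally divided by the interpolated 1/W. A minimal sketch of the plane evaluation with invented coefficients:

   /* Plane-equation evaluation matching the FFMA chain above. */
   #include <stdio.h>

   static float interp_plane(const float cf[3], float x, float y)
   {
      return cf[0] * x + cf[1] * y + cf[2];
   }

   int main(void)
   {
      const float cf[3] = {0.25f, -0.5f, 3.0f}; /* hypothetical A, B, C */
      float px = 10.5f, py = 4.5f;              /* pixel center in the tile */

      printf("interpolated = %f, flat = %f\n",
             interp_plane(cf, px, py), cf[2]);
      return 0;
   }
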
static nir_ssa_def *
interpolate_flat(nir_builder *b, nir_ssa_def *coefficients)
static nir_def *
interpolate_flat(nir_builder *b, nir_def *coefficients)
{
/* Same value anywhere, so just take the constant (affine) component */
return nir_channel(b, coefficients, 2);
@ -114,7 +113,7 @@ needs_lower(const nir_instr *instr, UNUSED const void *_)
return (load->intrinsic == nir_intrinsic_load_input);
}
static nir_ssa_def *
static nir_def *
interpolate_channel(nir_builder *b, nir_intrinsic_instr *load, unsigned channel)
{
nir_io_semantics sem = nir_intrinsic_io_semantics(load);
@ -123,7 +122,7 @@ interpolate_channel(nir_builder *b, nir_intrinsic_instr *load, unsigned channel)
sem.location += nir_src_as_uint(*nir_get_io_offset_src(load));
sem.num_slots = 1;
nir_ssa_def *coefficients = nir_load_coefficients_agx(
nir_def *coefficients = nir_load_coefficients_agx(
b, .component = nir_intrinsic_component(load) + channel,
.interp_mode = interp_mode_for_load(load), .io_semantics = sem);
@ -133,7 +132,7 @@ interpolate_channel(nir_builder *b, nir_intrinsic_instr *load, unsigned channel)
} else {
nir_intrinsic_instr *bary = nir_src_as_intrinsic(load->src[0]);
nir_ssa_def *interp = interpolate_at_offset(
nir_def *interp = interpolate_at_offset(
b, coefficients, bary->src[0].ssa,
nir_intrinsic_interp_mode(bary) != INTERP_MODE_NOPERSPECTIVE);
@ -141,13 +140,13 @@ interpolate_channel(nir_builder *b, nir_intrinsic_instr *load, unsigned channel)
}
}
static nir_ssa_def *
static nir_def *
lower(nir_builder *b, nir_instr *instr, void *data)
{
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
/* Each component is loaded separately */
nir_ssa_def *values[NIR_MAX_VEC_COMPONENTS] = {NULL};
nir_def *values[NIR_MAX_VEC_COMPONENTS] = {NULL};
for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
values[i] = interpolate_channel(b, intr, i);
}
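
For reference, the interpolation this pass emits can be mirrored on the CPU. The sketch below assumes the layout used above — a vec3 of coefficients with value(x, y) = cf[0]*x + cf[1]*y + cf[2], coordinates measured from the origin of the 32x32 tile, and a divide by the interpolated 1/W for the perspective case. Helper names and input values are illustrative, not driver API:

   /* Minimal CPU sketch of the interpolation above. */
   #include <stdio.h>

   static float
   interp_at_offset(const float cf[3], const float rhw_cf[3],
                    unsigned px, unsigned py, float ox, float oy, int perspective)
   {
      /* Center of the pixel within the 32x32 tile, plus the requested offset */
      float x = (px % 32) + 0.5f + ox;
      float y = (py % 32) + 0.5f + oy;

      /* Evaluate the plane equation, innermost term first as in the pass */
      float v = cf[0] * x + cf[1] * y + cf[2];

      if (perspective) {
         /* Divide by the interpolated 1/W (RHW) evaluated at the same point */
         float rhw = rhw_cf[0] * x + rhw_cf[1] * y + rhw_cf[2];
         v /= rhw;
      }

      return v;
   }

   int
   main(void)
   {
      const float cf[3] = {0.25f, -0.125f, 1.0f};
      const float rhw[3] = {0.0f, 0.0f, 0.5f}; /* constant 1/W = 0.5 */
      printf("%f\n", interp_at_offset(cf, rhw, 37, 4, 0.25f, -0.25f, 1));
      return 0;
   }
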

View file

@ -21,13 +21,13 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
if (intr->intrinsic != nir_intrinsic_load_interpolated_input)
return false;
unsigned mask = nir_ssa_def_components_read(&intr->dest.ssa);
unsigned mask = nir_def_components_read(&intr->dest.ssa);
if (mask == 0 || mask == nir_component_mask(intr->num_components))
return false;
b->cursor = nir_before_instr(instr);
unsigned bit_size = nir_dest_bit_size(intr->dest);
nir_ssa_def *comps[4] = {NULL};
nir_def *comps[4] = {NULL};
for (unsigned c = 0; c < intr->num_components; ++c) {
if (mask & BITFIELD_BIT(c)) {
@ -44,7 +44,7 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
/* Shrink the load to count contiguous components */
nir_ssa_dest_init(clone, &clone_intr->dest, count, bit_size);
nir_ssa_def *clone_vec = &clone_intr->dest.ssa;
nir_def *clone_vec = &clone_intr->dest.ssa;
clone_intr->num_components = count;
/* The load starts from component c relative to the original load */
@ -64,12 +64,12 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
/* The value of unused components is irrelevant, but use an undef for
* semantics. It will be eliminated by DCE after copyprop.
*/
comps[c] = nir_ssa_undef(b, 1, bit_size);
comps[c] = nir_undef(b, 1, bit_size);
}
}
nir_ssa_def_rewrite_uses(&intr->dest.ssa,
nir_vec(b, comps, intr->num_components));
nir_def_rewrite_uses(&intr->dest.ssa,
nir_vec(b, comps, intr->num_components));
return true;
}
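
The splitting above keys off the component read mask. A standalone sketch of how such a mask decomposes into contiguous runs — each run would become one shrunk load and each unread component an undef — purely illustrative:

   #include <stdio.h>

   int
   main(void)
   {
      unsigned mask = 0xb; /* components 0, 1 and 3 read out of a vec4 */
      unsigned num_components = 4;

      for (unsigned c = 0; c < num_components;) {
         if (!(mask & (1u << c))) {
            printf("component %u: undef\n", c);
            c++;
            continue;
         }

         /* Count contiguous read components starting at c */
         unsigned count = 0;
         while (c + count < num_components && (mask & (1u << (c + count))))
            count++;

         printf("load %u component(s) starting at component %u\n", count, c);
         c += count;
      }

      return 0;
   }
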

View file

@ -84,7 +84,7 @@ lower_sample_mask_to_zs(nir_builder *b, nir_instr *instr, UNUSED void *data)
*/
if (intr->intrinsic == nir_intrinsic_store_zs_agx && !depth_written) {
/* Load the current depth at this pixel */
nir_ssa_def *z = nir_load_frag_coord_zw(b, .component = 2);
nir_def *z = nir_load_frag_coord_zw(b, .component = 2);
/* Write it out from this store_zs */
nir_intrinsic_set_base(intr, nir_intrinsic_base(intr) | BASE_Z);
@ -103,7 +103,7 @@ lower_sample_mask_to_zs(nir_builder *b, nir_instr *instr, UNUSED void *data)
/* Write a NaN depth value for discarded samples */
nir_store_zs_agx(b, intr->src[0].ssa, nir_imm_float(b, NAN),
stencil_written ? nir_imm_intN_t(b, 0, 16)
: nir_ssa_undef(b, 1, 16) /* stencil */,
: nir_undef(b, 1, 16) /* stencil */,
.base = BASE_Z | (stencil_written ? BASE_S : 0));
nir_instr_remove(instr);
@ -196,9 +196,9 @@ agx_nir_lower_sample_mask(nir_shader *shader, unsigned nr_samples)
/* Last discard is executed unconditionally, so fuse tests. */
b.cursor = nir_before_instr(&intr->instr);
nir_ssa_def *all_samples = nir_imm_intN_t(&b, ALL_SAMPLES, 16);
nir_ssa_def *killed = intr->src[0].ssa;
nir_ssa_def *live = nir_ixor(&b, killed, all_samples);
nir_def *all_samples = nir_imm_intN_t(&b, ALL_SAMPLES, 16);
nir_def *killed = intr->src[0].ssa;
nir_def *live = nir_ixor(&b, killed, all_samples);
nir_sample_mask_agx(&b, all_samples, live);
nir_instr_remove(&intr->instr);

View file

@ -19,30 +19,30 @@
#define AGX_FORMAT_RGB32_EMULATED 0x36
#define AGX_LAYOUT_LINEAR 0x0
static nir_ssa_def *
texture_descriptor_ptr_for_handle(nir_builder *b, nir_ssa_def *handle)
static nir_def *
texture_descriptor_ptr_for_handle(nir_builder *b, nir_def *handle)
{
/* Bindless handles are a vec2, where the first source is the (constant)
* uniform register number and the second source is the byte offset.
*/
nir_ssa_scalar uniform = nir_ssa_scalar_resolved(handle, 0);
unsigned uniform_idx = nir_ssa_scalar_as_uint(uniform);
nir_scalar uniform = nir_scalar_resolved(handle, 0);
unsigned uniform_idx = nir_scalar_as_uint(uniform);
nir_ssa_def *base = nir_load_preamble(b, 1, 64, uniform_idx);
nir_ssa_def *offset = nir_u2u64(b, nir_channel(b, handle, 1));
nir_def *base = nir_load_preamble(b, 1, 64, uniform_idx);
nir_def *offset = nir_u2u64(b, nir_channel(b, handle, 1));
return nir_iadd(b, base, offset);
}
static nir_ssa_def *
texture_descriptor_ptr_for_index(nir_builder *b, nir_ssa_def *index)
static nir_def *
texture_descriptor_ptr_for_index(nir_builder *b, nir_def *index)
{
return nir_iadd(
b, nir_load_texture_base_agx(b),
nir_u2u64(b, nir_imul_imm(b, index, AGX_TEXTURE_DESC_STRIDE)));
}
static nir_ssa_def *
static nir_def *
texture_descriptor_ptr(nir_builder *b, nir_tex_instr *tex)
{
int handle_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
@ -50,7 +50,7 @@ texture_descriptor_ptr(nir_builder *b, nir_tex_instr *tex)
return texture_descriptor_ptr_for_handle(b, tex->src[handle_idx].src.ssa);
/* For non-bindless, compute from the texture index */
nir_ssa_def *index;
nir_def *index;
int offs_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_offset);
if (offs_idx >= 0)
@ -66,40 +66,40 @@ texture_descriptor_ptr(nir_builder *b, nir_tex_instr *tex)
* original size is irrecoverable. Instead, we stash it in the "Acceleration
* buffer" field, which is unused for linear images. Fetch just that.
*/
static nir_ssa_def *
agx_txs_buffer(nir_builder *b, nir_ssa_def *descriptor)
static nir_def *
agx_txs_buffer(nir_builder *b, nir_def *descriptor)
{
nir_ssa_def *size_ptr = nir_iadd_imm(b, descriptor, 16);
nir_def *size_ptr = nir_iadd_imm(b, descriptor, 16);
return nir_load_global_constant(b, size_ptr, 8, 1, 32);
}
static nir_ssa_def *
static nir_def *
agx_txs(nir_builder *b, nir_tex_instr *tex)
{
nir_ssa_def *ptr = texture_descriptor_ptr(b, tex);
nir_ssa_def *comp[4] = {NULL};
nir_def *ptr = texture_descriptor_ptr(b, tex);
nir_def *comp[4] = {NULL};
if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
return agx_txs_buffer(b, ptr);
nir_ssa_def *desc = nir_load_global_constant(b, ptr, 8, 4, 32);
nir_ssa_def *w0 = nir_channel(b, desc, 0);
nir_ssa_def *w1 = nir_channel(b, desc, 1);
nir_ssa_def *w3 = nir_channel(b, desc, 3);
nir_def *desc = nir_load_global_constant(b, ptr, 8, 4, 32);
nir_def *w0 = nir_channel(b, desc, 0);
nir_def *w1 = nir_channel(b, desc, 1);
nir_def *w3 = nir_channel(b, desc, 3);
/* Width minus 1: bits [28, 42) */
nir_ssa_def *width_m1 =
nir_def *width_m1 =
nir_extr_agx(b, w0, w1, nir_imm_int(b, 28), nir_imm_int(b, 14));
/* Height minus 1: bits [42, 56) */
nir_ssa_def *height_m1 = nir_ubitfield_extract_imm(b, w1, 42 - 32, 14);
nir_def *height_m1 = nir_ubitfield_extract_imm(b, w1, 42 - 32, 14);
/* Depth minus 1: bits [110, 124) */
nir_ssa_def *depth_m1 = nir_ubitfield_extract_imm(b, w3, 110 - 96, 14);
nir_def *depth_m1 = nir_ubitfield_extract_imm(b, w3, 110 - 96, 14);
/* First level: bits [56, 60) */
nir_ssa_def *lod = nir_ubitfield_extract_imm(b, w1, 56 - 32, 4);
nir_def *lod = nir_ubitfield_extract_imm(b, w1, 56 - 32, 4);
/* Add LOD offset to first level to get the interesting LOD */
int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
@ -116,14 +116,13 @@ agx_txs(nir_builder *b, nir_tex_instr *tex)
* TODO: Optimize this, since linear 2D arrays aren't needed for APIs and
* this just gets used internally for blits.
*/
nir_ssa_def *layout = nir_ubitfield_extract_imm(b, w0, 4, 2);
nir_def *layout = nir_ubitfield_extract_imm(b, w0, 4, 2);
/* Get the 2 bytes after the first 128-bit descriptor */
nir_ssa_def *extension =
nir_def *extension =
nir_load_global_constant(b, nir_iadd_imm(b, ptr, 16), 8, 1, 16);
nir_ssa_def *depth_linear_m1 =
nir_iand_imm(b, extension, BITFIELD_MASK(11));
nir_def *depth_linear_m1 = nir_iand_imm(b, extension, BITFIELD_MASK(11));
depth_linear_m1 = nir_u2uN(b, depth_linear_m1, depth_m1->bit_size);
@ -132,9 +131,9 @@ agx_txs(nir_builder *b, nir_tex_instr *tex)
}
/* Add 1 to width-1, height-1 to get base dimensions */
nir_ssa_def *width = nir_iadd_imm(b, width_m1, 1);
nir_ssa_def *height = nir_iadd_imm(b, height_m1, 1);
nir_ssa_def *depth = nir_iadd_imm(b, depth_m1, 1);
nir_def *width = nir_iadd_imm(b, width_m1, 1);
nir_def *height = nir_iadd_imm(b, height_m1, 1);
nir_def *depth = nir_iadd_imm(b, depth_m1, 1);
/* 1D Arrays have their second component as the layer count */
if (tex->sampler_dim == GLSL_SAMPLER_DIM_1D && tex->is_array)
@ -179,42 +178,42 @@ lower_txs(nir_builder *b, nir_instr *instr, UNUSED void *data)
if (tex->op != nir_texop_txs)
return false;
nir_ssa_def *res = agx_txs(b, tex);
nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, res, instr);
nir_def *res = agx_txs(b, tex);
nir_def_rewrite_uses_after(&tex->dest.ssa, res, instr);
nir_instr_remove(instr);
return true;
}
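
The bit positions quoted in the comments above (width-1 at [28, 42), height-1 at [42, 56), first level at [56, 60), depth-1 at [110, 124)) can be exercised on the CPU with a generic extractor over the descriptor words. The helper below is an illustration of that unpacking, not the hardware definition; an all-zero descriptor decodes as a 1x1x1, level-0 image:

   #include <stdint.h>
   #include <stdio.h>

   /* Extract `bits` bits starting at absolute bit position `pos` from a
    * little-endian array of 32-bit descriptor words. */
   static uint32_t
   desc_extract(const uint32_t *words, unsigned pos, unsigned bits)
   {
      uint64_t lo = words[pos / 32];
      uint64_t hi = words[pos / 32 + 1];
      uint64_t both = lo | (hi << 32);
      return (uint32_t)((both >> (pos % 32)) & ((1ull << bits) - 1));
   }

   int
   main(void)
   {
      /* Fabricated descriptor words, for illustration only */
      uint32_t desc[5] = {0};

      unsigned width  = desc_extract(desc, 28, 14) + 1;
      unsigned height = desc_extract(desc, 42, 14) + 1;
      unsigned depth  = desc_extract(desc, 110, 14) + 1;
      unsigned level  = desc_extract(desc, 56, 4);

      printf("%ux%ux%u, first level %u\n", width, height, depth, level);
      return 0;
   }
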
static nir_ssa_def *
static nir_def *
format_is_rgb32(nir_builder *b, nir_tex_instr *tex)
{
nir_ssa_def *ptr = texture_descriptor_ptr(b, tex);
nir_ssa_def *desc = nir_load_global_constant(b, ptr, 8, 1, 32);
nir_ssa_def *channels = nir_ubitfield_extract_imm(b, desc, 6, 7);
nir_def *ptr = texture_descriptor_ptr(b, tex);
nir_def *desc = nir_load_global_constant(b, ptr, 8, 1, 32);
nir_def *channels = nir_ubitfield_extract_imm(b, desc, 6, 7);
return nir_ieq_imm(b, channels, AGX_FORMAT_RGB32_EMULATED);
}
/* Load from an RGB32 buffer texture */
static nir_ssa_def *
load_rgb32(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *coordinate)
static nir_def *
load_rgb32(nir_builder *b, nir_tex_instr *tex, nir_def *coordinate)
{
/* Base address right-shifted 4: bits [66, 102) */
nir_ssa_def *ptr_hi = nir_iadd_imm(b, texture_descriptor_ptr(b, tex), 8);
nir_ssa_def *desc_hi_words = nir_load_global_constant(b, ptr_hi, 8, 2, 32);
nir_ssa_def *desc_hi = nir_pack_64_2x32(b, desc_hi_words);
nir_ssa_def *base_shr4 =
nir_def *ptr_hi = nir_iadd_imm(b, texture_descriptor_ptr(b, tex), 8);
nir_def *desc_hi_words = nir_load_global_constant(b, ptr_hi, 8, 2, 32);
nir_def *desc_hi = nir_pack_64_2x32(b, desc_hi_words);
nir_def *base_shr4 =
nir_iand_imm(b, nir_ushr_imm(b, desc_hi, 2), BITFIELD64_MASK(36));
nir_ssa_def *base = nir_ishl_imm(b, base_shr4, 4);
nir_def *base = nir_ishl_imm(b, base_shr4, 4);
nir_ssa_def *raw = nir_load_constant_agx(
b, 3, nir_dest_bit_size(tex->dest), base, nir_imul_imm(b, coordinate, 3),
.format = AGX_INTERNAL_FORMAT_I32);
nir_def *raw = nir_load_constant_agx(b, 3, nir_dest_bit_size(tex->dest),
base, nir_imul_imm(b, coordinate, 3),
.format = AGX_INTERNAL_FORMAT_I32);
/* Set alpha to 1 (in the appropriate format) */
bool is_float = nir_alu_type_get_base_type(tex->dest_type) == nir_type_float;
nir_ssa_def *swizzled[4] = {
nir_def *swizzled[4] = {
nir_channel(b, raw, 0), nir_channel(b, raw, 1), nir_channel(b, raw, 2),
is_float ? nir_imm_float(b, 1.0) : nir_imm_int(b, 1)};
@ -225,8 +224,8 @@ load_rgb32(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *coordinate)
* Given a 1D buffer texture coordinate, calculate the 2D coordinate vector that
* will be used to access the linear 2D texture bound to the buffer.
*/
static nir_ssa_def *
coords_for_buffer_texture(nir_builder *b, nir_ssa_def *coord)
static nir_def *
coords_for_buffer_texture(nir_builder *b, nir_def *coord)
{
return nir_vec2(b, nir_iand_imm(b, coord, BITFIELD_MASK(10)),
nir_ushr_imm(b, coord, 10));
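
A plain-C view of the split above: the low 10 bits of the buffer index pick the column and the remaining bits pick the row of the 1024-texel-wide linear 2D texture standing in for the buffer. The example index is arbitrary:

   #include <stdio.h>

   int
   main(void)
   {
      unsigned index = 150000;
      unsigned x = index & 0x3ff; /* BITFIELD_MASK(10) */
      unsigned y = index >> 10;
      printf("texel %u -> (%u, %u)\n", index, x, y);
      return 0;
   }
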
@ -247,7 +246,7 @@ coords_for_buffer_texture(nir_builder *b, nir_ssa_def *coord)
static bool
lower_buffer_texture(nir_builder *b, nir_tex_instr *tex)
{
nir_ssa_def *coord = nir_steal_tex_src(tex, nir_tex_src_coord);
nir_def *coord = nir_steal_tex_src(tex, nir_tex_src_coord);
/* The OpenGL ES 3.2 specification says on page 187:
*
@ -258,19 +257,19 @@ lower_buffer_texture(nir_builder *b, nir_tex_instr *tex)
*
* However, faulting would be undesirable for robustness, so clamp.
*/
nir_ssa_def *size = nir_get_texture_size(b, tex);
nir_def *size = nir_get_texture_size(b, tex);
coord = nir_umin(b, coord, nir_iadd_imm(b, size, -1));
/* Lower RGB32 reads if the format requires */
nir_if *nif = nir_push_if(b, format_is_rgb32(b, tex));
nir_ssa_def *rgb32 = load_rgb32(b, tex, coord);
nir_def *rgb32 = load_rgb32(b, tex, coord);
nir_push_else(b, nif);
/* Otherwise, lower the texture instruction to read from 2D */
assert(coord->num_components == 1 && "buffer textures are 1D");
tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
nir_ssa_def *coord2d = coords_for_buffer_texture(b, coord);
nir_def *coord2d = coords_for_buffer_texture(b, coord);
nir_instr_remove(&tex->instr);
nir_builder_instr_insert(b, &tex->instr);
nir_tex_instr_add_src(tex, nir_tex_src_backend1, nir_src_for_ssa(coord2d));
@ -278,8 +277,8 @@ lower_buffer_texture(nir_builder *b, nir_tex_instr *tex)
nir_pop_if(b, nif);
/* Put it together with a phi */
nir_ssa_def *phi = nir_if_phi(b, rgb32, &tex->dest.ssa);
nir_ssa_def_rewrite_uses(&tex->dest.ssa, phi);
nir_def *phi = nir_if_phi(b, rgb32, &tex->dest.ssa);
nir_def_rewrite_uses(&tex->dest.ssa, phi);
nir_phi_instr *phi_instr = nir_instr_as_phi(phi->parent_instr);
nir_phi_src *else_src = nir_phi_get_src_from_block(phi_instr, else_block);
nir_instr_rewrite_src_ssa(phi->parent_instr, &else_src->src, &tex->dest.ssa);
@ -307,8 +306,8 @@ lower_regular_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
return lower_buffer_texture(b, tex);
/* Get the coordinates */
nir_ssa_def *coord = nir_steal_tex_src(tex, nir_tex_src_coord);
nir_ssa_def *ms_idx = nir_steal_tex_src(tex, nir_tex_src_ms_index);
nir_def *coord = nir_steal_tex_src(tex, nir_tex_src_coord);
nir_def *ms_idx = nir_steal_tex_src(tex, nir_tex_src_ms_index);
/* It's unclear if mipmapped 1D textures work in the hardware. For now, we
* always lower to 2D.
@ -333,7 +332,7 @@ lower_regular_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
};
for (unsigned i = 0; i < ARRAY_SIZE(other_srcs); ++i) {
nir_ssa_def *src = nir_steal_tex_src(tex, other_srcs[i]);
nir_def *src = nir_steal_tex_src(tex, other_srcs[i]);
if (!src)
continue;
@ -350,11 +349,11 @@ lower_regular_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
/* The layer is always the last component of the NIR coordinate, split it off
* because we'll need to swizzle.
*/
nir_ssa_def *layer = NULL;
nir_def *layer = NULL;
if (tex->is_array) {
unsigned lidx = coord->num_components - 1;
nir_ssa_def *unclamped_layer = nir_channel(b, coord, lidx);
nir_def *unclamped_layer = nir_channel(b, coord, lidx);
coord = nir_trim_vector(b, coord, lidx);
/* Round layer to nearest even */
@ -364,9 +363,9 @@ lower_regular_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
/* Clamp to max layer = (# of layers - 1) for out-of-bounds handling.
* Layer must be 16-bits for the hardware, drop top bits after clamping.
*/
nir_ssa_def *txs = nir_get_texture_size(b, tex);
nir_ssa_def *nr_layers = nir_channel(b, txs, lidx);
nir_ssa_def *max_layer = nir_iadd_imm(b, nr_layers, -1);
nir_def *txs = nir_get_texture_size(b, tex);
nir_def *nr_layers = nir_channel(b, txs, lidx);
nir_def *max_layer = nir_iadd_imm(b, nr_layers, -1);
layer = nir_u2u16(b, nir_umin(b, unclamped_layer, max_layer));
}
@ -374,11 +373,11 @@ lower_regular_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
* vec6 16-bit coordinate tuple, which would be inconvenient in NIR for
* little benefit (a minor optimization, I guess).
*/
nir_ssa_def *sample_array = (ms_idx && layer)
? nir_pack_32_2x16_split(b, ms_idx, layer)
: ms_idx ? nir_u2u32(b, ms_idx)
: layer ? nir_u2u32(b, layer)
: NULL;
nir_def *sample_array = (ms_idx && layer)
? nir_pack_32_2x16_split(b, ms_idx, layer)
: ms_idx ? nir_u2u32(b, ms_idx)
: layer ? nir_u2u32(b, layer)
: NULL;
/* Combine into the final 32-bit tuple */
if (sample_array != NULL) {
@ -390,14 +389,14 @@ lower_regular_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
nir_tex_instr_add_src(tex, nir_tex_src_backend1, nir_src_for_ssa(coord));
/* Furthermore, if there is an offset vector, it must be packed */
nir_ssa_def *offset = nir_steal_tex_src(tex, nir_tex_src_offset);
nir_def *offset = nir_steal_tex_src(tex, nir_tex_src_offset);
if (offset != NULL) {
nir_ssa_def *packed = NULL;
nir_def *packed = NULL;
for (unsigned c = 0; c < offset->num_components; ++c) {
nir_ssa_def *nibble = nir_iand_imm(b, nir_channel(b, offset, c), 0xF);
nir_ssa_def *shifted = nir_ishl_imm(b, nibble, 4 * c);
nir_def *nibble = nir_iand_imm(b, nir_channel(b, offset, c), 0xF);
nir_def *shifted = nir_ishl_imm(b, nibble, 4 * c);
if (packed != NULL)
packed = nir_ior(b, packed, shifted);
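
The packing loop above collapses each offset component to its low nibble; the same arithmetic in standalone C (offsets are two's-complement, so -1 packs as 0xF):

   #include <stdint.h>
   #include <stdio.h>

   int
   main(void)
   {
      int offsets[3] = {-1, 2, 7}; /* texel offsets, one nibble each */
      uint32_t packed = 0;

      for (unsigned c = 0; c < 3; ++c) {
         uint32_t nibble = (uint32_t)offsets[c] & 0xF;
         packed |= nibble << (4 * c); /* component c at bits [4c, 4c+4) */
      }

      printf("packed offset = 0x%x\n", packed); /* 0x72f */
      return 0;
   }
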
@ -411,7 +410,7 @@ lower_regular_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
return true;
}
static nir_ssa_def *
static nir_def *
bias_for_tex(nir_builder *b, nir_tex_instr *tex)
{
nir_instr *instr = nir_get_texture_size(b, tex)->parent_instr;
@ -446,7 +445,7 @@ lower_sampler_bias(nir_builder *b, nir_instr *instr, UNUSED void *data)
nir_tex_src_type src =
tex->op == nir_texop_txl ? nir_tex_src_lod : nir_tex_src_bias;
nir_ssa_def *orig = nir_steal_tex_src(tex, src);
nir_def *orig = nir_steal_tex_src(tex, src);
assert(orig != NULL && "invalid NIR");
if (orig->bit_size != 16)
@ -463,14 +462,14 @@ lower_sampler_bias(nir_builder *b, nir_instr *instr, UNUSED void *data)
* derivatives. So scale derivatives by exp2(bias) to
* get level-of-detail log2(exp2(bias) * rho) = bias + log2(rho).
*/
nir_ssa_def *scale = nir_fexp2(b, nir_f2f32(b, bias_for_tex(b, tex)));
nir_def *scale = nir_fexp2(b, nir_f2f32(b, bias_for_tex(b, tex)));
nir_tex_src_type src[] = {nir_tex_src_ddx, nir_tex_src_ddy};
for (unsigned s = 0; s < ARRAY_SIZE(src); ++s) {
nir_ssa_def *orig = nir_steal_tex_src(tex, src[s]);
nir_def *orig = nir_steal_tex_src(tex, src[s]);
assert(orig != NULL && "invalid");
nir_ssa_def *scaled = nir_fmul(b, nir_f2f32(b, orig), scale);
nir_def *scaled = nir_fmul(b, nir_f2f32(b, orig), scale);
nir_tex_instr_add_src(tex, src[s], nir_src_for_ssa(scaled));
}
@ -520,11 +519,11 @@ legalize_image_lod(nir_builder *b, nir_instr *instr, UNUSED void *data)
return false;
b->cursor = nir_before_instr(instr);
nir_src_rewrite_ssa(src, nir_i2i16(b, src->ssa));
nir_src_rewrite(src, nir_i2i16(b, src->ssa));
return true;
}
static nir_ssa_def *
static nir_def *
txs_for_image(nir_builder *b, nir_intrinsic_instr *intr,
unsigned num_components, unsigned bit_size)
{
@ -554,44 +553,40 @@ txs_for_image(nir_builder *b, nir_intrinsic_instr *intr,
return &tex->dest.ssa;
}
static nir_ssa_def *
nir_bitfield_mask(nir_builder *b, nir_ssa_def *x)
static nir_def *
nir_bitfield_mask(nir_builder *b, nir_def *x)
{
nir_ssa_def *one = nir_imm_intN_t(b, 1, x->bit_size);
nir_def *one = nir_imm_intN_t(b, 1, x->bit_size);
return nir_iadd_imm(b, nir_ishl(b, one, nir_u2u32(b, x)), -1);
}
static nir_ssa_def *
calculate_twiddled_coordinates(nir_builder *b, nir_ssa_def *coord,
nir_ssa_def *tile_w_px_log2,
nir_ssa_def *tile_h_px_log2,
nir_ssa_def *width_tl,
nir_ssa_def *layer_stride_el)
static nir_def *
calculate_twiddled_coordinates(nir_builder *b, nir_def *coord,
nir_def *tile_w_px_log2, nir_def *tile_h_px_log2,
nir_def *width_tl, nir_def *layer_stride_el)
{
/* SIMD-within-a-register */
nir_ssa_def *coord_px = nir_pack_32_2x16(b, nir_u2u16(b, coord));
nir_ssa_def *tile_mask =
nir_def *coord_px = nir_pack_32_2x16(b, nir_u2u16(b, coord));
nir_def *tile_mask =
nir_pack_32_2x16_split(b, nir_bitfield_mask(b, tile_w_px_log2),
nir_bitfield_mask(b, tile_h_px_log2));
/* Modulo by the tile width/height to get the offsets within the tile */
nir_ssa_def *offs_xy_px = nir_iand(b, coord_px, tile_mask);
nir_def *offs_xy_px = nir_iand(b, coord_px, tile_mask);
/* Get the coordinates of the corner of the tile */
nir_ssa_def *tile_xy_px = nir_isub(b, coord_px, offs_xy_px);
nir_def *tile_xy_px = nir_isub(b, coord_px, offs_xy_px);
/* Unpack SIMD-within-a-register */
nir_ssa_def *offs_x_px = nir_unpack_32_2x16_split_x(b, offs_xy_px);
nir_ssa_def *offs_y_px = nir_unpack_32_2x16_split_y(b, offs_xy_px);
nir_ssa_def *tile_x_px =
nir_u2u32(b, nir_unpack_32_2x16_split_x(b, tile_xy_px));
nir_ssa_def *tile_y_px =
nir_u2u32(b, nir_unpack_32_2x16_split_y(b, tile_xy_px));
nir_def *offs_x_px = nir_unpack_32_2x16_split_x(b, offs_xy_px);
nir_def *offs_y_px = nir_unpack_32_2x16_split_y(b, offs_xy_px);
nir_def *tile_x_px = nir_u2u32(b, nir_unpack_32_2x16_split_x(b, tile_xy_px));
nir_def *tile_y_px = nir_u2u32(b, nir_unpack_32_2x16_split_y(b, tile_xy_px));
/* Get the tile size */
nir_ssa_def *one_32 = nir_imm_int(b, 1);
nir_ssa_def *tile_w_px = nir_ishl(b, one_32, nir_u2u32(b, tile_w_px_log2));
nir_ssa_def *tile_h_px = nir_ishl(b, one_32, nir_u2u32(b, tile_h_px_log2));
nir_def *one_32 = nir_imm_int(b, 1);
nir_def *tile_w_px = nir_ishl(b, one_32, nir_u2u32(b, tile_w_px_log2));
nir_def *tile_h_px = nir_ishl(b, one_32, nir_u2u32(b, tile_h_px_log2));
/* tile row start (px) =
* (y // tile height) * (# of tiles/row) * (# of pix/tile) =
@ -599,7 +594,7 @@ calculate_twiddled_coordinates(nir_builder *b, nir_ssa_def *coord,
* tile height =
* align_down(y, tile height) * width_tl * tile width
*/
nir_ssa_def *tile_row_start_px =
nir_def *tile_row_start_px =
nir_imul(b, nir_u2u32(b, tile_y_px), nir_imul(b, width_tl, tile_w_px));
/* tile column start (px) =
@ -607,38 +602,37 @@ calculate_twiddled_coordinates(nir_builder *b, nir_ssa_def *coord,
* align(x, tile width) / tile width * tile width * tile height =
* align(x, tile width) * tile height
*/
nir_ssa_def *tile_col_start_px = nir_imul(b, tile_x_px, tile_h_px);
nir_def *tile_col_start_px = nir_imul(b, tile_x_px, tile_h_px);
/* The pixel at which the tile starts is thus... */
nir_ssa_def *tile_offset_px =
nir_iadd(b, tile_row_start_px, tile_col_start_px);
nir_def *tile_offset_px = nir_iadd(b, tile_row_start_px, tile_col_start_px);
/* Get the total offset */
nir_ssa_def *offs_px = nir_interleave_agx(b, offs_x_px, offs_y_px);
nir_ssa_def *total_px = nir_iadd(b, tile_offset_px, nir_u2u32(b, offs_px));
nir_def *offs_px = nir_interleave_agx(b, offs_x_px, offs_y_px);
nir_def *total_px = nir_iadd(b, tile_offset_px, nir_u2u32(b, offs_px));
if (layer_stride_el) {
nir_ssa_def *layer = nir_channel(b, coord, 2);
nir_ssa_def *layer_offset_px = nir_imul(b, layer, layer_stride_el);
nir_def *layer = nir_channel(b, coord, 2);
nir_def *layer_offset_px = nir_imul(b, layer, layer_stride_el);
total_px = nir_iadd(b, total_px, layer_offset_px);
}
return total_px;
}
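
Putting the tile arithmetic above together, a CPU analogue of the twiddled element index: tiles are stored in raster order and pixels inside a tile are Morton-interleaved. The interleave helper and the example geometry are illustrative; the pass performs the same step with nir_interleave_agx:

   #include <stdint.h>
   #include <stdio.h>

   /* Interleave the low 16 bits of x and y (Morton order). */
   static uint32_t
   interleave(uint32_t x, uint32_t y)
   {
      uint32_t out = 0;
      for (unsigned i = 0; i < 16; i++) {
         out |= ((x >> i) & 1u) << (2 * i);
         out |= ((y >> i) & 1u) << (2 * i + 1);
      }
      return out;
   }

   static uint32_t
   twiddled_index(uint32_t x, uint32_t y, unsigned tile_w_log2,
                  unsigned tile_h_log2, uint32_t width_tl)
   {
      uint32_t tile_w = 1u << tile_w_log2, tile_h = 1u << tile_h_log2;
      uint32_t offs_x = x & (tile_w - 1), offs_y = y & (tile_h - 1);
      uint32_t tile_x = x - offs_x, tile_y = y - offs_y;

      /* Pixels before this row of tiles, then before this tile in the row */
      uint32_t row_start = tile_y * width_tl * tile_w;
      uint32_t col_start = tile_x * tile_h;

      return row_start + col_start + interleave(offs_x, offs_y);
   }

   int
   main(void)
   {
      /* 256x128 image with 32x32 tiles -> 8 tiles per row */
      printf("%u\n", twiddled_index(40, 3, 5, 5, 8));
      return 0;
   }
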
static nir_ssa_def *
static nir_def *
image_texel_address(nir_builder *b, nir_intrinsic_instr *intr,
bool return_index)
{
/* First, calculate the address of the PBE descriptor */
nir_ssa_def *desc_address;
nir_def *desc_address;
if (intr->intrinsic == nir_intrinsic_bindless_image_texel_address ||
intr->intrinsic == nir_intrinsic_bindless_image_store)
desc_address = texture_descriptor_ptr_for_handle(b, intr->src[0].ssa);
else
desc_address = texture_descriptor_ptr_for_index(b, intr->src[0].ssa);
nir_ssa_def *coord = intr->src[1].ssa;
nir_def *coord = intr->src[1].ssa;
enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr);
bool layered = nir_intrinsic_image_array(intr) ||
@ -649,36 +643,36 @@ image_texel_address(nir_builder *b, nir_intrinsic_instr *intr,
* software-defined atomic descriptor, or (if array image) a pointer to the
* descriptor. Grab it.
*/
nir_ssa_def *meta_ptr = nir_iadd_imm(b, desc_address, 16);
nir_ssa_def *meta = nir_load_global_constant(b, meta_ptr, 8, 1, 64);
nir_ssa_def *layer_stride_el = NULL;
nir_def *meta_ptr = nir_iadd_imm(b, desc_address, 16);
nir_def *meta = nir_load_global_constant(b, meta_ptr, 8, 1, 64);
nir_def *layer_stride_el = NULL;
if (layered) {
nir_ssa_def *desc = nir_load_global_constant(b, meta, 8, 3, 32);
nir_def *desc = nir_load_global_constant(b, meta, 8, 3, 32);
meta = nir_pack_64_2x32(b, nir_trim_vector(b, desc, 2));
layer_stride_el = nir_channel(b, desc, 2);
}
nir_ssa_def *meta_hi = nir_unpack_64_2x32_split_y(b, meta);
nir_def *meta_hi = nir_unpack_64_2x32_split_y(b, meta);
/* See the GenXML definitions of the software-defined atomic descriptors */
nir_ssa_def *base;
nir_def *base;
if (dim == GLSL_SAMPLER_DIM_BUF)
base = meta;
else
base = nir_ishl_imm(b, nir_iand_imm(b, meta, BITFIELD64_MASK(33)), 7);
nir_ssa_def *tile_w_px_log2 =
nir_def *tile_w_px_log2 =
nir_u2u16(b, nir_ubitfield_extract_imm(b, meta_hi, 33 - 32, 3));
nir_ssa_def *tile_h_px_log2 =
nir_def *tile_h_px_log2 =
nir_u2u16(b, nir_ubitfield_extract_imm(b, meta_hi, 36 - 32, 3));
nir_ssa_def *width_tl = nir_ubitfield_extract_imm(b, meta_hi, 39 - 32, 14);
nir_def *width_tl = nir_ubitfield_extract_imm(b, meta_hi, 39 - 32, 14);
/* We do not allow atomics on linear 2D or linear 2D arrays, as there are no
* known use cases. So, we're linear if buffer or 1D, and twiddled otherwise.
*/
nir_ssa_def *total_px;
nir_def *total_px;
if (dim == GLSL_SAMPLER_DIM_BUF || dim == GLSL_SAMPLER_DIM_1D) {
/* 1D linear is indexed directly */
total_px = nir_channel(b, coord, 0);
@ -687,12 +681,11 @@ image_texel_address(nir_builder *b, nir_intrinsic_instr *intr,
b, coord, tile_w_px_log2, tile_h_px_log2, width_tl, layer_stride_el);
}
nir_ssa_def *total_sa;
nir_def *total_sa;
if (dim == GLSL_SAMPLER_DIM_MS) {
nir_ssa_def *sample_idx = intr->src[2].ssa;
nir_ssa_def *samples_log2 =
nir_ubitfield_extract_imm(b, meta_hi, 54 - 32, 2);
nir_def *sample_idx = intr->src[2].ssa;
nir_def *samples_log2 = nir_ubitfield_extract_imm(b, meta_hi, 54 - 32, 2);
total_sa = nir_iadd(b, nir_ishl(b, total_px, samples_log2), sample_idx);
} else {
@ -709,7 +702,7 @@ image_texel_address(nir_builder *b, nir_intrinsic_instr *intr,
enum pipe_format format = nir_intrinsic_format(intr);
unsigned bytes_per_sample_B = util_format_get_blocksize(format);
nir_ssa_def *total_B = nir_imul_imm(b, total_sa, bytes_per_sample_B);
nir_def *total_B = nir_imul_imm(b, total_sa, bytes_per_sample_B);
return nir_iadd(b, base, nir_u2u64(b, total_B));
}
@ -719,14 +712,14 @@ lower_buffer_image(nir_builder *b, nir_intrinsic_instr *intr)
if (nir_intrinsic_image_dim(intr) != GLSL_SAMPLER_DIM_BUF)
return false;
nir_ssa_def *coord_vector = intr->src[1].ssa;
nir_ssa_def *coord = nir_channel(b, coord_vector, 0);
nir_def *coord_vector = intr->src[1].ssa;
nir_def *coord = nir_channel(b, coord_vector, 0);
/* Lower the buffer load/store to a 2D image load/store, matching the 2D
* texture/PBE descriptor the driver supplies for buffer images.
*/
nir_ssa_def *coord2d = coords_for_buffer_texture(b, coord);
nir_src_rewrite_ssa(&intr->src[1], nir_pad_vector(b, coord2d, 4));
nir_def *coord2d = coords_for_buffer_texture(b, coord);
nir_src_rewrite(&intr->src[1], nir_pad_vector(b, coord2d, 4));
nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_2D);
return true;
}
@ -749,7 +742,7 @@ lower_images(nir_builder *b, nir_instr *instr, UNUSED void *data)
case nir_intrinsic_image_size:
case nir_intrinsic_bindless_image_size:
nir_ssa_def_rewrite_uses(
nir_def_rewrite_uses(
&intr->dest.ssa,
txs_for_image(b, intr, nir_dest_num_components(intr->dest),
nir_dest_bit_size(intr->dest)));
@ -757,8 +750,8 @@ lower_images(nir_builder *b, nir_instr *instr, UNUSED void *data)
case nir_intrinsic_image_texel_address:
case nir_intrinsic_bindless_image_texel_address:
nir_ssa_def_rewrite_uses(&intr->dest.ssa,
image_texel_address(b, intr, false));
nir_def_rewrite_uses(&intr->dest.ssa,
image_texel_address(b, intr, false));
return true;
default:
@ -842,10 +835,10 @@ lower_multisampled_store(nir_builder *b, nir_instr *instr, UNUSED void *data)
if (nir_intrinsic_image_dim(intr) != GLSL_SAMPLER_DIM_MS)
return false;
nir_ssa_def *index_px = image_texel_address(b, intr, true);
nir_ssa_def *coord2d = coords_for_buffer_texture(b, index_px);
nir_def *index_px = image_texel_address(b, intr, true);
nir_def *coord2d = coords_for_buffer_texture(b, index_px);
nir_src_rewrite_ssa(&intr->src[1], nir_pad_vector(b, coord2d, 4));
nir_src_rewrite(&intr->src[1], nir_pad_vector(b, coord2d, 4));
nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_2D);
return true;
}

View file

@ -20,15 +20,15 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
b->cursor = nir_before_instr(instr);
nir_ssa_def *ubo_index = nir_ssa_for_src(b, intr->src[0], 1);
nir_ssa_def *offset = nir_ssa_for_src(b, *nir_get_io_offset_src(intr), 1);
nir_ssa_def *address =
nir_def *ubo_index = nir_ssa_for_src(b, intr->src[0], 1);
nir_def *offset = nir_ssa_for_src(b, *nir_get_io_offset_src(intr), 1);
nir_def *address =
nir_iadd(b, nir_load_ubo_base_agx(b, ubo_index), nir_u2u64(b, offset));
nir_ssa_def *value = nir_load_global_constant(
nir_def *value = nir_load_global_constant(
b, address, nir_intrinsic_align(intr), intr->num_components,
nir_dest_bit_size(intr->dest));
nir_ssa_def_rewrite_uses(&intr->dest.ssa, value);
nir_def_rewrite_uses(&intr->dest.ssa, value);
return true;
}

View file

@ -8,7 +8,7 @@
#include "agx_compiler.h"
static void
def_size(nir_ssa_def *def, unsigned *size, unsigned *align)
def_size(nir_def *def, unsigned *size, unsigned *align)
{
unsigned bit_size = MAX2(def->bit_size, 16);
@ -50,7 +50,7 @@ instr_cost(nir_instr *instr, const void *data)
}
static float
rewrite_cost(nir_ssa_def *def, const void *data)
rewrite_cost(nir_def *def, const void *data)
{
bool mov_needed = false;
nir_foreach_use(use, def) {
@ -76,7 +76,7 @@ rewrite_cost(nir_ssa_def *def, const void *data)
static bool
avoid_instr(const nir_instr *instr, const void *data)
{
const nir_ssa_def *def = nir_instr_ssa_def((nir_instr *)instr);
const nir_def *def = nir_instr_ssa_def((nir_instr *)instr);
/* Do not move bindless handles, since we need those to retain their constant
* base index.

View file

@ -36,7 +36,7 @@ agx_compile_meta_shader(struct agx_meta_cache *cache, nir_shader *shader,
return res;
}
static nir_ssa_def *
static nir_def *
build_background_op(nir_builder *b, enum agx_meta_op op, unsigned rt,
unsigned nr, bool msaa)
{

View file

@ -10,8 +10,8 @@
#include "nir_builder.h"
#include "nir_format_convert.h"
static inline nir_ssa_def *
nir_sign_extend_if_sint(nir_builder *b, nir_ssa_def *x, enum pipe_format format)
static inline nir_def *
nir_sign_extend_if_sint(nir_builder *b, nir_def *x, enum pipe_format format)
{
if (!util_format_is_pure_sint(format))
return x;

View file

@ -46,7 +46,7 @@ agx_nir_lower_alpha_to_coverage(nir_shader *shader, uint8_t nr_samples)
return;
/* Similarly, if there are less than 4 components, alpha is undefined */
nir_ssa_def *rgba = store->src[0].ssa;
nir_def *rgba = store->src[0].ssa;
if (rgba->num_components < 4)
return;
@ -59,9 +59,9 @@ agx_nir_lower_alpha_to_coverage(nir_shader *shader, uint8_t nr_samples)
* # of bits = (unsigned int) (alpha * nr_samples)
* mask = (1 << (# of bits)) - 1
*/
nir_ssa_def *alpha = nir_channel(b, rgba, 3);
nir_ssa_def *bits = nir_f2u32(b, nir_fmul_imm(b, alpha, nr_samples));
nir_ssa_def *mask =
nir_def *alpha = nir_channel(b, rgba, 3);
nir_def *bits = nir_f2u32(b, nir_fmul_imm(b, alpha, nr_samples));
nir_def *mask =
nir_iadd_imm(b, nir_ishl(b, nir_imm_intN_t(b, 1, 16), bits), -1);
/* Discard samples that aren't covered */
@ -100,12 +100,12 @@ agx_nir_lower_alpha_to_one(nir_shader *shader)
if (sem.location < FRAG_RESULT_DATA0)
continue;
nir_ssa_def *rgba = intr->src[0].ssa;
nir_def *rgba = intr->src[0].ssa;
if (rgba->num_components < 4)
continue;
nir_builder b = nir_builder_at(nir_before_instr(instr));
nir_ssa_def *rgb1 = nir_vector_insert_imm(
nir_def *rgb1 = nir_vector_insert_imm(
&b, rgba, nir_imm_floatN_t(&b, 1.0, rgba->bit_size), 3);
nir_instr_rewrite_src_ssa(instr, &intr->src[0], rgb1);
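
The alpha-to-coverage mask described in the comment above ("# of bits = (unsigned int)(alpha * nr_samples)", then a low mask of that many bits) behaves like this for 4x MSAA; a standalone sketch with made-up alpha values:

   #include <stdio.h>

   int
   main(void)
   {
      unsigned nr_samples = 4;
      for (float alpha = 0.0f; alpha <= 1.0f; alpha += 0.25f) {
         unsigned bits = (unsigned)(alpha * nr_samples);
         unsigned mask = (1u << bits) - 1;
         printf("alpha %.2f -> sample mask 0x%x\n", alpha, mask);
      }
      return 0;
   }
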

View file

@ -11,7 +11,7 @@
static bool
lower_wrapped(nir_builder *b, nir_instr *instr, void *data)
{
nir_ssa_def *sample_id = data;
nir_def *sample_id = data;
if (instr->type != nir_instr_type_intrinsic)
return false;
@ -21,7 +21,7 @@ lower_wrapped(nir_builder *b, nir_instr *instr, void *data)
switch (intr->intrinsic) {
case nir_intrinsic_load_sample_id: {
unsigned size = nir_dest_bit_size(intr->dest);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_u2uN(b, sample_id, size));
nir_def_rewrite_uses(&intr->dest.ssa, nir_u2uN(b, sample_id, size));
nir_instr_remove(instr);
return true;
}
@ -34,10 +34,10 @@ lower_wrapped(nir_builder *b, nir_instr *instr, void *data)
unsigned mask_index =
(intr->intrinsic == nir_intrinsic_store_local_pixel_agx) ? 1 : 0;
nir_ssa_def *mask = intr->src[mask_index].ssa;
nir_ssa_def *id_mask = nir_ishl(b, nir_imm_intN_t(b, 1, mask->bit_size),
nir_u2u32(b, sample_id));
nir_src_rewrite_ssa(&intr->src[mask_index], nir_iand(b, mask, id_mask));
nir_def *mask = intr->src[mask_index].ssa;
nir_def *id_mask = nir_ishl(b, nir_imm_intN_t(b, 1, mask->bit_size),
nir_u2u32(b, sample_id));
nir_src_rewrite(&intr->src[mask_index], nir_iand(b, mask, id_mask));
return true;
}
@ -70,7 +70,7 @@ agx_nir_wrap_per_sample_loop(nir_shader *shader, uint8_t nr_samples)
nir_variable *i =
nir_local_variable_create(impl, glsl_uintN_t_type(16), NULL);
nir_store_var(&b, i, nir_imm_intN_t(&b, 0, 16), ~0);
nir_ssa_def *index = NULL;
nir_def *index = NULL;
/* Create a loop in the wrapped function */
nir_loop *loop = nir_push_loop(&b);
@ -151,11 +151,11 @@ lower_sample_mask_read(nir_builder *b, nir_instr *instr, UNUSED void *_)
if (intr->intrinsic != nir_intrinsic_load_sample_mask_in)
return false;
nir_ssa_def *old = &intr->dest.ssa;
nir_ssa_def *lowered = nir_iand(
nir_def *old = &intr->dest.ssa;
nir_def *lowered = nir_iand(
b, old, nir_u2uN(b, nir_load_api_sample_mask_agx(b), old->bit_size));
nir_ssa_def_rewrite_uses_after(old, lowered, lowered->parent_instr);
nir_def_rewrite_uses_after(old, lowered, lowered->parent_instr);
return true;
}
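
A rough sketch of the per-sample masking this lowering applies inside the wrapped loop: per-pixel write masks are restricted to the current sample, and gl_SampleMaskIn additionally picks up the API sample mask. The concrete mask values here are invented for illustration:

   #include <stdint.h>
   #include <stdio.h>

   int
   main(void)
   {
      unsigned nr_samples = 4;
      uint16_t store_mask = 0xffff;   /* "all samples" as emitted by the shader */
      uint16_t api_sample_mask = 0xb; /* hypothetical API-level sample mask */

      for (unsigned sample_id = 0; sample_id < nr_samples; sample_id++) {
         uint16_t id_mask = (uint16_t)(1u << sample_id);
         uint16_t masked_store = store_mask & id_mask;
         uint16_t mask_in = id_mask & api_sample_mask;
         printf("sample %u: store mask 0x%x, gl_SampleMaskIn 0x%x\n",
                sample_id, masked_store, mask_in);
      }
      return 0;
   }
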

View file

@ -7,10 +7,10 @@
#include "agx_tilebuffer.h"
#include "nir_builder.h"
static nir_ssa_def *
mask_by_sample_id(nir_builder *b, nir_ssa_def *mask)
static nir_def *
mask_by_sample_id(nir_builder *b, nir_def *mask)
{
nir_ssa_def *id_mask =
nir_def *id_mask =
nir_ishl(b, nir_imm_intN_t(b, 1, mask->bit_size), nir_load_sample_id(b));
return nir_iand(b, mask, id_mask);
}
@ -36,16 +36,16 @@ lower_to_sample(nir_builder *b, nir_instr *instr, void *_)
* xy[component] = ((float)nibble) / 16.0;
* }
*/
nir_ssa_def *packed = nir_load_sample_positions_agx(b);
nir_def *packed = nir_load_sample_positions_agx(b);
/* The n'th sample is the in the n'th byte of the register */
nir_ssa_def *shifted = nir_ushr(
nir_def *shifted = nir_ushr(
b, packed, nir_u2u32(b, nir_imul_imm(b, nir_load_sample_id(b), 8)));
nir_ssa_def *xy[2];
nir_def *xy[2];
for (unsigned i = 0; i < 2; ++i) {
/* Get the appropriate nibble */
nir_ssa_def *nibble =
nir_def *nibble =
nir_iand_imm(b, nir_ushr_imm(b, shifted, i * 4), 0xF);
/* Convert it from fixed point to float */
@ -56,7 +56,7 @@ lower_to_sample(nir_builder *b, nir_instr *instr, void *_)
}
/* Collect and rewrite */
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec2(b, xy[0], xy[1]));
nir_def_rewrite_uses(&intr->dest.ssa, nir_vec2(b, xy[0], xy[1]));
nir_instr_remove(instr);
return true;
}
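
The packed sample-position register described above decodes as one byte per sample, low nibble = x and high nibble = y, both in 1/16ths of a pixel. A CPU decode with a made-up register value:

   #include <stdint.h>
   #include <stdio.h>

   int
   main(void)
   {
      uint32_t packed = 0x44cc44c4; /* hypothetical 4-sample pattern */

      for (unsigned sample = 0; sample < 4; sample++) {
         uint32_t byte = (packed >> (sample * 8)) & 0xff;
         float x = (float)(byte & 0xF) / 16.0f;
         float y = (float)((byte >> 4) & 0xF) / 16.0f;
         printf("sample %u: (%.4f, %.4f)\n", sample, x, y);
      }
      return 0;
   }
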
@ -67,9 +67,9 @@ lower_to_sample(nir_builder *b, nir_instr *instr, void *_)
* by the sample ID to make that happen.
*/
b->cursor = nir_after_instr(instr);
nir_ssa_def *old = &intr->dest.ssa;
nir_ssa_def *lowered = mask_by_sample_id(b, old);
nir_ssa_def_rewrite_uses_after(old, lowered, lowered->parent_instr);
nir_def *old = &intr->dest.ssa;
nir_def *lowered = mask_by_sample_id(b, old);
nir_def_rewrite_uses_after(old, lowered, lowered->parent_instr);
return true;
}
@ -78,13 +78,13 @@ lower_to_sample(nir_builder *b, nir_instr *instr, void *_)
* interpolateAtSample() with the sample ID
*/
b->cursor = nir_after_instr(instr);
nir_ssa_def *old = &intr->dest.ssa;
nir_def *old = &intr->dest.ssa;
nir_ssa_def *lowered = nir_load_barycentric_at_sample(
nir_def *lowered = nir_load_barycentric_at_sample(
b, nir_dest_bit_size(intr->dest), nir_load_sample_id(b),
.interp_mode = nir_intrinsic_interp_mode(intr));
nir_ssa_def_rewrite_uses_after(old, lowered, lowered->parent_instr);
nir_def_rewrite_uses_after(old, lowered, lowered->parent_instr);
return true;
}

View file

@ -43,7 +43,7 @@ tib_filter(const nir_instr *instr, UNUSED const void *_)
static void
store_tilebuffer(nir_builder *b, struct agx_tilebuffer_layout *tib,
enum pipe_format format, enum pipe_format logical_format,
unsigned rt, nir_ssa_def *value, unsigned write_mask)
unsigned rt, nir_def *value, unsigned write_mask)
{
/* The hardware cannot extend for a 32-bit format. Extend ourselves. */
if (format == PIPE_FORMAT_R32_UINT && value->bit_size == 16) {
@ -61,7 +61,7 @@ store_tilebuffer(nir_builder *b, struct agx_tilebuffer_layout *tib,
.format = format);
}
static nir_ssa_def *
static nir_def *
load_tilebuffer(nir_builder *b, struct agx_tilebuffer_layout *tib,
uint8_t load_comps, uint8_t bit_size, unsigned rt,
enum pipe_format format, enum pipe_format logical_format)
@ -74,7 +74,7 @@ load_tilebuffer(nir_builder *b, struct agx_tilebuffer_layout *tib,
format = PIPE_FORMAT_R16_UINT;
uint8_t offset_B = agx_tilebuffer_offset_B(tib, rt);
nir_ssa_def *res = nir_load_local_pixel_agx(
nir_def *res = nir_load_local_pixel_agx(
b, MIN2(load_comps, comps), f16 ? 16 : bit_size,
nir_imm_intN_t(b, ALL_SAMPLES, 16), .base = offset_B, .format = format);
@ -100,7 +100,7 @@ load_tilebuffer(nir_builder *b, struct agx_tilebuffer_layout *tib,
* texture/PBE descriptors are alternated for each render target. This is
* ABI. If we need to make this more flexible for Vulkan later, we can.
*/
static nir_ssa_def *
static nir_def *
handle_for_rt(nir_builder *b, unsigned base, unsigned rt, bool pbe,
bool *bindless)
{
@ -117,7 +117,7 @@ handle_for_rt(nir_builder *b, unsigned base, unsigned rt, bool pbe,
}
static enum glsl_sampler_dim
dim_for_rt(nir_builder *b, unsigned nr_samples, nir_ssa_def **sample)
dim_for_rt(nir_builder *b, unsigned nr_samples, nir_def **sample)
{
if (nr_samples == 1) {
*sample = nir_imm_intN_t(b, 0, 16);
@ -129,7 +129,7 @@ dim_for_rt(nir_builder *b, unsigned nr_samples, nir_ssa_def **sample)
}
}
static nir_ssa_def *
static nir_def *
image_coords(nir_builder *b)
{
return nir_pad_vector(b, nir_u2u32(b, nir_load_pixel_coord(b)), 4);
@ -137,25 +137,25 @@ image_coords(nir_builder *b)
static void
store_memory(nir_builder *b, unsigned bindless_base, unsigned nr_samples,
enum pipe_format format, unsigned rt, nir_ssa_def *value)
enum pipe_format format, unsigned rt, nir_def *value)
{
/* Force bindless for multisampled image writes. It avoids the late lowering
* needing a texture_base_agx sysval.
*/
bool bindless = (nr_samples > 1);
nir_ssa_def *image = handle_for_rt(b, bindless_base, rt, true, &bindless);
nir_ssa_def *zero = nir_imm_intN_t(b, 0, 16);
nir_ssa_def *lod = zero;
nir_def *image = handle_for_rt(b, bindless_base, rt, true, &bindless);
nir_def *zero = nir_imm_intN_t(b, 0, 16);
nir_def *lod = zero;
nir_ssa_def *sample;
nir_def *sample;
enum glsl_sampler_dim dim = dim_for_rt(b, nr_samples, &sample);
nir_ssa_def *coords = image_coords(b);
nir_def *coords = image_coords(b);
nir_begin_invocation_interlock(b);
if (nr_samples > 1) {
nir_ssa_def *coverage = nir_load_sample_mask(b);
nir_ssa_def *covered = nir_ubitfield_extract(
nir_def *coverage = nir_load_sample_mask(b);
nir_def *covered = nir_ubitfield_extract(
b, coverage, nir_u2u32(b, sample), nir_imm_int(b, 1));
nir_push_if(b, nir_ine_imm(b, covered, 0));
@ -176,19 +176,19 @@ store_memory(nir_builder *b, unsigned bindless_base, unsigned nr_samples,
b->shader->info.writes_memory = true;
}
static nir_ssa_def *
static nir_def *
load_memory(nir_builder *b, unsigned bindless_base, unsigned nr_samples,
uint8_t comps, uint8_t bit_size, unsigned rt,
enum pipe_format format)
{
bool bindless = false;
nir_ssa_def *image = handle_for_rt(b, bindless_base, rt, false, &bindless);
nir_ssa_def *zero = nir_imm_intN_t(b, 0, 16);
nir_ssa_def *lod = zero;
nir_def *image = handle_for_rt(b, bindless_base, rt, false, &bindless);
nir_def *zero = nir_imm_intN_t(b, 0, 16);
nir_def *lod = zero;
nir_ssa_def *sample;
nir_def *sample;
enum glsl_sampler_dim dim = dim_for_rt(b, nr_samples, &sample);
nir_ssa_def *coords = image_coords(b);
nir_def *coords = image_coords(b);
/* Ensure pixels below this one have written out their results */
nir_begin_invocation_interlock(b);
@ -204,7 +204,7 @@ load_memory(nir_builder *b, unsigned bindless_base, unsigned nr_samples,
}
}
static nir_ssa_def *
static nir_def *
tib_impl(nir_builder *b, nir_instr *instr, void *data)
{
struct ctx *ctx = data;
@ -250,7 +250,7 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data)
if (!write_mask)
return NIR_LOWER_INSTR_PROGRESS_REPLACE;
nir_ssa_def *value = intr->src[0].ssa;
nir_def *value = intr->src[0].ssa;
/* Trim to format as required by hardware */
value = nir_trim_vector(b, intr->src[0].ssa, comps);
@ -272,7 +272,7 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data)
* possible to encode in the hardware, delete them.
*/
if (logical_format == PIPE_FORMAT_NONE) {
return nir_ssa_undef(b, intr->num_components, bit_size);
return nir_undef(b, intr->num_components, bit_size);
} else if (tib->spilled[rt]) {
*(ctx->translucent) = true;

View file

@ -78,8 +78,8 @@ agx_vbo_supports_format(enum pipe_format format)
return agx_vbo_internal_format(format) != PIPE_FORMAT_NONE;
}
static nir_ssa_def *
apply_swizzle_channel(nir_builder *b, nir_ssa_def *vec, unsigned swizzle,
static nir_def *
apply_swizzle_channel(nir_builder *b, nir_def *vec, unsigned swizzle,
bool is_int)
{
switch (swizzle) {
@ -158,12 +158,11 @@ pass(struct nir_builder *b, nir_instr *instr, void *data)
/* Calculate the element to fetch the vertex for. Divide the instance ID by
* the divisor for per-instance data. Divisor=0 specifies per-vertex data.
*/
nir_ssa_def *el =
(attrib.divisor == 0)
? nir_load_vertex_id(b)
: nir_udiv_imm(b, nir_load_instance_id(b), attrib.divisor);
nir_def *el = (attrib.divisor == 0)
? nir_load_vertex_id(b)
: nir_udiv_imm(b, nir_load_instance_id(b), attrib.divisor);
nir_ssa_def *base = nir_load_vbo_base_agx(b, nir_imm_int(b, attrib.buf));
nir_def *base = nir_load_vbo_base_agx(b, nir_imm_int(b, attrib.buf));
assert((stride % interchange_align) == 0 && "must be aligned");
assert((offset % interchange_align) == 0 && "must be aligned");
@ -183,11 +182,11 @@ pass(struct nir_builder *b, nir_instr *instr, void *data)
stride_el = 1;
}
nir_ssa_def *stride_offset_el =
nir_def *stride_offset_el =
nir_iadd_imm(b, nir_imul_imm(b, el, stride_el), offset_el);
/* Load the raw vector */
nir_ssa_def *memory = nir_load_constant_agx(
nir_def *memory = nir_load_constant_agx(
b, interchange_comps, interchange_register_size, base, stride_offset_el,
.format = interchange_format, .base = shift);
@ -240,14 +239,14 @@ pass(struct nir_builder *b, nir_instr *instr, void *data)
/* We now have a properly formatted vector of the components in memory. Apply
* the format swizzle forwards to trim/pad/reorder as needed.
*/
nir_ssa_def *channels[4] = {NULL};
nir_def *channels[4] = {NULL};
assert(nir_intrinsic_component(intr) == 0 && "unimplemented");
for (unsigned i = 0; i < intr->num_components; ++i)
channels[i] = apply_swizzle_channel(b, memory, desc->swizzle[i], is_int);
nir_ssa_def *logical = nir_vec(b, channels, intr->num_components);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, logical);
nir_def *logical = nir_vec(b, channels, intr->num_components);
nir_def_rewrite_uses(&intr->dest.ssa, logical);
return true;
}
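
The fetch-index math above reduces to the usual vertex-attribute addressing. A standalone sketch in bytes rather than elements, with invented stride/offset/base values (the pass itself works in units of the interchange format):

   #include <stdint.h>
   #include <stdio.h>

   int
   main(void)
   {
      unsigned vertex_id = 17, instance_id = 5;
      unsigned divisor = 0;     /* 0 = per-vertex, N = advance every N instances */
      unsigned stride_B = 32, offset_B = 8, el_size_B = 4;
      uint64_t base = 0x10000;  /* hypothetical VBO base address */

      unsigned el = (divisor == 0) ? vertex_id : instance_id / divisor;

      /* Express stride/offset in elements, as the pass does, then scale back */
      unsigned stride_el = stride_B / el_size_B;
      unsigned offset_el = offset_B / el_size_B;
      uint64_t addr = base + (uint64_t)(el * stride_el + offset_el) * el_size_B;

      printf("element %u fetched at 0x%llx\n", el, (unsigned long long)addr);
      return 0;
   }
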

View file

@ -310,7 +310,7 @@ ntq_add_pending_tmu_flush(struct v3d_compile *c,
nir_intrinsic_instr *store = nir_store_reg_for_def(&dest->ssa);
if (store != NULL) {
nir_ssa_def *reg = store->src[1].ssa;
nir_def *reg = store->src[1].ssa;
_mesa_set_add(c->tmu.outstanding_regs, reg);
}
}
@ -716,7 +716,7 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
}
static struct qreg *
ntq_init_ssa_def(struct v3d_compile *c, nir_ssa_def *def)
ntq_init_ssa_def(struct v3d_compile *c, nir_def *def)
{
struct qreg *qregs = ralloc_array(c->def_ht, struct qreg,
def->num_components);
@ -789,7 +789,7 @@ ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan,
qregs[chan] = result;
} else {
nir_ssa_def *reg = store->src[1].ssa;
nir_def *reg = store->src[1].ssa;
ASSERTED nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
assert(nir_intrinsic_base(store) == 0);
assert(nir_intrinsic_num_array_elems(decl) == 0);
@ -858,7 +858,7 @@ ntq_get_src(struct v3d_compile *c, nir_src src, int i)
entry = _mesa_hash_table_search(c->def_ht, src.ssa);
}
} else {
nir_ssa_def *reg = load->src[0].ssa;
nir_def *reg = load->src[0].ssa;
ASSERTED nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
assert(nir_intrinsic_base(load) == 0);
assert(nir_intrinsic_num_array_elems(decl) == 0);
@ -2471,7 +2471,7 @@ ntq_setup_registers(struct v3d_compile *c, nir_function_impl *impl)
struct qreg *qregs = ralloc_array(c->def_ht, struct qreg,
array_len * num_components);
nir_ssa_def *nir_reg = &decl->dest.ssa;
nir_def *nir_reg = &decl->dest.ssa;
_mesa_hash_table_insert(c->def_ht, nir_reg, qregs);
for (int i = 0; i < array_len * num_components; i++)

View file

@ -135,7 +135,7 @@ v3d33_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
* instruction writes and how many the instruction could produce.
*/
p1_unpacked.return_words_of_texture_data =
nir_ssa_def_components_read(&instr->dest.ssa);
nir_def_components_read(&instr->dest.ssa);
uint32_t p0_packed;
V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_pack(NULL,

View file

@ -253,15 +253,15 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
nir_intrinsic_instr *store = nir_store_reg_for_def(&instr->dest.ssa);
if (store == NULL) {
p0_unpacked.return_words_of_texture_data =
nir_ssa_def_components_read(&instr->dest.ssa);
nir_def_components_read(&instr->dest.ssa);
} else {
nir_ssa_def *reg = store->src[1].ssa;
nir_def *reg = store->src[1].ssa;
nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
unsigned reg_num_components =
nir_intrinsic_num_components(decl);
/* For the non-ssa case we don't have a full equivalent to
* nir_ssa_def_components_read. This is a problem for the 16
* nir_def_components_read. This is a problem for the 16
* bit case. nir_lower_tex will not change the destination as
* nir_tex_instr_dest_size will still return 4. The driver is
* just expected to not store on other channels, so we

View file

@ -622,7 +622,7 @@ struct v3d_compile {
void *debug_output_data;
/**
* Mapping from nir_register * or nir_ssa_def * to array of struct
* Mapping from nir_register * or nir_def * to array of struct
* qreg for the values.
*/
struct hash_table *def_ht;

View file

@ -60,14 +60,14 @@ v3d_gl_format_is_return_32(enum pipe_format format)
/* Packs a 32-bit vector of colors in the range [0, (1 << bits[i]) - 1] to a
* 32-bit SSA value, with as many channels as necessary to store all the bits
*/
static nir_ssa_def *
pack_bits(nir_builder *b, nir_ssa_def *color, const unsigned *bits,
static nir_def *
pack_bits(nir_builder *b, nir_def *color, const unsigned *bits,
int num_components, bool mask)
{
nir_ssa_def *results[4];
nir_def *results[4];
int offset = 0;
for (int i = 0; i < num_components; i++) {
nir_ssa_def *chan = nir_channel(b, color, i);
nir_def *chan = nir_channel(b, color, i);
/* Channels being stored shouldn't cross a 32-bit boundary. */
assert((offset & ~31) == ((offset + bits[i] - 1) & ~31));
@ -103,10 +103,10 @@ v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
b->cursor = nir_before_instr(&instr->instr);
nir_ssa_def *color = nir_trim_vector(b,
nir_def *color = nir_trim_vector(b,
nir_ssa_for_src(b, instr->src[3], 4),
num_components);
nir_ssa_def *formatted = NULL;
nir_def *formatted = NULL;
if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
formatted = nir_format_pack_11f11f10f(b, color);
@ -182,14 +182,14 @@ v3d_nir_lower_image_load(nir_builder *b, nir_intrinsic_instr *instr)
b->cursor = nir_after_instr(&instr->instr);
nir_ssa_def *result = &instr->dest.ssa;
nir_def *result = &instr->dest.ssa;
if (util_format_is_pure_uint(format)) {
result = nir_format_unpack_uint(b, result, bits16, 4);
} else if (util_format_is_pure_sint(format)) {
result = nir_format_unpack_sint(b, result, bits16, 4);
} else {
nir_ssa_def *rg = nir_channel(b, result, 0);
nir_ssa_def *ba = nir_channel(b, result, 1);
nir_def *rg = nir_channel(b, result, 0);
nir_def *ba = nir_channel(b, result, 1);
result = nir_vec4(b,
nir_unpack_half_2x16_split_x(b, rg),
nir_unpack_half_2x16_split_y(b, rg),
@ -197,7 +197,7 @@ v3d_nir_lower_image_load(nir_builder *b, nir_intrinsic_instr *instr)
nir_unpack_half_2x16_split_y(b, ba));
}
nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, result,
nir_def_rewrite_uses_after(&instr->dest.ssa, result,
result->parent_instr);
return true;
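
A standalone version of the packing helper documented above: channel i occupies bits[i] bits, packed LSB-first into as many 32-bit words as needed and never straddling a word boundary. The optional channel masking done by the real helper is omitted here:

   #include <assert.h>
   #include <stdint.h>
   #include <stdio.h>

   static void
   pack_bits(uint32_t *out, const uint32_t *color, const unsigned *bits,
             int num_components)
   {
      int offset = 0;
      for (int i = 0; i < num_components; i++) {
         /* Channels being stored shouldn't cross a 32-bit boundary */
         assert((offset & ~31) == ((offset + bits[i] - 1) & ~31));
         out[offset / 32] |= color[i] << (offset % 32);
         offset += bits[i];
      }
   }

   int
   main(void)
   {
      /* RGBA16: four 16-bit channels -> two 32-bit words */
      const unsigned bits[4] = {16, 16, 16, 16};
      const uint32_t color[4] = {0x1111, 0x2222, 0x3333, 0x4444};
      uint32_t out[2] = {0, 0};

      pack_bits(out, color, bits, 4);
      printf("0x%08x 0x%08x\n", out[0], out[1]); /* 0x22221111 0x44443333 */
      return 0;
   }
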

View file

@ -62,7 +62,7 @@ struct v3d_nir_lower_io_state {
BITSET_WORD varyings_stored[BITSET_WORDS(V3D_MAX_ANY_STAGE_INPUTS)];
nir_ssa_def *pos[4];
nir_def *pos[4];
};
static void
@ -70,8 +70,8 @@ v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
struct v3d_nir_lower_io_state *state);
static void
v3d_nir_store_output(nir_builder *b, int base, nir_ssa_def *offset,
nir_ssa_def *chan)
v3d_nir_store_output(nir_builder *b, int base, nir_def *offset,
nir_def *chan)
{
if (offset) {
/* When generating the VIR instruction, the base and the offset
@ -134,13 +134,13 @@ v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
/* If this is a geometry shader we need to emit our outputs
* to the current vertex offset in the VPM.
*/
nir_ssa_def *offset_reg =
nir_def *offset_reg =
c->s->info.stage == MESA_SHADER_GEOMETRY ?
nir_load_var(b, state->gs.output_offset_var) : NULL;
int start_comp = nir_intrinsic_component(intr);
unsigned location = nir_intrinsic_io_semantics(intr).location;
nir_ssa_def *src = nir_ssa_for_src(b, intr->src[0],
nir_def *src = nir_ssa_for_src(b, intr->src[0],
intr->num_components);
/* Save off the components of the position for the setup of VPM inputs
* read by fixed function HW.
@ -159,7 +159,7 @@ v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
if (location == VARYING_SLOT_LAYER) {
assert(c->s->info.stage == MESA_SHADER_GEOMETRY);
nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
nir_def *header = nir_load_var(b, state->gs.header_var);
header = nir_iand_imm(b, header, 0xff00ffff);
/* From the GLES 3.2 spec:
@ -180,9 +180,9 @@ v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
* to 0 in that case (we always allocate tile state for at
* least one layer).
*/
nir_ssa_def *fb_layers = nir_load_fb_layers_v3d(b, 32);
nir_ssa_def *cond = nir_ige(b, src, fb_layers);
nir_ssa_def *layer_id =
nir_def *fb_layers = nir_load_fb_layers_v3d(b, 32);
nir_def *cond = nir_ige(b, src, fb_layers);
nir_def *layer_id =
nir_bcsel(b, cond,
nir_imm_int(b, 0),
nir_ishl_imm(b, src, 16));
@ -238,9 +238,9 @@ v3d_nir_lower_emit_vertex(struct v3d_compile *c, nir_builder *b,
{
b->cursor = nir_before_instr(&instr->instr);
nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
nir_ssa_def *header_offset = nir_load_var(b, state->gs.header_offset_var);
nir_ssa_def *output_offset = nir_load_var(b, state->gs.output_offset_var);
nir_def *header = nir_load_var(b, state->gs.header_var);
nir_def *header_offset = nir_load_var(b, state->gs.header_offset_var);
nir_def *output_offset = nir_load_var(b, state->gs.output_offset_var);
/* Emit fixed function outputs */
v3d_nir_emit_ff_vpm_outputs(c, b, state);
@ -476,16 +476,16 @@ v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
/* If this is a geometry shader we need to emit our fixed function
* outputs to the current vertex offset in the VPM.
*/
nir_ssa_def *offset_reg =
nir_def *offset_reg =
c->s->info.stage == MESA_SHADER_GEOMETRY ?
nir_load_var(b, state->gs.output_offset_var) : NULL;
for (int i = 0; i < 4; i++) {
if (!state->pos[i])
state->pos[i] = nir_ssa_undef(b, 1, 32);
state->pos[i] = nir_undef(b, 1, 32);
}
nir_ssa_def *rcp_wc = nir_frcp(b, state->pos[3]);
nir_def *rcp_wc = nir_frcp(b, state->pos[3]);
if (state->pos_vpm_offset != -1) {
for (int i = 0; i < 4; i++) {
@ -496,8 +496,8 @@ v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
if (state->vp_vpm_offset != -1) {
for (int i = 0; i < 2; i++) {
nir_ssa_def *pos;
nir_ssa_def *scale;
nir_def *pos;
nir_def *scale;
pos = state->pos[i];
if (i == 0)
scale = nir_load_viewport_x_scale(b);
@ -523,7 +523,7 @@ v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
}
if (state->zs_vpm_offset != -1) {
nir_ssa_def *z = state->pos[2];
nir_def *z = state->pos[2];
z = nir_fmul(b, z, nir_load_viewport_z_scale(b));
z = nir_fmul(b, z, rcp_wc);
z = nir_fadd(b, z, nir_load_viewport_z_offset(b));
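
The Z path above is the usual perspective divide followed by the viewport Z transform; the same arithmetic in plain C with made-up clip-space and viewport values (the X/Y path is analogous, using the viewport X/Y scales):

   #include <stdio.h>

   int
   main(void)
   {
      float pos[4] = {0.5f, -1.0f, 1.5f, 2.0f}; /* clip-space position */
      float z_scale = 0.5f, z_offset = 0.5f;    /* viewport Z transform */

      float rcp_wc = 1.0f / pos[3];
      float zs = pos[2] * z_scale * rcp_wc + z_offset;

      printf("zs = %f\n", zs); /* 1.5 * 0.5 * 0.5 + 0.5 = 0.875 */
      return 0;
   }
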
@ -599,11 +599,11 @@ emit_gs_vpm_output_header_prolog(struct v3d_compile *c, nir_builder *b,
* offset variable by removing the one generic header slot that always
* goes at the beginning of our header.
*/
nir_ssa_def *header_offset =
nir_def *header_offset =
nir_load_var(b, state->gs.header_offset_var);
nir_ssa_def *vertex_count =
nir_def *vertex_count =
nir_iadd_imm(b, header_offset, -1);
nir_ssa_def *header =
nir_def *header =
nir_ior_imm(b,
nir_ishl_imm(b, vertex_count,
VERTEX_COUNT_OFFSET),

View file

@ -42,11 +42,11 @@ lower_line_smooth_intrinsic(struct lower_line_smooth_state *state,
{
b->cursor = nir_before_instr(&intr->instr);
nir_ssa_def *one = nir_imm_float(b, 1.0f);
nir_def *one = nir_imm_float(b, 1.0f);
nir_ssa_def *coverage = nir_load_var(b, state->coverage);
nir_def *coverage = nir_load_var(b, state->coverage);
nir_ssa_def *new_val = nir_fmul(b, nir_vec4(b, one, one, one, coverage),
nir_def *new_val = nir_fmul(b, nir_vec4(b, one, one, one, coverage),
intr->src[0].ssa);
nir_instr_rewrite_src(&intr->instr,
@ -89,21 +89,21 @@ initialise_coverage_var(struct lower_line_smooth_state *state,
{
nir_builder b = nir_builder_at(nir_before_block(nir_start_block(impl)));
nir_ssa_def *line_width = nir_load_line_width(&b);
nir_def *line_width = nir_load_line_width(&b);
nir_ssa_def *real_line_width = nir_load_aa_line_width(&b);
nir_def *real_line_width = nir_load_aa_line_width(&b);
/* The line coord varies from 0.0 to 1.0 across the width of the line */
nir_ssa_def *line_coord = nir_load_line_coord(&b);
nir_def *line_coord = nir_load_line_coord(&b);
/* fabs(line_coord - 0.5) * real_line_width */
nir_ssa_def *pixels_from_center =
nir_def *pixels_from_center =
nir_fmul(&b, real_line_width,
nir_fabs(&b, nir_fsub(&b, line_coord,
nir_imm_float(&b, 0.5f))));
/* 0.5 - 1/√2 * (pixels_from_center - line_width * 0.5) */
nir_ssa_def *coverage =
nir_def *coverage =
nir_fsub(&b,
nir_imm_float(&b, 0.5f),
nir_fmul(&b,
@ -114,14 +114,14 @@ initialise_coverage_var(struct lower_line_smooth_state *state,
0.5f))));
/* Discard fragments that aren't covered at all by the line */
nir_ssa_def *outside = nir_fle_imm(&b, coverage, 0.0f);
nir_def *outside = nir_fle_imm(&b, coverage, 0.0f);
nir_discard_if(&b, outside);
/* Clamp to at most 1.0. If it was less than 0.0 then the fragment will
* be discarded so we don't need to handle that.
*/
nir_ssa_def *clamped = nir_fmin(&b, coverage, nir_imm_float(&b, 1.0f));
nir_def *clamped = nir_fmin(&b, coverage, nir_imm_float(&b, 1.0f));
nir_store_var(&b, state->coverage, clamped, 0x1 /* writemask */);
}
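
The coverage initialisation above follows the formula in the comments: pixels_from_center = |line_coord - 0.5| * real_line_width, coverage = 0.5 - (1/sqrt(2)) * (pixels_from_center - line_width * 0.5), discard when it is not positive, clamp to 1.0. A standalone evaluation with illustrative widths:

   #include <math.h>
   #include <stdio.h>

   static float
   line_coverage(float line_coord, float line_width, float real_line_width)
   {
      float pixels_from_center = real_line_width * fabsf(line_coord - 0.5f);
      float coverage =
         0.5f - 0.70710678f /* 1/sqrt(2) */ *
                   (pixels_from_center - line_width * 0.5f);

      if (coverage <= 0.0f)
         return -1.0f; /* fragment would be discarded */

      return fminf(coverage, 1.0f);
   }

   int
   main(void)
   {
      for (float t = 0.0f; t <= 1.0f; t += 0.125f)
         printf("line_coord %.3f -> coverage %.3f\n", t,
                line_coverage(t, 2.0f, 4.0f));
      return 0;
   }
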

View file

@ -71,9 +71,9 @@ static nir_intrinsic_instr *
init_scalar_intrinsic(nir_builder *b,
nir_intrinsic_instr *intr,
uint32_t component,
nir_ssa_def *offset,
nir_def *offset,
uint32_t bit_size,
nir_ssa_def **scalar_offset)
nir_def **scalar_offset)
{
nir_intrinsic_instr *new_intr =
@ -129,20 +129,20 @@ lower_load_bitsize(nir_builder *b,
/* For global 2x32 we ignore Y component because it must be zero */
unsigned offset_idx = offset_src(intr->intrinsic);
nir_ssa_def *offset = nir_ssa_for_src(b, intr->src[offset_idx], 1);
nir_def *offset = nir_ssa_for_src(b, intr->src[offset_idx], 1);
/* Split vector store to multiple scalar loads */
nir_ssa_def *dest_components[4] = { NULL };
nir_def *dest_components[4] = { NULL };
const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
for (int component = 0; component < num_comp; component++) {
nir_ssa_def *scalar_offset;
nir_def *scalar_offset;
nir_intrinsic_instr *new_intr =
init_scalar_intrinsic(b, intr, component, offset,
bit_size, &scalar_offset);
for (unsigned i = 0; i < info->num_srcs; i++) {
if (i == offset_idx) {
nir_ssa_def *final_offset;
nir_def *final_offset;
final_offset = intr->intrinsic != nir_intrinsic_load_global_2x32 ?
scalar_offset :
nir_vec2(b, scalar_offset,
@ -160,8 +160,8 @@ lower_load_bitsize(nir_builder *b,
nir_builder_instr_insert(b, &new_intr->instr);
}
nir_ssa_def *new_dst = nir_vec(b, dest_components, num_comp);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_dst);
nir_def *new_dst = nir_vec(b, dest_components, num_comp);
nir_def_rewrite_uses(&intr->dest.ssa, new_dst);
nir_instr_remove(&intr->instr);
return true;
@ -181,13 +181,13 @@ lower_store_bitsize(nir_builder *b,
if (nir_src_bit_size(intr->src[value_idx]) == 32)
return false;
nir_ssa_def *value = nir_ssa_for_src(b, intr->src[value_idx], num_comp);
nir_def *value = nir_ssa_for_src(b, intr->src[value_idx], num_comp);
b->cursor = nir_before_instr(&intr->instr);
/* For global 2x32 we ignore Y component because it must be zero */
unsigned offset_idx = offset_src(intr->intrinsic);
nir_ssa_def *offset = nir_ssa_for_src(b, intr->src[offset_idx], 1);
nir_def *offset = nir_ssa_for_src(b, intr->src[offset_idx], 1);
/* Split vector store to multiple scalar stores */
const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
@ -195,7 +195,7 @@ lower_store_bitsize(nir_builder *b,
while (wrmask) {
unsigned component = ffs(wrmask) - 1;
nir_ssa_def *scalar_offset;
nir_def *scalar_offset;
nir_intrinsic_instr *new_intr =
init_scalar_intrinsic(b, intr, component, offset,
value->bit_size, &scalar_offset);
@ -204,11 +204,11 @@ lower_store_bitsize(nir_builder *b,
for (unsigned i = 0; i < info->num_srcs; i++) {
if (i == value_idx) {
nir_ssa_def *scalar_value =
nir_def *scalar_value =
nir_channels(b, value, 1 << component);
new_intr->src[i] = nir_src_for_ssa(scalar_value);
} else if (i == offset_idx) {
nir_ssa_def *final_offset;
nir_def *final_offset;
final_offset = intr->intrinsic != nir_intrinsic_store_global_2x32 ?
scalar_offset :
nir_vec2(b, scalar_offset,

View file

@ -36,8 +36,8 @@
#include "v3d_compiler.h"
typedef nir_ssa_def *(*nir_pack_func)(nir_builder *b, nir_ssa_def *c);
typedef nir_ssa_def *(*nir_unpack_func)(nir_builder *b, nir_ssa_def *c);
typedef nir_def *(*nir_pack_func)(nir_builder *b, nir_def *c);
typedef nir_def *(*nir_unpack_func)(nir_builder *b, nir_def *c);
static bool
logicop_depends_on_dst_color(int logicop_func)
@ -53,9 +53,9 @@ logicop_depends_on_dst_color(int logicop_func)
}
}
static nir_ssa_def *
static nir_def *
v3d_logicop(nir_builder *b, int logicop_func,
nir_ssa_def *src, nir_ssa_def *dst)
nir_def *src, nir_def *dst)
{
switch (logicop_func) {
case PIPE_LOGICOP_CLEAR:
@ -96,8 +96,8 @@ v3d_logicop(nir_builder *b, int logicop_func,
}
}
static nir_ssa_def *
v3d_nir_get_swizzled_channel(nir_builder *b, nir_ssa_def **srcs, int swiz)
static nir_def *
v3d_nir_get_swizzled_channel(nir_builder *b, nir_def **srcs, int swiz)
{
switch (swiz) {
default:
@ -116,48 +116,48 @@ v3d_nir_get_swizzled_channel(nir_builder *b, nir_ssa_def **srcs, int swiz)
}
}
static nir_ssa_def *
v3d_nir_swizzle_and_pack(nir_builder *b, nir_ssa_def **chans,
static nir_def *
v3d_nir_swizzle_and_pack(nir_builder *b, nir_def **chans,
const uint8_t *swiz, nir_pack_func pack_func)
{
nir_ssa_def *c[4];
nir_def *c[4];
for (int i = 0; i < 4; i++)
c[i] = v3d_nir_get_swizzled_channel(b, chans, swiz[i]);
return pack_func(b, nir_vec4(b, c[0], c[1], c[2], c[3]));
}
static nir_ssa_def *
v3d_nir_unpack_and_swizzle(nir_builder *b, nir_ssa_def *packed,
static nir_def *
v3d_nir_unpack_and_swizzle(nir_builder *b, nir_def *packed,
const uint8_t *swiz, nir_unpack_func unpack_func)
{
nir_ssa_def *unpacked = unpack_func(b, packed);
nir_def *unpacked = unpack_func(b, packed);
nir_ssa_def *unpacked_chans[4];
nir_def *unpacked_chans[4];
for (int i = 0; i < 4; i++)
unpacked_chans[i] = nir_channel(b, unpacked, i);
nir_ssa_def *c[4];
nir_def *c[4];
for (int i = 0; i < 4; i++)
c[i] = v3d_nir_get_swizzled_channel(b, unpacked_chans, swiz[i]);
return nir_vec4(b, c[0], c[1], c[2], c[3]);
}
static nir_ssa_def *
pack_unorm_rgb10a2(nir_builder *b, nir_ssa_def *c)
static nir_def *
pack_unorm_rgb10a2(nir_builder *b, nir_def *c)
{
static const unsigned bits[4] = { 10, 10, 10, 2 };
nir_ssa_def *unorm = nir_format_float_to_unorm(b, c, bits);
nir_def *unorm = nir_format_float_to_unorm(b, c, bits);
nir_ssa_def *chans[4];
nir_def *chans[4];
for (int i = 0; i < 4; i++)
chans[i] = nir_channel(b, unorm, i);
nir_ssa_def *result = nir_mov(b, chans[0]);
nir_def *result = nir_mov(b, chans[0]);
int offset = bits[0];
for (int i = 1; i < 4; i++) {
nir_ssa_def *shifted_chan =
nir_def *shifted_chan =
nir_ishl_imm(b, chans[i], offset);
result = nir_ior(b, result, shifted_chan);
offset += bits[i];
@ -165,8 +165,8 @@ pack_unorm_rgb10a2(nir_builder *b, nir_ssa_def *c)
return result;
}
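As a quick sanity check on the shift-and-or loop above (hand-worked, with made-up channel values): bits = {10, 10, 10, 2} gives per-channel shift offsets of 0, 10, 20 and 30, so unorm channel values (r, g, b, a) = (1023, 0, 512, 2) pack to

    result = 1023 | (0 << 10) | (512 << 20) | (2 << 30)
           = 0x000003FF | 0x00000000 | 0x20000000 | 0x80000000
           = 0xA00003FF

i.e. red sits in the low 10 bits and alpha in the top two, as expected for RGB10A2.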
static nir_ssa_def *
unpack_unorm_rgb10a2(nir_builder *b, nir_ssa_def *c)
static nir_def *
unpack_unorm_rgb10a2(nir_builder *b, nir_def *c)
{
static const unsigned bits[4] = { 10, 10, 10, 2 };
const unsigned masks[4] = { BITFIELD_MASK(bits[0]),
@ -174,9 +174,9 @@ unpack_unorm_rgb10a2(nir_builder *b, nir_ssa_def *c)
BITFIELD_MASK(bits[2]),
BITFIELD_MASK(bits[3]) };
nir_ssa_def *chans[4];
nir_def *chans[4];
for (int i = 0; i < 4; i++) {
nir_ssa_def *unorm = nir_iand_imm(b, c, masks[i]);
nir_def *unorm = nir_iand_imm(b, c, masks[i]);
chans[i] = nir_format_unorm_to_float(b, unorm, &bits[i]);
c = nir_ushr_imm(b, c, bits[i]);
}
@ -201,13 +201,13 @@ v3d_get_format_swizzle_for_rt(struct v3d_compile *c, int rt)
}
}
static nir_ssa_def *
static nir_def *
v3d_nir_get_tlb_color(nir_builder *b, struct v3d_compile *c, int rt, int sample)
{
uint32_t num_components =
util_format_get_nr_components(c->fs_key->color_fmt[rt].format);
nir_ssa_def *color[4];
nir_def *color[4];
for (int i = 0; i < 4; i++) {
if (i < num_components) {
color[i] =
@ -222,17 +222,17 @@ v3d_nir_get_tlb_color(nir_builder *b, struct v3d_compile *c, int rt, int sample)
return nir_vec4(b, color[0], color[1], color[2], color[3]);
}
static nir_ssa_def *
static nir_def *
v3d_emit_logic_op_raw(struct v3d_compile *c, nir_builder *b,
nir_ssa_def **src_chans, nir_ssa_def **dst_chans,
nir_def **src_chans, nir_def **dst_chans,
int rt, int sample)
{
const uint8_t *fmt_swz = v3d_get_format_swizzle_for_rt(c, rt);
nir_ssa_def *op_res[4];
nir_def *op_res[4];
for (int i = 0; i < 4; i++) {
nir_ssa_def *src = src_chans[i];
nir_ssa_def *dst =
nir_def *src = src_chans[i];
nir_def *dst =
v3d_nir_get_swizzled_channel(b, dst_chans, fmt_swz[i]);
op_res[i] = v3d_logicop(b, c->fs_key->logicop_func, src, dst);
@ -250,40 +250,40 @@ v3d_emit_logic_op_raw(struct v3d_compile *c, nir_builder *b,
}
}
nir_ssa_def *r[4];
nir_def *r[4];
for (int i = 0; i < 4; i++)
r[i] = v3d_nir_get_swizzled_channel(b, op_res, fmt_swz[i]);
return nir_vec4(b, r[0], r[1], r[2], r[3]);
}
static nir_ssa_def *
static nir_def *
v3d_emit_logic_op_unorm(struct v3d_compile *c, nir_builder *b,
nir_ssa_def **src_chans, nir_ssa_def **dst_chans,
nir_def **src_chans, nir_def **dst_chans,
int rt, int sample,
nir_pack_func pack_func, nir_unpack_func unpack_func)
{
static const uint8_t src_swz[4] = { 0, 1, 2, 3 };
nir_ssa_def *packed_src =
nir_def *packed_src =
v3d_nir_swizzle_and_pack(b, src_chans, src_swz, pack_func);
const uint8_t *fmt_swz = v3d_get_format_swizzle_for_rt(c, rt);
nir_ssa_def *packed_dst =
nir_def *packed_dst =
v3d_nir_swizzle_and_pack(b, dst_chans, fmt_swz, pack_func);
nir_ssa_def *packed_result =
nir_def *packed_result =
v3d_logicop(b, c->fs_key->logicop_func, packed_src, packed_dst);
return v3d_nir_unpack_and_swizzle(b, packed_result, fmt_swz, unpack_func);
}
static nir_ssa_def *
static nir_def *
v3d_nir_emit_logic_op(struct v3d_compile *c, nir_builder *b,
nir_ssa_def *src, int rt, int sample)
nir_def *src, int rt, int sample)
{
nir_ssa_def *dst = v3d_nir_get_tlb_color(b, c, rt, sample);
nir_def *dst = v3d_nir_get_tlb_color(b, c, rt, sample);
nir_ssa_def *src_chans[4], *dst_chans[4];
nir_def *src_chans[4], *dst_chans[4];
for (unsigned i = 0; i < 4; i++) {
src_chans[i] = nir_channel(b, src, i);
dst_chans[i] = nir_channel(b, dst, i);
@ -306,7 +306,7 @@ v3d_nir_emit_logic_op(struct v3d_compile *c, nir_builder *b,
static void
v3d_emit_ms_output(nir_builder *b,
nir_ssa_def *color, nir_src *offset,
nir_def *color, nir_src *offset,
nir_alu_type type, int rt, int sample)
{
nir_store_tlb_sample_color_v3d(b, color, nir_imm_int(b, rt), .base = sample, .component = 0, .src_type = type);
@ -318,7 +318,7 @@ v3d_nir_lower_logic_op_instr(struct v3d_compile *c,
nir_intrinsic_instr *intr,
int rt)
{
nir_ssa_def *frag_color = intr->src[0].ssa;
nir_def *frag_color = intr->src[0].ssa;
const int logic_op = c->fs_key->logicop_func;
@ -328,7 +328,7 @@ v3d_nir_lower_logic_op_instr(struct v3d_compile *c,
nir_src *offset = &intr->src[1];
nir_alu_type type = nir_intrinsic_src_type(intr);
for (int i = 0; i < V3D_MAX_SAMPLES; i++) {
nir_ssa_def *sample =
nir_def *sample =
v3d_nir_emit_logic_op(c, b, frag_color, rt, i);
v3d_emit_ms_output(b, sample, offset, type, rt, i);
@ -336,7 +336,7 @@ v3d_nir_lower_logic_op_instr(struct v3d_compile *c,
nir_instr_remove(&intr->instr);
} else {
nir_ssa_def *result =
nir_def *result =
v3d_nir_emit_logic_op(c, b, frag_color, rt, 0);
nir_instr_rewrite_src(&intr->instr, &intr->src[0],

View file

@ -34,11 +34,11 @@
* writemasks in the process.
*/
static nir_ssa_def *
static nir_def *
v3d_nir_scratch_offset(nir_builder *b, nir_intrinsic_instr *instr)
{
bool is_store = instr->intrinsic == nir_intrinsic_store_scratch;
nir_ssa_def *offset = nir_ssa_for_src(b, instr->src[is_store ? 1 : 0], 1);
nir_def *offset = nir_ssa_for_src(b, instr->src[is_store ? 1 : 0], 1);
assert(nir_intrinsic_align_mul(instr) >= 4);
assert(nir_intrinsic_align_offset(instr) == 0);
@ -55,11 +55,11 @@ v3d_nir_lower_load_scratch(nir_builder *b, nir_intrinsic_instr *instr)
{
b->cursor = nir_before_instr(&instr->instr);
nir_ssa_def *offset = v3d_nir_scratch_offset(b,instr);
nir_def *offset = v3d_nir_scratch_offset(b,instr);
nir_ssa_def *chans[NIR_MAX_VEC_COMPONENTS];
nir_def *chans[NIR_MAX_VEC_COMPONENTS];
for (int i = 0; i < instr->num_components; i++) {
nir_ssa_def *chan_offset =
nir_def *chan_offset =
nir_iadd_imm(b, offset, V3D_CHANNELS * i * 4);
nir_intrinsic_instr *chan_instr =
@ -77,8 +77,8 @@ v3d_nir_lower_load_scratch(nir_builder *b, nir_intrinsic_instr *instr)
chans[i] = &chan_instr->dest.ssa;
}
nir_ssa_def *result = nir_vec(b, chans, instr->num_components);
nir_ssa_def_rewrite_uses(&instr->dest.ssa, result);
nir_def *result = nir_vec(b, chans, instr->num_components);
nir_def_rewrite_uses(&instr->dest.ssa, result);
nir_instr_remove(&instr->instr);
}
@ -87,15 +87,15 @@ v3d_nir_lower_store_scratch(nir_builder *b, nir_intrinsic_instr *instr)
{
b->cursor = nir_before_instr(&instr->instr);
nir_ssa_def *offset = v3d_nir_scratch_offset(b, instr);
nir_ssa_def *value = nir_ssa_for_src(b, instr->src[0],
nir_def *offset = v3d_nir_scratch_offset(b, instr);
nir_def *value = nir_ssa_for_src(b, instr->src[0],
instr->num_components);
for (int i = 0; i < instr->num_components; i++) {
if (!(nir_intrinsic_write_mask(instr) & (1 << i)))
continue;
nir_ssa_def *chan_offset =
nir_def *chan_offset =
nir_iadd_imm(b, offset, V3D_CHANNELS * i * 4);
nir_intrinsic_instr *chan_instr =

View file

@ -32,21 +32,21 @@
* 2x2 quad.
*/
static nir_ssa_def *
static nir_def *
v3d_nir_lower_txf_ms_instr(nir_builder *b, nir_instr *in_instr, void *data)
{
nir_tex_instr *instr = nir_instr_as_tex(in_instr);
b->cursor = nir_before_instr(&instr->instr);
nir_ssa_def *coord = nir_steal_tex_src(instr, nir_tex_src_coord);
nir_ssa_def *sample = nir_steal_tex_src(instr, nir_tex_src_ms_index);
nir_def *coord = nir_steal_tex_src(instr, nir_tex_src_coord);
nir_def *sample = nir_steal_tex_src(instr, nir_tex_src_ms_index);
nir_ssa_def *one = nir_imm_int(b, 1);
nir_ssa_def *x = nir_iadd(b,
nir_def *one = nir_imm_int(b, 1);
nir_def *x = nir_iadd(b,
nir_ishl(b, nir_channel(b, coord, 0), one),
nir_iand(b, sample, one));
nir_ssa_def *y = nir_iadd(b,
nir_def *y = nir_iadd(b,
nir_ishl(b, nir_channel(b, coord, 1), one),
nir_iand(b, nir_ushr(b, sample, one), one));
if (instr->is_array)
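To make the addressing above concrete, a hand-worked example with an assumed texel coordinate of (5, 7): the lowered code computes x = (coord.x << 1) + (sample & 1) and y = (coord.y << 1) + ((sample >> 1) & 1), so the four samples of that pixel land on a 2x2 quad:

    sample 0 -> (10, 14)    sample 1 -> (11, 14)
    sample 2 -> (10, 15)    sample 3 -> (11, 15)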

View file

@ -1481,8 +1481,8 @@ lower_load_num_subgroups(struct v3d_compile *c,
DIV_ROUND_UP(c->s->info.workgroup_size[0] *
c->s->info.workgroup_size[1] *
c->s->info.workgroup_size[2], V3D_CHANNELS);
nir_ssa_def *result = nir_imm_int(b, num_subgroups);
nir_ssa_def_rewrite_uses(&intr->dest.ssa, result);
nir_def *result = nir_imm_int(b, num_subgroups);
nir_def_rewrite_uses(&intr->dest.ssa, result);
nir_instr_remove(&intr->instr);
}

View file

@ -33,16 +33,16 @@ get_set_event_cs()
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
"set event cs");
nir_ssa_def *buf =
nir_def *buf =
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
.desc_set = 0,
.binding = 0,
.desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
nir_ssa_def *offset =
nir_def *offset =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
nir_ssa_def *value =
nir_def *value =
nir_load_push_constant(&b, 1, 8, nir_imm_int(&b, 0), .base = 4, .range = 4);
nir_store_ssbo(&b, value, buf, offset,
@ -58,19 +58,19 @@ get_wait_event_cs()
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
"wait event cs");
nir_ssa_def *buf =
nir_def *buf =
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
.desc_set = 0,
.binding = 0,
.desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
nir_ssa_def *offset =
nir_def *offset =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
nir_loop *loop = nir_push_loop(&b);
nir_ssa_def *load =
nir_def *load =
nir_load_ssbo(&b, 1, 8, buf, offset, .access = 0, .align_mul = 4);
nir_ssa_def *value = nir_i2i32(&b, load);
nir_def *value = nir_i2i32(&b, load);
nir_if *if_stmt = nir_push_if(&b, nir_ieq_imm(&b, value, 1));
nir_jump(&b, nir_jump_break);

View file

@ -329,7 +329,7 @@ get_clear_rect_vs()
nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
vs_out_pos->data.location = VARYING_SLOT_POS;
nir_ssa_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
nir_store_var(&b, vs_out_pos, pos, 0xf);
return b.shader;
@ -386,7 +386,7 @@ get_clear_rect_gs(uint32_t push_constant_layer_base)
nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
/* gl_Layer from push constants */
nir_ssa_def *layer =
nir_def *layer =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
.base = push_constant_layer_base, .range = 4);
nir_store_var(&b, gs_out_layer, layer, 0x1);
@ -414,7 +414,7 @@ get_color_clear_rect_fs(uint32_t rt_idx, VkFormat format)
nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
fs_out_color->data.location = FRAG_RESULT_DATA0 + rt_idx;
nir_ssa_def *color_load = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
nir_def *color_load = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
nir_store_var(&b, fs_out_color, color_load, 0xf);
return b.shader;
@ -432,7 +432,7 @@ get_depth_clear_rect_fs()
"out_depth");
fs_out_depth->data.location = FRAG_RESULT_DEPTH;
nir_ssa_def *depth_load =
nir_def *depth_load =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
nir_store_var(&b, fs_out_depth, depth_load, 0x1);

View file

@ -2151,7 +2151,7 @@ get_texel_buffer_copy_vs()
glsl_vec4_type(), "gl_Position");
vs_out_pos->data.location = VARYING_SLOT_POS;
nir_ssa_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
nir_store_var(&b, vs_out_pos, pos, 0xf);
return b.shader;
@ -2208,7 +2208,7 @@ get_texel_buffer_copy_gs()
nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
/* gl_Layer from push constants */
nir_ssa_def *layer =
nir_def *layer =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
.base = TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET,
.range = 4);
@ -2222,7 +2222,7 @@ get_texel_buffer_copy_gs()
return nir;
}
static nir_ssa_def *
static nir_def *
load_frag_coord(nir_builder *b)
{
nir_foreach_shader_in_variable(var, b->shader) {
@ -2286,24 +2286,24 @@ get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format,
/* Load the box describing the pixel region we want to copy from the
* texel buffer.
*/
nir_ssa_def *box =
nir_def *box =
nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0),
.base = TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET,
.range = 16);
/* Load the buffer stride (this comes in texel units) */
nir_ssa_def *stride =
nir_def *stride =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
.base = TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET,
.range = 4);
/* Load the buffer offset (this comes in texel units) */
nir_ssa_def *offset =
nir_def *offset =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
.base = TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET,
.range = 4);
nir_ssa_def *coord = nir_f2i32(&b, load_frag_coord(&b));
nir_def *coord = nir_f2i32(&b, load_frag_coord(&b));
/* Load pixel data from texel buffer based on the x,y offset of the pixel
* within the box. Texel buffers are 1D arrays of texels.
@ -2313,17 +2313,17 @@ get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format,
* texel buffer should always be within its bounds and we don't need
* to add a check for that here.
*/
nir_ssa_def *x_offset =
nir_def *x_offset =
nir_isub(&b, nir_channel(&b, coord, 0),
nir_channel(&b, box, 0));
nir_ssa_def *y_offset =
nir_def *y_offset =
nir_isub(&b, nir_channel(&b, coord, 1),
nir_channel(&b, box, 1));
nir_ssa_def *texel_offset =
nir_def *texel_offset =
nir_iadd(&b, nir_iadd(&b, offset, x_offset),
nir_imul(&b, y_offset, stride));
nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
nir_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
tex->op = nir_texop_txf;
@ -2344,7 +2344,7 @@ get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format,
component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_B, cswizzle->b);
swiz[3] =
component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_A, cswizzle->a);
nir_ssa_def *s = nir_swizzle(&b, &tex->dest.ssa, swiz, 4);
nir_def *s = nir_swizzle(&b, &tex->dest.ssa, swiz, 4);
nir_store_var(&b, fs_out_color, s, 0xf);
return b.shader;
@ -3543,16 +3543,16 @@ create_blit_render_pass(struct v3dv_device *device,
return result == VK_SUCCESS;
}
static nir_ssa_def *
static nir_def *
gen_tex_coords(nir_builder *b)
{
nir_ssa_def *tex_box =
nir_def *tex_box =
nir_load_push_constant(b, 4, 32, nir_imm_int(b, 0), .base = 0, .range = 16);
nir_ssa_def *tex_z =
nir_def *tex_z =
nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
nir_ssa_def *vertex_id = nir_load_vertex_id(b);
nir_def *vertex_id = nir_load_vertex_id(b);
/* vertex 0: src0_x, src0_y
* vertex 1: src0_x, src1_y
@ -3565,11 +3565,11 @@ gen_tex_coords(nir_builder *b)
* channel 1 is vertex id & 1 ? src1_y : src0_y
*/
nir_ssa_def *one = nir_imm_int(b, 1);
nir_ssa_def *c0cmp = nir_ilt_imm(b, vertex_id, 2);
nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
nir_def *one = nir_imm_int(b, 1);
nir_def *c0cmp = nir_ilt_imm(b, vertex_id, 2);
nir_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
nir_ssa_def *comp[4];
nir_def *comp[4];
comp[0] = nir_bcsel(b, c0cmp,
nir_channel(b, tex_box, 0),
nir_channel(b, tex_box, 2));
@ -3582,9 +3582,9 @@ gen_tex_coords(nir_builder *b)
return nir_vec(b, comp, 4);
}
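Spelling out the selection logic from the comment above for all four vertices (src0/src1 refer to the two rectangle corners packed into tex_box, as the comment assumes), the x/y channels come out as: vertex 0 -> (src0_x, src0_y), vertex 1 -> (src0_x, src1_y), vertex 2 -> (src1_x, src0_y), vertex 3 -> (src1_x, src1_y), i.e. the four corners of the source rectangle.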
static nir_ssa_def *
static nir_def *
build_nir_tex_op_read(struct nir_builder *b,
nir_ssa_def *tex_pos,
nir_def *tex_pos,
enum glsl_base_type tex_type,
enum glsl_sampler_dim dim)
{
@ -3597,7 +3597,7 @@ build_nir_tex_op_read(struct nir_builder *b,
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;
nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
nir_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
tex->sampler_dim = dim;
tex->op = nir_texop_tex;
@ -3613,13 +3613,13 @@ build_nir_tex_op_read(struct nir_builder *b,
return &tex->dest.ssa;
}
static nir_ssa_def *
static nir_def *
build_nir_tex_op_ms_fetch_sample(struct nir_builder *b,
nir_variable *sampler,
nir_ssa_def *tex_deref,
nir_def *tex_deref,
enum glsl_base_type tex_type,
nir_ssa_def *tex_pos,
nir_ssa_def *sample_idx)
nir_def *tex_pos,
nir_def *sample_idx)
{
nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
@ -3637,9 +3637,9 @@ build_nir_tex_op_ms_fetch_sample(struct nir_builder *b,
}
/* Fetches all samples at the given position and averages them */
static nir_ssa_def *
static nir_def *
build_nir_tex_op_ms_resolve(struct nir_builder *b,
nir_ssa_def *tex_pos,
nir_def *tex_pos,
enum glsl_base_type tex_type,
VkSampleCountFlagBits src_samples)
{
@ -3653,10 +3653,10 @@ build_nir_tex_op_ms_resolve(struct nir_builder *b,
const bool is_int = glsl_base_type_is_integer(tex_type);
nir_ssa_def *tmp = NULL;
nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
nir_def *tmp = NULL;
nir_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
for (uint32_t i = 0; i < src_samples; i++) {
nir_ssa_def *s =
nir_def *s =
build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
tex_type, tex_pos,
nir_imm_int(b, i));
@ -3675,9 +3675,9 @@ build_nir_tex_op_ms_resolve(struct nir_builder *b,
}
/* Fetches the current sample (gl_SampleID) at the given position */
static nir_ssa_def *
static nir_def *
build_nir_tex_op_ms_read(struct nir_builder *b,
nir_ssa_def *tex_pos,
nir_def *tex_pos,
enum glsl_base_type tex_type)
{
const struct glsl_type *sampler_type =
@ -3687,17 +3687,17 @@ build_nir_tex_op_ms_read(struct nir_builder *b,
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;
nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
nir_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
return build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
tex_type, tex_pos,
nir_load_sample_id(b));
}
static nir_ssa_def *
static nir_def *
build_nir_tex_op(struct nir_builder *b,
struct v3dv_device *device,
nir_ssa_def *tex_pos,
nir_def *tex_pos,
enum glsl_base_type tex_type,
VkSampleCountFlagBits dst_samples,
VkSampleCountFlagBits src_samples,
@ -3741,10 +3741,10 @@ get_blit_vs()
vs_out_tex_coord->data.location = VARYING_SLOT_VAR0;
vs_out_tex_coord->data.interpolation = INTERP_MODE_SMOOTH;
nir_ssa_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
nir_store_var(&b, vs_out_pos, pos, 0xf);
nir_ssa_def *tex_coord = gen_tex_coords(&b);
nir_def *tex_coord = gen_tex_coords(&b);
nir_store_var(&b, vs_out_tex_coord, tex_coord, 0xf);
return b.shader;
@ -3795,11 +3795,11 @@ get_color_blit_fs(struct v3dv_device *device,
nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
fs_out_color->data.location = FRAG_RESULT_DATA0;
nir_ssa_def *tex_coord = nir_load_var(&b, fs_in_tex_coord);
nir_def *tex_coord = nir_load_var(&b, fs_in_tex_coord);
const uint32_t channel_mask = get_channel_mask_for_sampler_dim(sampler_dim);
tex_coord = nir_channels(&b, tex_coord, channel_mask);
nir_ssa_def *color = build_nir_tex_op(&b, device, tex_coord, src_base_type,
nir_def *color = build_nir_tex_op(&b, device, tex_coord, src_base_type,
dst_samples, src_samples, sampler_dim);
/* For integer textures, if the bit-size of the destination is too small to
@ -3814,7 +3814,7 @@ get_color_blit_fs(struct v3dv_device *device,
enum pipe_format src_pformat = vk_format_to_pipe_format(src_format);
enum pipe_format dst_pformat = vk_format_to_pipe_format(dst_format);
nir_ssa_def *c[4];
nir_def *c[4];
for (uint32_t i = 0; i < 4; i++) {
c[i] = nir_channel(&b, color, i);
@ -3832,11 +3832,11 @@ get_color_blit_fs(struct v3dv_device *device,
assert(dst_bit_size > 0);
if (util_format_is_pure_uint(dst_pformat)) {
nir_ssa_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);
nir_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);
c[i] = nir_umin(&b, c[i], max);
} else {
nir_ssa_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1);
nir_ssa_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1)));
nir_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1);
nir_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1)));
c[i] = nir_imax(&b, nir_imin(&b, c[i], max), min);
}
}

View file

@ -568,7 +568,7 @@ lower_vulkan_resource_index(nir_builder *b,
* vulkan_load_descriptor returns a vec2 providing an index and
* offset. Our backend compiler only cares about the index part.
*/
nir_ssa_def_rewrite_uses(&instr->dest.ssa,
nir_def_rewrite_uses(&instr->dest.ssa,
nir_imm_ivec2(b, index, 0));
nir_instr_remove(&instr->instr);
}
@ -594,7 +594,7 @@ lower_tex_src(nir_builder *b,
unsigned src_idx,
struct lower_pipeline_layout_state *state)
{
nir_ssa_def *index = NULL;
nir_def *index = NULL;
unsigned base_index = 0;
unsigned array_elements = 1;
nir_tex_src *src = &instr->src[src_idx];
@ -739,7 +739,7 @@ lower_image_deref(nir_builder *b,
struct lower_pipeline_layout_state *state)
{
nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
nir_ssa_def *index = NULL;
nir_def *index = NULL;
unsigned array_elements = 1;
unsigned base_index = 0;
@ -826,7 +826,7 @@ lower_intrinsic(nir_builder *b,
/* Loading the descriptor happens as part of load/store instructions,
* so for us this is a no-op.
*/
nir_ssa_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa);
nir_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa);
nir_instr_remove(&instr->instr);
return true;
}
@ -907,11 +907,11 @@ lower_point_coord_cb(nir_builder *b, nir_instr *instr, void *_state)
return false;
b->cursor = nir_after_instr(&intr->instr);
nir_ssa_def *result = &intr->dest.ssa;
nir_def *result = &intr->dest.ssa;
result =
nir_vector_insert_imm(b, result,
nir_fsub_imm(b, 1.0, nir_channel(b, result, 1)), 1);
nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
nir_def_rewrite_uses_after(&intr->dest.ssa,
result, result->parent_instr);
return true;
}
@ -2257,7 +2257,7 @@ pipeline_add_multiview_gs(struct v3dv_pipeline *pipeline,
out_layer->data.location = VARYING_SLOT_LAYER;
/* Get the view index value that we will write to gl_Layer */
nir_ssa_def *layer =
nir_def *layer =
nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32);
/* Emit all output vertices */

View file

@ -1345,23 +1345,23 @@ v3dv_ReleaseProfilingLockKHR(VkDevice device)
static inline void
nir_set_query_availability(nir_builder *b,
nir_ssa_def *buf,
nir_ssa_def *offset,
nir_ssa_def *query_idx,
nir_ssa_def *avail)
nir_def *buf,
nir_def *offset,
nir_def *query_idx,
nir_def *avail)
{
offset = nir_iadd(b, offset, query_idx); /* we use 1B per query */
nir_store_ssbo(b, avail, buf, offset, .write_mask = 0x1, .align_mul = 1);
}
static inline nir_ssa_def *
static inline nir_def *
nir_get_query_availability(nir_builder *b,
nir_ssa_def *buf,
nir_ssa_def *offset,
nir_ssa_def *query_idx)
nir_def *buf,
nir_def *offset,
nir_def *query_idx)
{
offset = nir_iadd(b, offset, query_idx); /* we use 1B per query */
nir_ssa_def *avail = nir_load_ssbo(b, 1, 8, buf, offset, .align_mul = 1);
nir_def *avail = nir_load_ssbo(b, 1, 8, buf, offset, .align_mul = 1);
return nir_i2i32(b, avail);
}
@ -1372,7 +1372,7 @@ get_set_query_availability_cs()
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
"set query availability cs");
nir_ssa_def *buf =
nir_def *buf =
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
.desc_set = 0,
.binding = 0,
@ -1382,15 +1382,15 @@ get_set_query_availability_cs()
* ever change any of these parameters we need to update how we compute the
* query index here.
*/
nir_ssa_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b, 32), 0);
nir_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b, 32), 0);
nir_ssa_def *offset =
nir_def *offset =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
nir_ssa_def *query_idx =
nir_def *query_idx =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 4, .range = 4);
nir_ssa_def *avail =
nir_def *avail =
nir_load_push_constant(&b, 1, 8, nir_imm_int(&b, 0), .base = 8, .range = 1);
query_idx = nir_iadd(&b, query_idx, wg_id);
@ -1399,12 +1399,12 @@ get_set_query_availability_cs()
return b.shader;
}
static inline nir_ssa_def *
nir_get_occlusion_counter_offset(nir_builder *b, nir_ssa_def *query_idx)
static inline nir_def *
nir_get_occlusion_counter_offset(nir_builder *b, nir_def *query_idx)
{
nir_ssa_def *query_group = nir_udiv_imm(b, query_idx, 16);
nir_ssa_def *query_group_offset = nir_umod_imm(b, query_idx, 16);
nir_ssa_def *offset =
nir_def *query_group = nir_udiv_imm(b, query_idx, 16);
nir_def *query_group_offset = nir_umod_imm(b, query_idx, 16);
nir_def *offset =
nir_iadd(b, nir_imul_imm(b, query_group, 1024),
nir_imul_imm(b, query_group_offset, 4));
return offset;
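A hand-worked example of the offset computation above (the query index is made up): for query_idx = 21, query_group = 21 / 16 = 1 and query_group_offset = 21 % 16 = 5, so

    offset = 1 * 1024 + 5 * 4 = 1044

i.e. counters live in groups of 16, each group occupying 1024 bytes, with 4 bytes per counter inside its group.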
@ -1412,20 +1412,20 @@ nir_get_occlusion_counter_offset(nir_builder *b, nir_ssa_def *query_idx)
static inline void
nir_reset_occlusion_counter(nir_builder *b,
nir_ssa_def *buf,
nir_ssa_def *query_idx)
nir_def *buf,
nir_def *query_idx)
{
nir_ssa_def *offset = nir_get_occlusion_counter_offset(b, query_idx);
nir_ssa_def *zero = nir_imm_int(b, 0);
nir_def *offset = nir_get_occlusion_counter_offset(b, query_idx);
nir_def *zero = nir_imm_int(b, 0);
nir_store_ssbo(b, zero, buf, offset, .write_mask = 0x1, .align_mul = 4);
}
static inline nir_ssa_def *
static inline nir_def *
nir_read_occlusion_counter(nir_builder *b,
nir_ssa_def *buf,
nir_ssa_def *query_idx)
nir_def *buf,
nir_def *query_idx)
{
nir_ssa_def *offset = nir_get_occlusion_counter_offset(b, query_idx);
nir_def *offset = nir_get_occlusion_counter_offset(b, query_idx);
return nir_load_ssbo(b, 1, 32, buf, offset, .access = 0, .align_mul = 4);
}
@ -1436,7 +1436,7 @@ get_reset_occlusion_query_cs()
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
"reset occlusion query cs");
nir_ssa_def *buf =
nir_def *buf =
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
.desc_set = 0,
.binding = 0,
@ -1446,15 +1446,15 @@ get_reset_occlusion_query_cs()
* ever change any of these parameters we need to update how we compute the
* query index here.
*/
nir_ssa_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b, 32), 0);
nir_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b, 32), 0);
nir_ssa_def *avail_offset =
nir_def *avail_offset =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
nir_ssa_def *base_query_idx =
nir_def *base_query_idx =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 4, .range = 4);
nir_ssa_def *query_idx = nir_iadd(&b, base_query_idx, wg_id);
nir_def *query_idx = nir_iadd(&b, base_query_idx, wg_id);
nir_set_query_availability(&b, buf, avail_offset, query_idx,
nir_imm_intN_t(&b, 0, 8));
@ -1465,16 +1465,16 @@ get_reset_occlusion_query_cs()
static void
write_query_buffer(nir_builder *b,
nir_ssa_def *buf,
nir_ssa_def **offset,
nir_ssa_def *value,
nir_def *buf,
nir_def **offset,
nir_def *value,
bool flag_64bit)
{
if (flag_64bit) {
/* Create a 64-bit value using a vec2 with the .Y component set to 0
* so we can write a 64-bit value in a single store.
*/
nir_ssa_def *value64 = nir_vec2(b, value, nir_imm_int(b, 0));
nir_def *value64 = nir_vec2(b, value, nir_imm_int(b, 0));
nir_store_ssbo(b, value64, buf, *offset, .write_mask = 0x3, .align_mul = 8);
*offset = nir_iadd_imm(b, *offset, 8);
} else {
@ -1494,55 +1494,55 @@ get_copy_query_results_cs(VkQueryResultFlags flags)
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
"copy query results cs");
nir_ssa_def *buf =
nir_def *buf =
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
.desc_set = 0,
.binding = 0,
.desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
nir_ssa_def *buf_out =
nir_def *buf_out =
nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
.desc_set = 1,
.binding = 0,
.desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
/* Read push constants */
nir_ssa_def *avail_offset =
nir_def *avail_offset =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
nir_ssa_def *base_query_idx =
nir_def *base_query_idx =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 4, .range = 4);
nir_ssa_def *base_offset_out =
nir_def *base_offset_out =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 8, .range = 4);
nir_ssa_def *stride =
nir_def *stride =
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 12, .range = 4);
/* This assumes a local size of 1 and a horizontal-only dispatch. If we
* ever change any of these parameters we need to update how we compute the
* query index here.
*/
nir_ssa_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b, 32), 0);
nir_ssa_def *query_idx = nir_iadd(&b, base_query_idx, wg_id);
nir_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b, 32), 0);
nir_def *query_idx = nir_iadd(&b, base_query_idx, wg_id);
/* Read query availability if needed */
nir_ssa_def *avail = NULL;
nir_def *avail = NULL;
if (flag_avail || !flag_partial)
avail = nir_get_query_availability(&b, buf, avail_offset, query_idx);
/* Write occlusion query result... */
nir_ssa_def *offset =
nir_def *offset =
nir_iadd(&b, base_offset_out, nir_imul(&b, wg_id, stride));
/* ...if partial is requested, we always write */
if(flag_partial) {
nir_ssa_def *query_res = nir_read_occlusion_counter(&b, buf, query_idx);
nir_def *query_res = nir_read_occlusion_counter(&b, buf, query_idx);
write_query_buffer(&b, buf_out, &offset, query_res, flag_64bit);
} else {
/*...otherwise, we only write if the query is available */
nir_if *if_stmt = nir_push_if(&b, nir_ine_imm(&b, avail, 0));
nir_ssa_def *query_res = nir_read_occlusion_counter(&b, buf, query_idx);
nir_def *query_res = nir_read_occlusion_counter(&b, buf, query_idx);
write_query_buffer(&b, buf_out, &offset, query_res, flag_64bit);
nir_pop_if(&b, if_stmt);
}

View file

@ -53,7 +53,7 @@ lower_clc_call_instr(nir_instr *instr, nir_builder *b,
return false;
}
nir_ssa_def **params = rzalloc_array(b->shader, nir_ssa_def*, call->num_params);
nir_def **params = rzalloc_array(b->shader, nir_def*, call->num_params);
for (unsigned i = 0; i < call->num_params; i++) {
params[i] = nir_ssa_for_src(b, call->params[i],

View file

@ -2610,10 +2610,10 @@ replace_unused_interpolate_at_with_undef(nir_builder *b, nir_instr *instr,
nir_variable *var = nir_intrinsic_get_var(intrin, 0);
if (var->data.mode == nir_var_shader_temp) {
/* Create undef and rewrite the interp uses */
nir_ssa_def *undef =
nir_ssa_undef(b, intrin->dest.ssa.num_components,
nir_def *undef =
nir_undef(b, intrin->dest.ssa.num_components,
intrin->dest.ssa.bit_size);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, undef);
nir_def_rewrite_uses(&intrin->dest.ssa, undef);
nir_instr_remove(&intrin->instr);
return true;

View file

@ -936,7 +936,7 @@ static void
zero_array_members(nir_builder *b, nir_variable *var)
{
nir_deref_instr *deref = nir_build_deref_var(b, var);
nir_ssa_def *zero = nir_imm_zero(b, 4, 32);
nir_def *zero = nir_imm_zero(b, 4, 32);
for (int i = 0; i < glsl_array_size(var->type); i++) {
nir_deref_instr *arr = nir_build_deref_array_imm(b, deref, i);
uint32_t mask = BITFIELD_MASK(glsl_get_vector_elements(arr->type));

View file

@ -118,7 +118,7 @@ lower_deref_instr(nir_builder *b, nir_intrinsic_instr *instr,
else
range_base = var->data.offset;
nir_ssa_def *offset = nir_imm_int(b, offset_value);
nir_def *offset = nir_imm_int(b, offset_value);
for (nir_deref_instr *d = deref; d->deref_type != nir_deref_type_var;
d = nir_deref_instr_parent(d)) {
assert(d->deref_type == nir_deref_type_array);

View file

@ -34,8 +34,8 @@
#define imm1(b, x) nir_imm_float(b, x)
#define imm3(b, x) nir_imm_vec3(b, x, x, x)
static nir_ssa_def *
swizzle(nir_builder *b, nir_ssa_def *src, int swizzle, int components)
static nir_def *
swizzle(nir_builder *b, nir_def *src, int swizzle, int components)
{
unsigned swizzle_arr[4];
swizzle_arr[0] = GET_SWZ(swizzle, 0);
@ -46,72 +46,72 @@ swizzle(nir_builder *b, nir_ssa_def *src, int swizzle, int components)
return nir_swizzle(b, src, swizzle_arr, components);
}
static nir_ssa_def *
swizzle_x(nir_builder *b, nir_ssa_def *src)
static nir_def *
swizzle_x(nir_builder *b, nir_def *src)
{
return nir_channel(b, src, 0);
}
static nir_ssa_def *
swizzle_y(nir_builder *b, nir_ssa_def *src)
static nir_def *
swizzle_y(nir_builder *b, nir_def *src)
{
return nir_channel(b, src, 1);
}
static nir_ssa_def *
swizzle_z(nir_builder *b, nir_ssa_def *src)
static nir_def *
swizzle_z(nir_builder *b, nir_def *src)
{
return nir_channel(b, src, 2);
}
static nir_ssa_def *
swizzle_w(nir_builder *b, nir_ssa_def *src)
static nir_def *
swizzle_w(nir_builder *b, nir_def *src)
{
return nir_channel(b, src, 3);
}
static nir_ssa_def *
blend_multiply(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
static nir_def *
blend_multiply(nir_builder *b, nir_def *src, nir_def *dst)
{
/* f(Cs,Cd) = Cs*Cd */
return nir_fmul(b, src, dst);
}
static nir_ssa_def *
blend_screen(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
static nir_def *
blend_screen(nir_builder *b, nir_def *src, nir_def *dst)
{
/* f(Cs,Cd) = Cs+Cd-Cs*Cd */
return nir_fsub(b, nir_fadd(b, src, dst), nir_fmul(b, src, dst));
}
static nir_ssa_def *
blend_overlay(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
static nir_def *
blend_overlay(nir_builder *b, nir_def *src, nir_def *dst)
{
/* f(Cs,Cd) = 2*Cs*Cd, if Cd <= 0.5
* 1-2*(1-Cs)*(1-Cd), otherwise
*/
nir_ssa_def *rule_1 = nir_fmul(b, nir_fmul(b, src, dst), imm3(b, 2.0));
nir_ssa_def *rule_2 =
nir_def *rule_1 = nir_fmul(b, nir_fmul(b, src, dst), imm3(b, 2.0));
nir_def *rule_2 =
nir_fsub(b, imm3(b, 1.0), nir_fmul(b, nir_fmul(b, nir_fsub(b, imm3(b, 1.0), src), nir_fsub(b, imm3(b, 1.0), dst)), imm3(b, 2.0)));
return nir_bcsel(b, nir_fge(b, imm3(b, 0.5f), dst), rule_1, rule_2);
}
static nir_ssa_def *
blend_darken(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
static nir_def *
blend_darken(nir_builder *b, nir_def *src, nir_def *dst)
{
/* f(Cs,Cd) = min(Cs,Cd) */
return nir_fmin(b, src, dst);
}
static nir_ssa_def *
blend_lighten(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
static nir_def *
blend_lighten(nir_builder *b, nir_def *src, nir_def *dst)
{
/* f(Cs,Cd) = max(Cs,Cd) */
return nir_fmax(b, src, dst);
}
static nir_ssa_def *
blend_colordodge(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
static nir_def *
blend_colordodge(nir_builder *b, nir_def *src, nir_def *dst)
{
/* f(Cs,Cd) =
* 0, if Cd <= 0
@ -123,8 +123,8 @@ blend_colordodge(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
nir_fmin(b, imm3(b, 1.0), nir_fdiv(b, dst, nir_fsub(b, imm3(b, 1.0), src)))));
}
static nir_ssa_def *
blend_colorburn(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
static nir_def *
blend_colorburn(nir_builder *b, nir_def *src, nir_def *dst)
{
/* f(Cs,Cd) =
* 1, if Cd >= 1
@ -136,20 +136,20 @@ blend_colorburn(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
nir_fsub(b, imm3(b, 1.0), nir_fmin(b, imm3(b, 1.0), nir_fdiv(b, nir_fsub(b, imm3(b, 1.0), dst), src)))));
}
static nir_ssa_def *
blend_hardlight(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
static nir_def *
blend_hardlight(nir_builder *b, nir_def *src, nir_def *dst)
{
/* f(Cs,Cd) = 2*Cs*Cd, if Cs <= 0.5
* 1-2*(1-Cs)*(1-Cd), otherwise
*/
nir_ssa_def *rule_1 = nir_fmul(b, imm3(b, 2.0), nir_fmul(b, src, dst));
nir_ssa_def *rule_2 =
nir_def *rule_1 = nir_fmul(b, imm3(b, 2.0), nir_fmul(b, src, dst));
nir_def *rule_2 =
nir_fsub(b, imm3(b, 1.0), nir_fmul(b, imm3(b, 2.0), nir_fmul(b, nir_fsub(b, imm3(b, 1.0), src), nir_fsub(b, imm3(b, 1.0), dst))));
return nir_bcsel(b, nir_fge(b, imm3(b, 0.5), src), rule_1, rule_2);
}
static nir_ssa_def *
blend_softlight(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
static nir_def *
blend_softlight(nir_builder *b, nir_def *src, nir_def *dst)
{
/* f(Cs,Cd) =
* Cd-(1-2*Cs)*Cd*(1-Cd),
@ -166,49 +166,49 @@ blend_softlight(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
* Cd*((16*Cd-12)*Cd+3) if Cs > 0.5 and Cd <= 0.25
* sqrt(Cd)-Cd, otherwise
*/
nir_ssa_def *factor_1 = nir_fmul(b, dst, nir_fsub(b, imm3(b, 1.0), dst));
nir_ssa_def *factor_2 =
nir_def *factor_1 = nir_fmul(b, dst, nir_fsub(b, imm3(b, 1.0), dst));
nir_def *factor_2 =
nir_fmul(b, dst, nir_fadd(b, nir_fmul(b, nir_fsub(b, nir_fmul(b, imm3(b, 16.0), dst), imm3(b, 12.0)), dst), imm3(b, 3.0)));
nir_ssa_def *factor_3 = nir_fsub(b, nir_fsqrt(b, dst), dst);
nir_ssa_def *factor = nir_bcsel(b, nir_fge(b, imm3(b, 0.5), src), factor_1,
nir_def *factor_3 = nir_fsub(b, nir_fsqrt(b, dst), dst);
nir_def *factor = nir_bcsel(b, nir_fge(b, imm3(b, 0.5), src), factor_1,
nir_bcsel(b, nir_fge(b, imm3(b, 0.25), dst), factor_2, factor_3));
return nir_fadd(b, dst, nir_fmul(b, nir_fsub(b, nir_fmul(b, imm3(b, 2.0), src), imm3(b, 1.0)), factor));
}
static nir_ssa_def *
blend_difference(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
static nir_def *
blend_difference(nir_builder *b, nir_def *src, nir_def *dst)
{
return nir_fabs(b, nir_fsub(b, dst, src));
}
static nir_ssa_def *
blend_exclusion(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
static nir_def *
blend_exclusion(nir_builder *b, nir_def *src, nir_def *dst)
{
return nir_fadd(b, src, nir_fsub(b, dst, nir_fmul(b, imm3(b, 2.0), nir_fmul(b, src, dst))));
}
/* Return the minimum of a vec3's components */
static nir_ssa_def *
minv3(nir_builder *b, nir_ssa_def *v)
static nir_def *
minv3(nir_builder *b, nir_def *v)
{
return nir_fmin(b, nir_fmin(b, swizzle_x(b, v), swizzle_y(b, v)), swizzle_z(b, v));
}
/* Return the maximum of a vec3's components */
static nir_ssa_def *
maxv3(nir_builder *b, nir_ssa_def *v)
static nir_def *
maxv3(nir_builder *b, nir_def *v)
{
return nir_fmax(b, nir_fmax(b, swizzle_x(b, v), swizzle_y(b, v)), swizzle_z(b, v));
}
static nir_ssa_def *
lumv3(nir_builder *b, nir_ssa_def *c)
static nir_def *
lumv3(nir_builder *b, nir_def *c)
{
return nir_fdot(b, c, nir_imm_vec3(b, 0.30, 0.59, 0.11));
}
static nir_ssa_def *
satv3(nir_builder *b, nir_ssa_def *c)
static nir_def *
satv3(nir_builder *b, nir_def *c)
{
return nir_fsub(b, maxv3(b, c), minv3(b, c));
}
@ -240,20 +240,20 @@ set_lum(nir_builder *b,
nir_variable *cbase,
nir_variable *clum)
{
nir_ssa_def *cbase_def = nir_load_var(b, cbase);
nir_def *cbase_def = nir_load_var(b, cbase);
nir_store_var(b, color, nir_fadd(b, cbase_def, nir_fsub(b, lumv3(b, nir_load_var(b, clum)), lumv3(b, cbase_def))), ~0);
nir_variable *llum = add_temp_var(b, "__blend_lum", glsl_float_type());
nir_variable *mincol = add_temp_var(b, "__blend_mincol", glsl_float_type());
nir_variable *maxcol = add_temp_var(b, "__blend_maxcol", glsl_float_type());
nir_ssa_def *color_def = nir_load_var(b, color);
nir_def *color_def = nir_load_var(b, color);
nir_store_var(b, llum, lumv3(b, color_def), ~0);
nir_store_var(b, mincol, minv3(b, color_def), ~0);
nir_store_var(b, maxcol, maxv3(b, color_def), ~0);
nir_ssa_def *mincol_def = nir_load_var(b, mincol);
nir_ssa_def *llum_def = nir_load_var(b, llum);
nir_def *mincol_def = nir_load_var(b, mincol);
nir_def *llum_def = nir_load_var(b, llum);
nir_if *nif = nir_push_if(b, nir_flt(b, mincol_def, imm1(b, 0.0)));
/* Add then block */
@ -261,7 +261,7 @@ set_lum(nir_builder *b,
/* Add else block */
nir_push_else(b, nif);
nir_ssa_def *maxcol_def = nir_load_var(b, maxcol);
nir_def *maxcol_def = nir_load_var(b, maxcol);
nir_if *nif2 = nir_push_if(b, nir_flt(b, imm1(b, 1.0), maxcol_def));
nir_store_var(b, color, nir_fadd(b, llum_def, nir_fdiv(b, nir_fmul(b, nir_fsub(b, color_def, llum_def), nir_fsub(b, imm3(b, 1.0), llum_def)), nir_fsub(b, maxcol_def, llum_def))), ~0);
nir_pop_if(b, nif2);
@ -279,8 +279,8 @@ set_lum_sat(nir_builder *b,
nir_variable *csat,
nir_variable *clum)
{
nir_ssa_def *cbase_def = nir_load_var(b, cbase);
nir_ssa_def *csat_def = nir_load_var(b, csat);
nir_def *cbase_def = nir_load_var(b, cbase);
nir_def *csat_def = nir_load_var(b, csat);
nir_variable *sbase = add_temp_var(b, "__blend_sbase", glsl_float_type());
nir_store_var(b, sbase, satv3(b, cbase_def), ~0);
@ -290,10 +290,10 @@ set_lum_sat(nir_builder *b,
* and interpolating the "middle" component based on its
* original value relative to the smallest/largest.
*/
nir_ssa_def *sbase_def = nir_load_var(b, sbase);
nir_def *sbase_def = nir_load_var(b, sbase);
nir_if *nif = nir_push_if(b, nir_flt(b, imm1(b, 0.0), sbase_def));
nir_ssa_def *ssat = satv3(b, csat_def);
nir_ssa_def *minbase = minv3(b, cbase_def);
nir_def *ssat = satv3(b, csat_def);
nir_def *minbase = minv3(b, cbase_def);
nir_store_var(b, color, nir_fdiv(b, nir_fmul(b, nir_fsub(b, cbase_def, minbase), ssat), sbase_def), ~0);
nir_push_else(b, nif);
nir_store_var(b, color, imm3(b, 0.0), ~0);
@ -302,7 +302,7 @@ set_lum_sat(nir_builder *b,
set_lum(b, color, color, clum);
}
static nir_ssa_def *
static nir_def *
is_mode(nir_builder *b, nir_variable *mode, enum gl_advanced_blend_mode q)
{
return nir_ieq_imm(b, nir_load_var(b, mode), (unsigned) q);
@ -312,7 +312,7 @@ static nir_variable *
calc_blend_result(nir_builder *b,
nir_variable *mode,
nir_variable *fb,
nir_ssa_def *blend_src,
nir_def *blend_src,
GLbitfield blend_qualifiers)
{
nir_variable *result = add_temp_var(b, "__blend_result", glsl_vec4_type());
@ -337,10 +337,10 @@ calc_blend_result(nir_builder *b,
nir_variable *dst_rgb = add_temp_var(b, "__blend_dst_rgb", glsl_vec_type(3));
nir_variable *dst_alpha = add_temp_var(b, "__blend_dst_a", glsl_float_type());
nir_ssa_def *fb_def = nir_load_var(b, fb);
nir_def *fb_def = nir_load_var(b, fb);
nir_store_var(b, dst_alpha, swizzle_w(b, fb_def), ~0);
nir_ssa_def *dst_alpha_def = nir_load_var(b, dst_alpha);
nir_def *dst_alpha_def = nir_load_var(b, dst_alpha);
nir_if *nif = nir_push_if(b, nir_feq(b, dst_alpha_def, imm1(b, 0.0)));
nir_store_var(b, dst_rgb, imm3(b, 0.0), ~0);
nir_push_else(b, nif);
@ -348,7 +348,7 @@ calc_blend_result(nir_builder *b,
nir_pop_if(b, nif);
nir_store_var(b, src_alpha, swizzle_w(b, blend_src), ~0);
nir_ssa_def *src_alpha_def = nir_load_var(b, src_alpha);
nir_def *src_alpha_def = nir_load_var(b, src_alpha);
nif = nir_push_if(b, nir_feq(b, src_alpha_def, imm1(b, 0.0)));
nir_store_var(b, src_rgb, imm3(b, 0.0), ~0);
nir_push_else(b, nif);
@ -357,15 +357,15 @@ calc_blend_result(nir_builder *b,
nir_variable *factor = add_temp_var(b, "__blend_factor", glsl_vec_type(3));
nir_ssa_def *src_rgb_def = nir_load_var(b, src_rgb);
nir_ssa_def *dst_rgb_def = nir_load_var(b, dst_rgb);
nir_def *src_rgb_def = nir_load_var(b, src_rgb);
nir_def *dst_rgb_def = nir_load_var(b, dst_rgb);
unsigned choices = blend_qualifiers;
while (choices) {
enum gl_advanced_blend_mode choice = (enum gl_advanced_blend_mode)u_bit_scan(&choices);
nir_if *iff = nir_push_if(b, is_mode(b, mode, choice));
nir_ssa_def *val = NULL;
nir_def *val = NULL;
switch (choice) {
case BLEND_MULTIPLY:
@ -454,7 +454,7 @@ calc_blend_result(nir_builder *b,
/* WRITEMASK_XYZ */
nir_store_var(b, result, nir_pad_vec4(b, nir_fadd(b, nir_fadd(b, nir_fmul(b, nir_load_var(b, factor), nir_load_var(b, p0)), nir_fmul(b, src_rgb_def, nir_load_var(b, p1))), nir_fmul(b, dst_rgb_def, nir_load_var(b, p2)))), 0x7);
/* WRITEMASK_W */
nir_ssa_def *val = nir_fadd(b, nir_fadd(b, nir_load_var(b, p0), nir_load_var(b, p1)), nir_load_var(b, p2));
nir_def *val = nir_fadd(b, nir_fadd(b, nir_load_var(b, p0), nir_load_var(b, p1)), nir_load_var(b, p2));
nir_store_var(b, result, nir_vec4(b, val, val, val, val), 0x8);
/* reset cursor to the end of the main function */
@ -466,10 +466,10 @@ calc_blend_result(nir_builder *b,
/**
* Dereference var, or var[0] if it's an array.
*/
static nir_ssa_def *
static nir_def *
load_output(nir_builder *b, nir_variable *var)
{
nir_ssa_def *var_def;
nir_def *var_def;
if (glsl_type_is_array(var->type)) {
var_def = nir_load_array_var_imm(b, var, 0);
} else {
@ -539,12 +539,12 @@ gl_nir_lower_blend_equation_advanced(nir_shader *sh, bool coherent)
/* Combine values written to outputs into a single RGBA blend source.
* We assign <0, 0, 0, 1> to any components with no corresponding output.
*/
nir_ssa_def *blend_source;
nir_def *blend_source;
if (outputs[0] &&
glsl_get_vector_elements(glsl_without_array(outputs[0]->type)) == 4) {
blend_source = load_output(&b, outputs[0]);
} else {
nir_ssa_def *blend_comps[4];
nir_def *blend_comps[4];
for (int i = 0; i < 4; i++) {
nir_variable *var = outputs[i];
if (var) {
@ -570,7 +570,7 @@ gl_nir_lower_blend_equation_advanced(nir_shader *sh, bool coherent)
if (glsl_type_is_array(outputs[i]->type)) {
nir_store_array_var_imm(&b, outputs[i], 0, nir_load_var(&b, result_dest), 1 << i);
} else {
nir_ssa_def *val = swizzle(&b, nir_load_var(&b, result_dest), i, 1);
nir_def *val = swizzle(&b, nir_load_var(&b, result_dest), i, 1);
nir_store_var(&b, outputs[i], nir_vec4(&b, val, val, val, val), 1 << i);
}
}

View file

@ -29,7 +29,7 @@
#include "util/compiler.h"
#include "main/shader_types.h"
static nir_ssa_def *
static nir_def *
get_block_array_index(nir_builder *b, nir_deref_instr *deref,
const struct gl_shader_program *shader_program)
{
@ -41,7 +41,7 @@ get_block_array_index(nir_builder *b, nir_deref_instr *deref,
*/
int const_array_offset = 0;
const char *block_name = "";
nir_ssa_def *nonconst_index = NULL;
nir_def *nonconst_index = NULL;
while (deref->deref_type == nir_deref_type_array) {
nir_deref_instr *parent = nir_deref_instr_parent(deref);
assert(parent && glsl_type_is_array(parent->type));
@ -56,9 +56,9 @@ get_block_array_index(nir_builder *b, nir_deref_instr *deref,
const_array_offset += arr_index * array_elements;
} else {
nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
nir_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
arr_index = nir_umin(b, arr_index, nir_imm_int(b, arr_size - 1));
nir_ssa_def *arr_offset = nir_amul_imm(b, arr_index, array_elements);
nir_def *arr_offset = nir_amul_imm(b, arr_index, array_elements);
if (nonconst_index)
nonconst_index = nir_iadd(b, nonconst_index, arr_offset);
else
@ -202,7 +202,7 @@ lower_buffer_interface_derefs_impl(nir_function_impl *impl,
b.cursor = nir_before_instr(&deref->instr);
unsigned offset = 0;
nir_ssa_def *ptr;
nir_def *ptr;
if (deref->deref_type == nir_deref_type_var &&
!glsl_type_is_interface(glsl_without_array(deref->var->type))) {
/* This variable is contained in an interface block rather than
@ -219,7 +219,7 @@ lower_buffer_interface_derefs_impl(nir_function_impl *impl,
* Everything after this point is a byte offset and will be
* handled by nir_lower_explicit_io().
*/
nir_ssa_def *index = get_block_array_index(&b, deref,
nir_def *index = get_block_array_index(&b, deref,
shader_program);
ptr = nir_vec2(&b, index, nir_imm_int(&b, offset));
} else {
@ -237,7 +237,7 @@ lower_buffer_interface_derefs_impl(nir_function_impl *impl,
cast->cast.align_mul = NIR_ALIGN_MUL_MAX;
cast->cast.align_offset = offset % NIR_ALIGN_MUL_MAX;
nir_ssa_def_rewrite_uses(&deref->dest.ssa,
nir_def_rewrite_uses(&deref->dest.ssa,
&cast->dest.ssa);
nir_deref_instr_remove_if_unused(deref);
break;
@ -262,8 +262,8 @@ lower_buffer_interface_derefs_impl(nir_function_impl *impl,
if (glsl_type_is_boolean(deref->type)) {
b.cursor = nir_after_instr(&intrin->instr);
intrin->dest.ssa.bit_size = 32;
nir_ssa_def *bval = nir_i2b(&b, &intrin->dest.ssa);
nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
nir_def *bval = nir_i2b(&b, &intrin->dest.ssa);
nir_def_rewrite_uses_after(&intrin->dest.ssa,
bval,
bval->parent_instr);
progress = true;
@ -288,7 +288,7 @@ lower_buffer_interface_derefs_impl(nir_function_impl *impl,
*/
if (glsl_type_is_boolean(deref->type)) {
b.cursor = nir_before_instr(&intrin->instr);
nir_ssa_def *ival = nir_b2i32(&b, intrin->src[1].ssa);
nir_def *ival = nir_b2i32(&b, intrin->src[1].ssa);
nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
nir_src_for_ssa(ival));
progress = true;

View file

@ -86,7 +86,7 @@ lower_instr(nir_builder *b, nir_instr *instr, void *cb_data)
b->cursor = nir_before_instr(instr);
nir_ssa_def *src;
nir_def *src;
int range_base = 0;
if (bindless) {
src = nir_load_deref(b, deref);

View file

@ -359,7 +359,7 @@ get_packed_varying_deref(struct lower_packed_varyings_state *state,
struct packing_store_values {
bool is_64bit;
unsigned writemasks[2];
nir_ssa_def *values[2];
nir_def *values[2];
nir_deref_instr *deref;
};
@ -374,7 +374,7 @@ bitwise_assign_pack(struct lower_packed_varyings_state *state,
nir_deref_instr *packed_deref,
nir_deref_instr *unpacked_deref,
const struct glsl_type *unpacked_type,
nir_ssa_def *value,
nir_def *value,
unsigned writemask)
{
@ -406,7 +406,7 @@ bitwise_assign_pack(struct lower_packed_varyings_state *state,
unsigned swiz_x = 0;
unsigned writemask = 0x3;
nir_ssa_def *swizzle = nir_swizzle(&state->b, value, &swiz_x, 1);
nir_def *swizzle = nir_swizzle(&state->b, value, &swiz_x, 1);
store_state->is_64bit = true;
store_state->deref = packed_deref;
@ -450,7 +450,7 @@ bitwise_assign_unpack(struct lower_packed_varyings_state *state,
nir_deref_instr *unpacked_deref,
nir_deref_instr *packed_deref,
const struct glsl_type *unpacked_type,
nir_ssa_def *value, unsigned writemask)
nir_def *value, unsigned writemask)
{
nir_variable *packed_var = nir_deref_instr_get_variable(packed_deref);
@ -523,7 +523,7 @@ bitwise_assign_unpack(struct lower_packed_varyings_state *state,
static void
create_store_deref(struct lower_packed_varyings_state *state,
nir_deref_instr *deref, nir_ssa_def *value,
nir_deref_instr *deref, nir_def *value,
unsigned writemask, bool is_64bit)
{
/* If dest and value have different number of components pack the srcs
@ -532,7 +532,7 @@ create_store_deref(struct lower_packed_varyings_state *state,
const struct glsl_type *type = glsl_without_array(deref->type);
unsigned comps = glsl_get_vector_elements(type);
if (value->num_components != comps) {
nir_ssa_def *srcs[4];
nir_def *srcs[4];
unsigned comp = 0;
for (unsigned i = 0; i < comps; i++) {
@ -543,7 +543,7 @@ create_store_deref(struct lower_packed_varyings_state *state,
srcs[i] = nir_swizzle(&state->b, value, &comp, 1);
comp++;
} else {
srcs[i] = nir_ssa_undef(&state->b, 1,
srcs[i] = nir_undef(&state->b, 1,
glsl_type_is_64bit(type) ? 64 : 32);
}
}
@ -555,7 +555,7 @@ create_store_deref(struct lower_packed_varyings_state *state,
static unsigned
lower_varying(struct lower_packed_varyings_state *state,
nir_ssa_def *rhs_swizzle, unsigned writemask,
nir_def *rhs_swizzle, unsigned writemask,
const struct glsl_type *type, unsigned fine_location,
nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
const char *name, bool gs_input_toplevel, unsigned vertex_index);
@ -576,7 +576,7 @@ lower_varying(struct lower_packed_varyings_state *state,
*/
static unsigned
lower_arraylike(struct lower_packed_varyings_state *state,
nir_ssa_def *rhs_swizzle, unsigned writemask,
nir_def *rhs_swizzle, unsigned writemask,
const struct glsl_type *type, unsigned fine_location,
nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
const char *name, bool gs_input_toplevel, unsigned vertex_index)
@ -640,7 +640,7 @@ lower_arraylike(struct lower_packed_varyings_state *state,
*/
static unsigned
lower_varying(struct lower_packed_varyings_state *state,
nir_ssa_def *rhs_swizzle, unsigned writemask,
nir_def *rhs_swizzle, unsigned writemask,
const struct glsl_type *type, unsigned fine_location,
nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
const char *name, bool gs_input_toplevel, unsigned vertex_index)
@ -741,10 +741,10 @@ lower_varying(struct lower_packed_varyings_state *state,
ralloc_asprintf(state->mem_ctx, "%s.%s", name, left_swizzle_name) :
NULL;
nir_ssa_def *left_swizzle = NULL;
nir_def *left_swizzle = NULL;
unsigned left_writemask = ~0u;
if (state->mode == nir_var_shader_out) {
nir_ssa_def *ssa_def = rhs_swizzle ?
nir_def *ssa_def = rhs_swizzle ?
rhs_swizzle : nir_load_deref(&state->b, unpacked_var_deref);
left_swizzle =
nir_swizzle(&state->b, ssa_def,
@ -767,10 +767,10 @@ lower_varying(struct lower_packed_varyings_state *state,
ralloc_asprintf(state->mem_ctx, "%s.%s", name, right_swizzle_name) :
NULL;
nir_ssa_def *right_swizzle = NULL;
nir_def *right_swizzle = NULL;
unsigned right_writemask = ~0u;
if (state->mode == nir_var_shader_out) {
nir_ssa_def *ssa_def = rhs_swizzle ?
nir_def *ssa_def = rhs_swizzle ?
rhs_swizzle : nir_load_deref(&state->b, unpacked_var_deref);
right_swizzle =
nir_swizzle(&state->b, ssa_def,
@ -810,7 +810,7 @@ lower_varying(struct lower_packed_varyings_state *state,
struct packing_store_values *store_value;
if (state->mode == nir_var_shader_out) {
unsigned writemask = ((1 << components) - 1) << location_frac;
nir_ssa_def *value = rhs_swizzle ? rhs_swizzle :
nir_def *value = rhs_swizzle ? rhs_swizzle :
nir_load_deref(&state->b, unpacked_var_deref);
store_value =
@ -822,9 +822,9 @@ lower_varying(struct lower_packed_varyings_state *state,
swizzle_values[i] = i + location_frac;
}
nir_ssa_def *ssa_def = &packed_deref->dest.ssa;
nir_def *ssa_def = &packed_deref->dest.ssa;
ssa_def = nir_load_deref(&state->b, packed_deref);
nir_ssa_def *swizzle =
nir_def *swizzle =
nir_swizzle(&state->b, ssa_def, swizzle_values, components);
store_value = bitwise_assign_unpack(state, unpacked_var_deref,

View file

@ -140,11 +140,11 @@ copy_to_new_var(nir_builder *b, nir_deref_instr *deref,
nir_deref_instr *new_var_m_deref =
nir_build_deref_array(b, new_var_deref, &c->def);
nir_ssa_def *value = nir_load_deref(b, m_deref);
nir_def *value = nir_load_deref(b, m_deref);
nir_store_deref(b, new_var_m_deref, value, writemask);
}
} else {
nir_ssa_def *value = nir_load_deref(b, deref);
nir_def *value = nir_load_deref(b, deref);
nir_store_deref(b, new_var_deref, value, writemask);
}
}

View file

@ -298,7 +298,7 @@ rewrite_varying_deref(nir_builder *b, struct replace_varyings_data *rv_data,
unsigned i = nir_src_as_uint(deref->arr.index);
nir_deref_instr *new_deref =
nir_build_deref_var(b, rv_data->new_texcoord[i]);
nir_ssa_def_rewrite_uses(&deref->dest.ssa, &new_deref->dest.ssa);
nir_def_rewrite_uses(&deref->dest.ssa, &new_deref->dest.ssa);
return;
}
}


@ -84,21 +84,21 @@ public:
private:
void add_instr(nir_instr *instr, unsigned num_components, unsigned bit_size);
nir_ssa_def *evaluate_rvalue(ir_rvalue *ir);
nir_def *evaluate_rvalue(ir_rvalue *ir);
nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def **srcs);
nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1);
nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
nir_ssa_def *src2);
nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
nir_ssa_def *src2, nir_ssa_def *src3);
nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_def **srcs);
nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_def *src1);
nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_def *src1,
nir_def *src2);
nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_def *src1,
nir_def *src2, nir_def *src3);
bool supports_std430;
nir_shader *shader;
nir_function_impl *impl;
nir_builder b;
nir_ssa_def *result; /* result of the expression tree last visited */
nir_def *result; /* result of the expression tree last visited */
nir_deref_instr *evaluate_deref(ir_instruction *ir);
@ -122,7 +122,7 @@ private:
struct set *sparse_variable_set;
void adjust_sparse_variable(nir_deref_instr *var_deref, const glsl_type *type,
nir_ssa_def *dest);
nir_def *dest);
const struct gl_constants *consts;
};
@ -465,7 +465,7 @@ nir_visitor::constant_copy(ir_constant *ir, void *mem_ctx)
void
nir_visitor::adjust_sparse_variable(nir_deref_instr *var_deref, const glsl_type *type,
nir_ssa_def *dest)
nir_def *dest)
{
const glsl_type *texel_type = type->field_type("texel");
assert(texel_type);
@ -914,7 +914,7 @@ nir_visitor::visit(ir_return *ir)
nir_build_deref_cast(&b, nir_load_param(&b, 0),
nir_var_function_temp, ir->value->type, 0);
nir_ssa_def *val = evaluate_rvalue(ir->value);
nir_def *val = evaluate_rvalue(ir->value);
nir_store_deref(&b, ret_deref, val, ~0);
}
@ -1180,7 +1180,7 @@ nir_visitor::visit(ir_call *ir)
}
nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
nir_ssa_def *ret = &instr->dest.ssa;
nir_def *ret = &instr->dest.ssa;
switch (op) {
case nir_intrinsic_deref_atomic:
@ -1341,15 +1341,15 @@ nir_visitor::visit(ir_call *ir)
/* Set the address argument, extending the coordinate vector to four
* components.
*/
nir_ssa_def *src_addr =
nir_def *src_addr =
evaluate_rvalue((ir_dereference *)param);
nir_ssa_def *srcs[4];
nir_def *srcs[4];
for (int i = 0; i < 4; i++) {
if (i < type->coordinate_components())
srcs[i] = nir_channel(&b, src_addr, i);
else
srcs[i] = nir_ssa_undef(&b, 1, 32);
srcs[i] = nir_undef(&b, 1, 32);
}
instr->src[1] = nir_src_for_ssa(nir_vec(&b, srcs, 4));
@ -1363,7 +1363,7 @@ nir_visitor::visit(ir_call *ir)
nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
param = param->get_next();
} else {
instr->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
instr->src[2] = nir_src_for_ssa(nir_undef(&b, 1, 32));
}
/* Set the intrinsic parameters. */
@ -1468,7 +1468,7 @@ nir_visitor::visit(ir_call *ir)
ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
assert(write_mask);
nir_ssa_def *nir_val = evaluate_rvalue(val);
nir_def *nir_val = evaluate_rvalue(val);
if (val->type->is_boolean())
nir_val = nir_b2i32(&b, nir_val);
@ -1521,7 +1521,7 @@ nir_visitor::visit(ir_call *ir)
nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]);
nir_ssa_def *nir_val = evaluate_rvalue(val);
nir_def *nir_val = evaluate_rvalue(val);
/* The value in shared memory is a 32-bit value */
if (val->type->is_boolean())
nir_val = nir_b2b32(&b, nir_val);
@ -1639,7 +1639,7 @@ nir_visitor::visit(ir_call *ir)
nir_deref_instr *out_deref = evaluate_deref(param_rvalue);
call->params[i] = nir_src_for_ssa(&out_deref->dest.ssa);
} else if (sig_param->data.mode == ir_var_function_in) {
nir_ssa_def *val = evaluate_rvalue(param_rvalue);
nir_def *val = evaluate_rvalue(param_rvalue);
nir_src src = nir_src_for_ssa(val);
nir_src_copy(&call->params[i], &src, &call->instr);
@ -1685,7 +1685,7 @@ nir_visitor::visit(ir_assignment *ir)
ir->lhs->accept(this);
nir_deref_instr *lhs_deref = this->deref;
nir_ssa_def *src = evaluate_rvalue(ir->rhs);
nir_def *src = evaluate_rvalue(ir->rhs);
if (is_sparse) {
adjust_sparse_variable(lhs_deref, tex->type, src);
@ -1767,7 +1767,7 @@ nir_visitor::add_instr(nir_instr *instr, unsigned num_components,
}
}
nir_ssa_def *
nir_def *
nir_visitor::evaluate_rvalue(ir_rvalue* ir)
{
ir->accept(this);
@ -1880,7 +1880,7 @@ nir_visitor::visit(ir_expression *ir)
break;
}
nir_ssa_def *srcs[4];
nir_def *srcs[4];
for (unsigned i = 0; i < ir->num_operands; i++)
srcs[i] = evaluate_rvalue(ir->operands[i]);
@ -2457,7 +2457,7 @@ nir_visitor::visit(ir_texture *ir)
/* check for bindless handles */
if (!nir_deref_mode_is(sampler_deref, nir_var_uniform) ||
nir_deref_instr_get_variable(sampler_deref)->data.bindless) {
nir_ssa_def *load = nir_load_deref(&b, sampler_deref);
nir_def *load = nir_load_deref(&b, sampler_deref);
instr->src[0] = nir_tex_src_for_ssa(nir_tex_src_texture_handle, load);
instr->src[1] = nir_tex_src_for_ssa(nir_tex_src_sampler_handle, load);
} else {
@ -2618,10 +2618,10 @@ nir_visitor::visit(ir_dereference_record *ir)
*/
if (this->deref->deref_type == nir_deref_type_var &&
_mesa_set_search(this->sparse_variable_set, this->deref->var)) {
nir_ssa_def *load = nir_load_deref(&b, this->deref);
nir_def *load = nir_load_deref(&b, this->deref);
assert(load->num_components >= 2);
nir_ssa_def *ssa;
nir_def *ssa;
const glsl_type *type = ir->record->type;
if (field_index == type->field_index("code")) {
/* last channel holds residency code */
@ -2645,7 +2645,7 @@ nir_visitor::visit(ir_dereference_record *ir)
void
nir_visitor::visit(ir_dereference_array *ir)
{
nir_ssa_def *index = evaluate_rvalue(ir->array_index);
nir_def *index = evaluate_rvalue(ir->array_index);
ir->array->accept(this);


@ -114,7 +114,7 @@ files_libnir = files(
'nir_format_convert.h',
'nir_from_ssa.c',
'nir_gather_info.c',
'nir_gather_ssa_types.c',
'nir_gather_types.c',
'nir_gather_xfb_info.c',
'nir_group_loads.c',
'nir_gs_count_vertices.c',


@ -718,7 +718,7 @@ nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
gc_zalloc_zla(shader->gctx, nir_load_const_instr, nir_const_value, num_components);
instr_init(&instr->instr, nir_instr_type_load_const);
nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size);
nir_def_init(&instr->instr, &instr->def, num_components, bit_size);
return instr;
}
@ -870,15 +870,15 @@ nir_parallel_copy_instr_create(nir_shader *shader)
return instr;
}
nir_ssa_undef_instr *
nir_ssa_undef_instr_create(nir_shader *shader,
unsigned num_components,
unsigned bit_size)
nir_undef_instr *
nir_undef_instr_create(nir_shader *shader,
unsigned num_components,
unsigned bit_size)
{
nir_ssa_undef_instr *instr = gc_alloc(shader->gctx, nir_ssa_undef_instr, 1);
nir_undef_instr *instr = gc_alloc(shader->gctx, nir_undef_instr, 1);
instr_init(&instr->instr, nir_instr_type_ssa_undef);
nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size);
nir_def_init(&instr->instr, &instr->def, num_components, bit_size);
return instr;
}
@ -1036,7 +1036,7 @@ add_use_cb(nir_src *src, void *state)
}
static bool
add_ssa_def_cb(nir_ssa_def *def, void *state)
add_ssa_def_cb(nir_def *def, void *state)
{
nir_instr *instr = state;
@ -1194,11 +1194,11 @@ nir_instr_free_list(struct exec_list *list)
}
static bool
nir_instr_free_and_dce_live_cb(nir_ssa_def *def, void *state)
nir_instr_free_and_dce_live_cb(nir_def *def, void *state)
{
bool *live = state;
if (!nir_ssa_def_is_unused(def)) {
if (!nir_def_is_unused(def)) {
*live = true;
return false;
} else {
@ -1291,7 +1291,7 @@ struct foreach_ssa_def_state {
};
static inline bool
nir_ssa_def_visitor(nir_dest *dest, void *void_state)
nir_def_visitor(nir_dest *dest, void *void_state)
{
struct foreach_ssa_def_state *state = void_state;
@ -1309,7 +1309,7 @@ nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
case nir_instr_type_phi:
case nir_instr_type_parallel_copy: {
struct foreach_ssa_def_state foreach_state = { cb, state };
return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state);
return nir_foreach_dest(instr, nir_def_visitor, &foreach_state);
}
case nir_instr_type_load_const:
@ -1324,7 +1324,7 @@ nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
}
}
nir_ssa_def *
nir_def *
nir_instr_ssa_def(nir_instr *instr)
{
switch (instr->type) {
@ -1539,9 +1539,9 @@ nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
}
void
nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
unsigned num_components,
unsigned bit_size)
nir_def_init(nir_instr *instr, nir_def *def,
unsigned num_components,
unsigned bit_size)
{
def->parent_instr = instr;
list_inithead(&def->uses);
@ -1565,22 +1565,22 @@ void
nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
unsigned num_components, unsigned bit_size)
{
nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size);
nir_def_init(instr, &dest->ssa, num_components, bit_size);
}
void
nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_ssa_def *new_ssa)
nir_def_rewrite_uses(nir_def *def, nir_def *new_ssa)
{
assert(def != new_ssa);
nir_foreach_use_including_if_safe(use_src, def) {
nir_src_rewrite_ssa(use_src, new_ssa);
nir_src_rewrite(use_src, new_ssa);
}
}
void
nir_ssa_def_rewrite_uses_src(nir_ssa_def *def, nir_src new_src)
nir_def_rewrite_uses_src(nir_def *def, nir_src new_src)
{
nir_ssa_def_rewrite_uses(def, new_src.ssa);
nir_def_rewrite_uses(def, new_src.ssa);
}
static bool
@ -1614,8 +1614,8 @@ is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
* def->parent_instr and that after_me comes after def->parent_instr.
*/
void
nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_ssa_def *new_ssa,
nir_instr *after_me)
nir_def_rewrite_uses_after(nir_def *def, nir_def *new_ssa,
nir_instr *after_me)
{
if (def == new_ssa)
return;
@ -1632,11 +1632,11 @@ nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_ssa_def *new_ssa,
continue;
}
nir_src_rewrite_ssa(use_src, new_ssa);
nir_src_rewrite(use_src, new_ssa);
}
}
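
For readers following the rename, here is a hypothetical usage sketch (not part of this diff) of the rewrite helpers above; the builder "b", the intrinsic "intrin" and the replacement value are illustrative assumptions only:

   /* Replace every use of an instruction's result, then drop the now-dead
    * instruction; "b" and "intrin" are assumed to exist in the caller. */
   b->cursor = nir_after_instr(&intrin->instr);
   nir_def *replacement = nir_imm_int(b, 0);          /* build the new value */
   nir_def_rewrite_uses(&intrin->dest.ssa, replacement);
   nir_instr_remove(&intrin->instr);
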
static nir_ssa_def *
static nir_def *
get_store_value(nir_intrinsic_instr *intrin)
{
assert(nir_intrinsic_has_write_mask(intrin));
@ -1672,7 +1672,7 @@ nir_src_components_read(const nir_src *src)
}
nir_component_mask_t
nir_ssa_def_components_read(const nir_ssa_def *def)
nir_def_components_read(const nir_def *def)
{
nir_component_mask_t read_mask = 0;
@ -1950,7 +1950,7 @@ nir_index_blocks(nir_function_impl *impl)
}
static bool
index_ssa_def_cb(nir_ssa_def *def, void *state)
index_ssa_def_cb(nir_def *def, void *state)
{
unsigned *index = (unsigned *)state;
def->index = (*index)++;
@ -2084,14 +2084,14 @@ nir_function_impl_lower_instructions(nir_function_impl *impl,
continue;
}
nir_ssa_def *old_def = nir_instr_ssa_def(instr);
nir_def *old_def = nir_instr_ssa_def(instr);
struct list_head old_uses;
if (old_def != NULL) {
/* We're about to ask the callback to generate a replacement for instr.
* Save off the uses from instr's SSA def so we know what uses to
* rewrite later. If we use nir_ssa_def_rewrite_uses, it fails in the
* rewrite later. If we use nir_def_rewrite_uses, it fails in the
* case where the generated replacement code uses the result of instr
* itself. If we use nir_ssa_def_rewrite_uses_after (which is the
* itself. If we use nir_def_rewrite_uses_after (which is the
* normal solution to this problem), it doesn't work well if control-
* flow is inserted as part of the replacement, doesn't handle cases
* where the replacement is something consumed by instr, and suffers
@ -2104,7 +2104,7 @@ nir_function_impl_lower_instructions(nir_function_impl *impl,
}
b.cursor = nir_after_instr(instr);
nir_ssa_def *new_def = lower(&b, instr, cb_data);
nir_def *new_def = lower(&b, instr, cb_data);
if (new_def && new_def != NIR_LOWER_INSTR_PROGRESS &&
new_def != NIR_LOWER_INSTR_PROGRESS_REPLACE) {
assert(old_def != NULL);
@ -2119,7 +2119,7 @@ nir_function_impl_lower_instructions(nir_function_impl *impl,
nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
}
if (nir_ssa_def_is_unused(old_def)) {
if (nir_def_is_unused(old_def)) {
iter = nir_instr_free_and_dce(instr);
} else {
iter = nir_after_instr(instr);
@ -2530,7 +2530,7 @@ nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot)
}
void
nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *src,
nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_def *src,
bool bindless)
{
enum gl_access_qualifier access = nir_intrinsic_access(intrin);
@ -2760,10 +2760,10 @@ nir_alu_instr_is_copy(nir_alu_instr *instr)
return nir_op_is_vec(instr->op);
}
nir_ssa_scalar
nir_ssa_scalar_chase_movs(nir_ssa_scalar s)
nir_scalar
nir_scalar_chase_movs(nir_scalar s)
{
while (nir_ssa_scalar_is_alu(s)) {
while (nir_scalar_is_alu(s)) {
nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
if (!nir_alu_instr_is_copy(alu))
break;



@ -946,7 +946,7 @@ nir_instr_is_last(const nir_instr *instr)
return exec_node_is_tail_sentinel(exec_node_get_next_const(&instr->node));
}
typedef struct nir_ssa_def {
typedef struct nir_def {
/** Instruction which produces this SSA value. */
nir_instr *parent_instr;
@ -966,7 +966,7 @@ typedef struct nir_ssa_def {
* invocations of the shader. This is set by nir_divergence_analysis.
*/
bool divergent;
} nir_ssa_def;
} nir_def;
struct nir_src;
struct nir_if;
@ -979,7 +979,7 @@ typedef struct nir_src {
};
struct list_head use_link;
nir_ssa_def *ssa;
nir_def *ssa;
bool is_if;
} nir_src;
@ -1030,7 +1030,7 @@ nir_src_init(void)
if (src->is_if)
static inline bool
nir_ssa_def_used_by_if(const nir_ssa_def *def)
nir_def_used_by_if(const nir_def *def)
{
nir_foreach_if_use(_, def)
return true;
@ -1039,7 +1039,7 @@ nir_ssa_def_used_by_if(const nir_ssa_def *def)
}
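
As an aside (not from this commit), the renamed def/use helpers above are typically combined as in this minimal sketch; the predicate name is invented:

   /* Returns true if "def" feeds only ALU instructions and no if-condition. */
   static bool
   def_only_feeds_alu(const nir_def *def)
   {
      nir_foreach_use(src, def) {
         if (src->parent_instr->type != nir_instr_type_alu)
            return false;
      }
      return !nir_def_used_by_if(def);
   }
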
typedef struct {
nir_ssa_def ssa;
nir_def ssa;
} nir_dest;
static inline nir_dest
@ -1052,7 +1052,7 @@ nir_dest_init(void)
#define NIR_DEST_INIT nir_dest_init()
static inline nir_src
nir_src_for_ssa(nir_ssa_def *def)
nir_src_for_ssa(nir_def *def)
{
nir_src src = NIR_SRC_INIT;
@ -1984,7 +1984,7 @@ nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr);
/* Converts a image_deref_* intrinsic into a image_* one */
void nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr,
nir_ssa_def *handle, bool bindless);
nir_def *handle, bool bindless);
/* Determine if an intrinsic can be arbitrarily reordered and eliminated. */
static inline bool
@ -2412,7 +2412,7 @@ bool nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex);
typedef struct {
nir_instr instr;
nir_ssa_def def;
nir_def def;
nir_const_value value[];
} nir_load_const_instr;
@ -2478,8 +2478,8 @@ typedef struct {
typedef struct {
nir_instr instr;
nir_ssa_def def;
} nir_ssa_undef_instr;
nir_def def;
} nir_undef_instr;
typedef struct {
struct exec_node node;
@ -2554,7 +2554,7 @@ NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr,
type, nir_instr_type_intrinsic)
NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr,
type, nir_instr_type_load_const)
NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr,
NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_undef_instr, instr,
type, nir_instr_type_ssa_undef)
NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr,
type, nir_instr_type_phi)
@ -2589,36 +2589,36 @@ NIR_DEFINE_SRC_AS_CONST(double, float)
#undef NIR_DEFINE_SRC_AS_CONST
typedef struct {
nir_ssa_def *def;
nir_def *def;
unsigned comp;
} nir_ssa_scalar;
} nir_scalar;
static inline bool
nir_ssa_scalar_is_const(nir_ssa_scalar s)
nir_scalar_is_const(nir_scalar s)
{
return s.def->parent_instr->type == nir_instr_type_load_const;
}
static inline bool
nir_ssa_scalar_is_undef(nir_ssa_scalar s)
nir_scalar_is_undef(nir_scalar s)
{
return s.def->parent_instr->type == nir_instr_type_ssa_undef;
}
static inline nir_const_value
nir_ssa_scalar_as_const_value(nir_ssa_scalar s)
nir_scalar_as_const_value(nir_scalar s)
{
assert(s.comp < s.def->num_components);
nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr);
return load->value[s.comp];
}
#define NIR_DEFINE_SCALAR_AS_CONST(type, suffix) \
static inline type \
nir_ssa_scalar_as_##suffix(nir_ssa_scalar s) \
{ \
return nir_const_value_as_##suffix( \
nir_ssa_scalar_as_const_value(s), s.def->bit_size); \
#define NIR_DEFINE_SCALAR_AS_CONST(type, suffix) \
static inline type \
nir_scalar_as_##suffix(nir_scalar s) \
{ \
return nir_const_value_as_##suffix( \
nir_scalar_as_const_value(s), s.def->bit_size); \
}
NIR_DEFINE_SCALAR_AS_CONST(int64_t, int)
@ -2629,21 +2629,21 @@ NIR_DEFINE_SCALAR_AS_CONST(double, float)
#undef NIR_DEFINE_SCALAR_AS_CONST
static inline bool
nir_ssa_scalar_is_alu(nir_ssa_scalar s)
nir_scalar_is_alu(nir_scalar s)
{
return s.def->parent_instr->type == nir_instr_type_alu;
}
static inline nir_op
nir_ssa_scalar_alu_op(nir_ssa_scalar s)
nir_scalar_alu_op(nir_scalar s)
{
return nir_instr_as_alu(s.def->parent_instr)->op;
}
static inline nir_ssa_scalar
nir_ssa_scalar_chase_alu_src(nir_ssa_scalar s, unsigned alu_src_idx)
static inline nir_scalar
nir_scalar_chase_alu_src(nir_scalar s, unsigned alu_src_idx)
{
nir_ssa_scalar out = { NULL, 0 };
nir_scalar out = { NULL, 0 };
nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
assert(alu_src_idx < nir_op_infos[alu->op].num_inputs);
@ -2671,27 +2671,27 @@ nir_ssa_scalar_chase_alu_src(nir_ssa_scalar s, unsigned alu_src_idx)
return out;
}
nir_ssa_scalar nir_ssa_scalar_chase_movs(nir_ssa_scalar s);
nir_scalar nir_scalar_chase_movs(nir_scalar s);
static inline nir_ssa_scalar
nir_get_ssa_scalar(nir_ssa_def *def, unsigned channel)
static inline nir_scalar
nir_get_ssa_scalar(nir_def *def, unsigned channel)
{
nir_ssa_scalar s = { def, channel };
nir_scalar s = { def, channel };
return s;
}
/** Returns a nir_ssa_scalar where we've followed the bit-exact mov/vec use chain to the original definition */
static inline nir_ssa_scalar
nir_ssa_scalar_resolved(nir_ssa_def *def, unsigned channel)
/** Returns a nir_scalar where we've followed the bit-exact mov/vec use chain to the original definition */
static inline nir_scalar
nir_scalar_resolved(nir_def *def, unsigned channel)
{
return nir_ssa_scalar_chase_movs(nir_get_ssa_scalar(def, channel));
return nir_scalar_chase_movs(nir_get_ssa_scalar(def, channel));
}
static inline uint64_t
nir_alu_src_as_uint(nir_alu_src src)
{
nir_ssa_scalar scalar = nir_get_ssa_scalar(src.src.ssa, src.swizzle[0]);
return nir_ssa_scalar_as_uint(scalar);
nir_scalar scalar = nir_get_ssa_scalar(src.src.ssa, src.swizzle[0]);
return nir_scalar_as_uint(scalar);
}
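
A brief illustrative sketch (not part of this diff) of the renamed scalar helpers declared above; "src" is an assumed nir_src taken from some instruction:

   /* Chase one component through bit-exact mov/vec copies, then inspect it. */
   nir_scalar s = nir_scalar_resolved(src->ssa, 0);
   if (nir_scalar_is_const(s)) {
      uint64_t value = nir_scalar_as_uint(s);          /* constant component */
      /* ... */
   } else if (nir_scalar_is_alu(s) && nir_scalar_alu_op(s) == nir_op_iadd) {
      nir_scalar lhs = nir_scalar_chase_alu_src(s, 0); /* per-component operand */
      /* ... */
   }
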
typedef struct {
@ -2966,7 +2966,7 @@ typedef struct {
typedef struct {
/* Induction variable. */
nir_ssa_def *def;
nir_def *def;
/* Init statement with only uniform. */
nir_src *init_src;
@ -4112,9 +4112,9 @@ nir_phi_src *nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_sr
nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);
nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader,
unsigned num_components,
unsigned bit_size);
nir_undef_instr *nir_undef_instr_create(nir_shader *shader,
unsigned num_components,
unsigned bit_size);
nir_const_value nir_alu_binop_identity(nir_op binop, unsigned bit_size);
@ -4385,9 +4385,9 @@ nir_cursor nir_instr_free_and_dce(nir_instr *instr);
/** @} */
nir_ssa_def *nir_instr_ssa_def(nir_instr *instr);
nir_def *nir_instr_ssa_def(nir_instr *instr);
typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state);
typedef bool (*nir_foreach_ssa_def_cb)(nir_def *def, void *state);
typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state);
typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state);
bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb,
@ -4419,7 +4419,7 @@ bool nir_srcs_equal(nir_src src1, nir_src src2);
bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2);
static inline void
nir_src_rewrite_ssa(nir_src *src, nir_ssa_def *new_ssa)
nir_src_rewrite(nir_src *src, nir_def *new_ssa)
{
assert(src->ssa);
assert(src->is_if ? (src->parent_if != NULL) : (src->parent_instr != NULL));
@ -4430,11 +4430,11 @@ nir_src_rewrite_ssa(nir_src *src, nir_ssa_def *new_ssa)
static inline void
nir_instr_rewrite_src_ssa(ASSERTED nir_instr *instr,
nir_src *src, nir_ssa_def *new_ssa)
nir_src *src, nir_def *new_ssa)
{
assert(!src->is_if);
assert(src->parent_instr == instr);
nir_src_rewrite_ssa(src, new_ssa);
nir_src_rewrite(src, new_ssa);
}
void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);
@ -4444,8 +4444,8 @@ void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);
void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
unsigned num_components, unsigned bit_size);
void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
unsigned num_components, unsigned bit_size);
void nir_def_init(nir_instr *instr, nir_def *def,
unsigned num_components, unsigned bit_size);
static inline void
nir_ssa_dest_init_for_type(nir_instr *instr, nir_dest *dest,
const struct glsl_type *type)
@ -4454,16 +4454,16 @@ nir_ssa_dest_init_for_type(nir_instr *instr, nir_dest *dest,
nir_ssa_dest_init(instr, dest, glsl_get_components(type),
glsl_get_bit_size(type));
}
void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_ssa_def *new_ssa);
void nir_ssa_def_rewrite_uses_src(nir_ssa_def *def, nir_src new_src);
void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_ssa_def *new_ssa,
nir_instr *after_me);
void nir_def_rewrite_uses(nir_def *def, nir_def *new_ssa);
void nir_def_rewrite_uses_src(nir_def *def, nir_src new_src);
void nir_def_rewrite_uses_after(nir_def *def, nir_def *new_ssa,
nir_instr *after_me);
nir_component_mask_t nir_src_components_read(const nir_src *src);
nir_component_mask_t nir_ssa_def_components_read(const nir_ssa_def *def);
nir_component_mask_t nir_def_components_read(const nir_def *def);
static inline bool
nir_ssa_def_is_unused(nir_ssa_def *ssa)
nir_def_is_unused(nir_def *ssa)
{
return list_is_empty(&ssa->uses);
}
@ -4715,17 +4715,17 @@ typedef bool (*nir_instr_writemask_filter_cb)(const nir_instr *,
* should either return NULL indicating that no lowering needs to be done or
* emit a sequence of instructions using the provided builder (whose cursor
* will already be placed after the instruction to be lowered) and return the
* resulting nir_ssa_def.
* resulting nir_def.
*/
typedef nir_ssa_def *(*nir_lower_instr_cb)(struct nir_builder *,
nir_instr *, void *);
typedef nir_def *(*nir_lower_instr_cb)(struct nir_builder *,
nir_instr *, void *);
/**
* Special return value for nir_lower_instr_cb when some progress occurred
* (like changing an input to the instr) that didn't result in a replacement
* SSA def being generated.
*/
#define NIR_LOWER_INSTR_PROGRESS ((nir_ssa_def *)(uintptr_t)1)
#define NIR_LOWER_INSTR_PROGRESS ((nir_def *)(uintptr_t)1)
/**
* Special return value for nir_lower_instr_cb when some progress occurred
@ -4733,7 +4733,7 @@ typedef nir_ssa_def *(*nir_lower_instr_cb)(struct nir_builder *,
* (like a store)
*/
#define NIR_LOWER_INSTR_PROGRESS_REPLACE ((nir_ssa_def *)(uintptr_t)2)
#define NIR_LOWER_INSTR_PROGRESS_REPLACE ((nir_def *)(uintptr_t)2)
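
To make the callback contract above concrete, a minimal sketch (not from this commit; the pass and function names are invented) of a filter/lower pair using the renamed type:

   static bool
   lower_fsub_filter(const nir_instr *instr, UNUSED const void *data)
   {
      return instr->type == nir_instr_type_alu &&
             nir_instr_as_alu(instr)->op == nir_op_fsub;
   }

   static nir_def *
   lower_fsub(nir_builder *b, nir_instr *instr, UNUSED void *data)
   {
      /* The caller has already placed the cursor after "instr". */
      nir_alu_instr *alu = nir_instr_as_alu(instr);
      return nir_fadd(b, nir_ssa_for_alu_src(b, alu, 0),
                      nir_fneg(b, nir_ssa_for_alu_src(b, alu, 1)));
   }

   /* ... then: nir_shader_lower_instructions(shader, lower_fsub_filter,
    *                                         lower_fsub, NULL); */
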
/** Iterate over all the instructions in a nir_function_impl and lower them
* using the provided callbacks
@ -4804,7 +4804,7 @@ bool nir_lower_returns(nir_shader *shader);
void nir_inline_function_impl(struct nir_builder *b,
const nir_function_impl *impl,
nir_ssa_def **params,
nir_def **params,
struct hash_table *shader_var_remap);
bool nir_inline_functions(nir_shader *shader);
@ -4864,9 +4864,9 @@ void nir_lower_clip_halfz(nir_shader *shader);
void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);
void nir_gather_ssa_types(nir_function_impl *impl,
BITSET_WORD *float_types,
BITSET_WORD *int_types);
void nir_gather_types(nir_function_impl *impl,
BITSET_WORD *float_types,
BITSET_WORD *int_types);
void nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
unsigned *size,
@ -5056,26 +5056,26 @@ nir_address_format_to_glsl_type(nir_address_format addr_format)
const nir_const_value *nir_address_format_null_value(nir_address_format addr_format);
nir_ssa_def *nir_build_addr_iadd(struct nir_builder *b, nir_ssa_def *addr,
nir_def *nir_build_addr_iadd(struct nir_builder *b, nir_def *addr,
nir_address_format addr_format,
nir_variable_mode modes,
nir_def *offset);
nir_def *nir_build_addr_iadd_imm(struct nir_builder *b, nir_def *addr,
nir_address_format addr_format,
nir_variable_mode modes,
nir_ssa_def *offset);
int64_t offset);
nir_ssa_def *nir_build_addr_iadd_imm(struct nir_builder *b, nir_ssa_def *addr,
nir_address_format addr_format,
nir_variable_mode modes,
int64_t offset);
nir_def *nir_build_addr_ieq(struct nir_builder *b, nir_def *addr0, nir_def *addr1,
nir_address_format addr_format);
nir_ssa_def *nir_build_addr_ieq(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
nir_address_format addr_format);
nir_def *nir_build_addr_isub(struct nir_builder *b, nir_def *addr0, nir_def *addr1,
nir_address_format addr_format);
nir_ssa_def *nir_build_addr_isub(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
nir_address_format addr_format);
nir_ssa_def *nir_explicit_io_address_from_deref(struct nir_builder *b,
nir_deref_instr *deref,
nir_ssa_def *base_addr,
nir_address_format addr_format);
nir_def *nir_explicit_io_address_from_deref(struct nir_builder *b,
nir_deref_instr *deref,
nir_def *base_addr,
nir_address_format addr_format);
bool nir_get_explicit_deref_align(nir_deref_instr *deref,
bool default_to_type_align,
@ -5084,7 +5084,7 @@ bool nir_get_explicit_deref_align(nir_deref_instr *deref,
void nir_lower_explicit_io_instr(struct nir_builder *b,
nir_intrinsic_instr *io_instr,
nir_ssa_def *addr,
nir_def *addr,
nir_address_format addr_format);
bool nir_lower_explicit_io(nir_shader *shader,
@ -5330,7 +5330,7 @@ bool nir_lower_subgroups(nir_shader *shader,
bool nir_lower_system_values(nir_shader *shader);
nir_ssa_def *
nir_def *
nir_build_lowered_load_helper_invocation(struct nir_builder *b);
typedef struct nir_lower_compute_system_values_options {
@ -5875,7 +5875,7 @@ void nir_loop_analyze_impl(nir_function_impl *impl,
nir_variable_mode indirect_mask,
bool force_unroll_sampler_indirect);
bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
bool nir_defs_interfere(nir_def *a, nir_def *b);
bool nir_repair_ssa_impl(nir_function_impl *impl);
bool nir_repair_ssa(nir_shader *shader);
@ -5887,8 +5887,8 @@ bool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr);
bool nir_has_divergent_loop(nir_shader *shader);
void
nir_rewrite_uses_to_load_reg(struct nir_builder *b, nir_ssa_def *old,
nir_ssa_def *reg);
nir_rewrite_uses_to_load_reg(struct nir_builder *b, nir_def *old,
nir_def *reg);
/* If phi_webs_only is true, only convert SSA values involved in phi nodes to
* registers. If false, convert all values (even those not involved in a phi
@ -6096,12 +6096,12 @@ typedef struct nir_unsigned_upper_bound_config {
uint32_t
nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
nir_ssa_scalar scalar,
nir_scalar scalar,
const nir_unsigned_upper_bound_config *config);
bool
nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht,
nir_ssa_scalar ssa, unsigned const_val,
nir_scalar ssa, unsigned const_val,
const nir_unsigned_upper_bound_config *config);
typedef struct {
@ -6114,7 +6114,7 @@ typedef struct {
bool subgroup_size_uniform;
/* size/align for load/store_preamble. */
void (*def_size)(nir_ssa_def *def, unsigned *size, unsigned *align);
void (*def_size)(nir_def *def, unsigned *size, unsigned *align);
/* Total available size for load/store_preamble storage, in units
* determined by def_size.
@ -6132,7 +6132,7 @@ typedef struct {
* may happen from inserting move instructions, etc. If the benefit doesn't
* exceed the cost here then we won't rewrite it.
*/
float (*rewrite_cost_cb)(nir_ssa_def *def, const void *data);
float (*rewrite_cost_cb)(nir_def *def, const void *data);
/* Instructions whose definitions should not be rewritten. These could
* still be moved to the preamble, but they shouldn't be the root of a
@ -6154,7 +6154,7 @@ nir_function_impl *nir_shader_get_preamble(nir_shader *shader);
bool nir_lower_point_smooth(nir_shader *shader);
bool nir_lower_poly_line_smooth(nir_shader *shader, unsigned num_smooth_aa_sample);
bool nir_mod_analysis(nir_ssa_scalar val, nir_alu_type val_type, unsigned div, unsigned *mod);
bool nir_mod_analysis(nir_scalar val, nir_alu_type val_type, unsigned div, unsigned *mod);
bool
nir_remove_tex_shadow(nir_shader *shader, unsigned textures_bitmask);
@ -6163,7 +6163,7 @@ void
nir_trivialize_registers(nir_shader *s);
static inline nir_intrinsic_instr *
nir_reg_get_decl(nir_ssa_def *reg)
nir_reg_get_decl(nir_def *reg)
{
assert(reg->parent_instr->type == nir_instr_type_intrinsic);
nir_intrinsic_instr *decl = nir_instr_as_intrinsic(reg->parent_instr);
@ -6231,7 +6231,7 @@ nir_is_store_reg(nir_intrinsic_instr *intr)
if (nir_is_store_reg(nir_instr_as_intrinsic(store->parent_instr)))
static inline nir_intrinsic_instr *
nir_load_reg_for_def(const nir_ssa_def *def)
nir_load_reg_for_def(const nir_def *def)
{
if (def->parent_instr->type != nir_instr_type_intrinsic)
return NULL;
@ -6244,7 +6244,7 @@ nir_load_reg_for_def(const nir_ssa_def *def)
}
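
A hypothetical usage sketch (not part of this diff) of the register helpers in this header; "def" is an assumed value inspected after nir_trivialize_registers:

   /* If "def" is just a plain read of a register, recover its decl_reg. */
   nir_intrinsic_instr *load = nir_load_reg_for_def(def);
   if (load != NULL) {
      nir_intrinsic_instr *decl = nir_reg_get_decl(load->src[0].ssa);
      /* ... treat the value as a read of "decl" ... */
   }
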
static inline nir_intrinsic_instr *
nir_store_reg_for_def(const nir_ssa_def *def)
nir_store_reg_for_def(const nir_def *def)
{
/* Look for the trivial store: single use of our destination by a
* store_register intrinsic.

Some files were not shown because too many files have changed in this diff.