nir: Drop "SSA" from NIR language

Everything is SSA now.

   sed -e 's/nir_ssa_def/nir_def/g' \
       -e 's/nir_ssa_undef/nir_undef/g' \
       -e 's/nir_ssa_scalar/nir_scalar/g' \
       -e 's/nir_src_rewrite_ssa/nir_src_rewrite/g' \
       -e 's/nir_gather_ssa_types/nir_gather_types/g' \
       -i $(git grep -l nir | grep -v relnotes)

   git mv src/compiler/nir/nir_gather_ssa_types.c \
          src/compiler/nir/nir_gather_types.c

   ninja -C build/ clang-format
   cd src/compiler/nir && find *.c *.h -type f -exec clang-format -i \{} \;

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Acked-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Acked-by: Emma Anholt <emma@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24585>
parent 777d336b1f
commit 09d31922de

492 changed files with 10408 additions and 10455 deletions
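For orientation, a minimal sketch of what the rename means at a call site (not taken from the patch itself; `b` is assumed to be a `nir_builder *` with a valid cursor and the variable names are invented, but the type and helper renames are the ones this commit performs — note that the `nir_get_ssa_scalar()` getter keeps its old name, as the hunks below show):

   /* Before this commit: */
   nir_ssa_def *val = nir_imm_float(b, 1.0f);
   nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);      /* explicit undef value */
   nir_ssa_scalar s = nir_get_ssa_scalar(val, 0);     /* component 0 of val */

   /* After: same builder helpers, same behavior, shorter names. */
   nir_def *val = nir_imm_float(b, 1.0f);
   nir_def *undef = nir_undef(b, 1, 32);
   nir_scalar s = nir_get_ssa_scalar(val, 0);         /* getter name unchanged */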
@@ -40,7 +40,7 @@ import nir_opcodes
 OP_DESC_TEMPLATE = mako.template.Template("""
 <%
 def src_decl_list(num_srcs):
-   return ', '.join('nir_ssa_def *src' + str(i) for i in range(num_srcs))
+   return ', '.join('nir_def *src' + str(i) for i in range(num_srcs))
 
 def to_yn(b):
    return 'Y' if b else 'N'
@@ -68,7 +68,7 @@ ${textwrap.indent(op.const_expr, ' ')}
 
 **Builder function:**
 
-.. c:function:: nir_ssa_def *nir_${op.name}(nir_builder *, ${src_decl_list(op.num_inputs)})
+.. c:function:: nir_def *nir_${op.name}(nir_builder *, ${src_decl_list(op.num_inputs)})
 """)
 
 def parse_rst(state, parent, rst):
@@ -10,7 +10,7 @@
 #include "nir_xfb_info.h"
 
 /* Load argument with index start from arg plus relative_index. */
-nir_ssa_def *
+nir_def *
 ac_nir_load_arg_at_offset(nir_builder *b, const struct ac_shader_args *ac_args,
                           struct ac_arg arg, unsigned relative_index)
 {
@@ -25,7 +25,7 @@ ac_nir_load_arg_at_offset(nir_builder *b, const struct ac_shader_args *ac_args,
 
 void
 ac_nir_store_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
-                 nir_ssa_def *val)
+                 nir_def *val)
 {
    assert(nir_cursor_current_block(b->cursor)->cf_node.parent->type == nir_cf_node_function);
 
@@ -35,11 +35,11 @@ ac_nir_store_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac
       nir_store_vector_arg_amd(b, val, .base = arg.arg_index);
 }
 
-nir_ssa_def *
+nir_def *
 ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
                   unsigned rshift, unsigned bitwidth)
 {
-   nir_ssa_def *value = ac_nir_load_arg(b, ac_args, arg);
+   nir_def *value = ac_nir_load_arg(b, ac_args, arg);
    if (rshift == 0 && bitwidth == 32)
       return value;
    else if (rshift == 0)
@@ -57,11 +57,11 @@ is_sin_cos(const nir_instr *instr, UNUSED const void *_)
           nir_instr_as_alu(instr)->op == nir_op_fcos);
 }
 
-static nir_ssa_def *
+static nir_def *
 lower_sin_cos(struct nir_builder *b, nir_instr *instr, UNUSED void *_)
 {
    nir_alu_instr *sincos = nir_instr_as_alu(instr);
-   nir_ssa_def *src = nir_fmul_imm(b, nir_ssa_for_alu_src(b, sincos, 0), 0.15915493667125702);
+   nir_def *src = nir_fmul_imm(b, nir_ssa_for_alu_src(b, sincos, 0), 0.15915493667125702);
    return sincos->op == nir_op_fsin ? nir_fsin_amd(b, src) : nir_fcos_amd(b, src);
 }
 
@@ -85,7 +85,7 @@ lower_intrinsic_to_arg(nir_builder *b, nir_instr *instr, void *state)
 
    lower_intrinsics_to_args_state *s = (lower_intrinsics_to_args_state *)state;
    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-   nir_ssa_def *replacement = NULL;
+   nir_def *replacement = NULL;
    b->cursor = nir_after_instr(&intrin->instr);
 
    switch (intrin->intrinsic) {
@@ -134,7 +134,7 @@ lower_intrinsic_to_arg(nir_builder *b, nir_instr *instr, void *state)
    }
 
    assert(replacement);
-   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, replacement);
+   nir_def_rewrite_uses(&intrin->dest.ssa, replacement);
    nir_instr_remove(&intrin->instr);
    return true;
 }
@@ -155,15 +155,15 @@ ac_nir_lower_intrinsics_to_args(nir_shader *shader, const enum amd_gfx_level gfx
 }
 
 void
-ac_nir_store_var_components(nir_builder *b, nir_variable *var, nir_ssa_def *value,
+ac_nir_store_var_components(nir_builder *b, nir_variable *var, nir_def *value,
                             unsigned component, unsigned writemask)
 {
    /* component store */
    if (value->num_components != 4) {
-      nir_ssa_def *undef = nir_ssa_undef(b, 1, value->bit_size);
+      nir_def *undef = nir_undef(b, 1, value->bit_size);
 
      /* add undef component before and after value to form a vec4 */
-      nir_ssa_def *comp[4];
+      nir_def *comp[4];
      for (int i = 0; i < 4; i++) {
         comp[i] = (i >= component && i < component + value->num_components) ?
                      nir_channel(b, value, i - component) : undef;
@@ -180,7 +180,7 @@ ac_nir_store_var_components(nir_builder *b, nir_variable *var, nir_ssa_def *valu
 }
 
 void
-ac_nir_export_primitive(nir_builder *b, nir_ssa_def *prim)
+ac_nir_export_primitive(nir_builder *b, nir_def *prim)
 {
    unsigned write_mask = BITFIELD_MASK(prim->num_components);
 
@@ -190,15 +190,15 @@ ac_nir_export_primitive(nir_builder *b, nir_ssa_def *prim)
                        .write_mask = write_mask);
 }
 
-static nir_ssa_def *
-get_export_output(nir_builder *b, nir_ssa_def **output)
+static nir_def *
+get_export_output(nir_builder *b, nir_def **output)
 {
-   nir_ssa_def *vec[4];
+   nir_def *vec[4];
    for (int i = 0; i < 4; i++) {
       if (output[i])
          vec[i] = nir_u2uN(b, output[i], 32);
       else
-         vec[i] = nir_ssa_undef(b, 1, 32);
+         vec[i] = nir_undef(b, 1, 32);
    }
 
    return nir_vec(b, vec, 4);
@@ -211,17 +211,17 @@ ac_nir_export_position(nir_builder *b,
                        bool no_param_export,
                        bool force_vrs,
                        uint64_t outputs_written,
-                       nir_ssa_def *(*outputs)[4])
+                       nir_def *(*outputs)[4])
 {
    nir_intrinsic_instr *exp[4];
    unsigned exp_num = 0;
 
-   nir_ssa_def *pos;
+   nir_def *pos;
    if (outputs_written & VARYING_BIT_POS) {
       pos = get_export_output(b, outputs[VARYING_SLOT_POS]);
    } else {
-      nir_ssa_def *zero = nir_imm_float(b, 0);
-      nir_ssa_def *one = nir_imm_float(b, 1);
+      nir_def *zero = nir_imm_float(b, 0);
+      nir_def *one = nir_imm_float(b, 1);
       pos = nir_vec4(b, zero, zero, zero, one);
    }
 
@@ -255,8 +255,8 @@ ac_nir_export_position(nir_builder *b,
       outputs_written &= ~VARYING_BIT_VIEWPORT;
 
    if ((outputs_written & mask) || force_vrs) {
-      nir_ssa_def *zero = nir_imm_float(b, 0);
-      nir_ssa_def *vec[4] = { zero, zero, zero, zero };
+      nir_def *zero = nir_imm_float(b, 0);
+      nir_def *vec[4] = { zero, zero, zero, zero };
       unsigned flags = 0;
       unsigned write_mask = 0;
 
@@ -270,13 +270,13 @@ ac_nir_export_position(nir_builder *b,
         write_mask |= BITFIELD_BIT(1);
      }
 
-      nir_ssa_def *rates = NULL;
+      nir_def *rates = NULL;
      if (outputs_written & VARYING_BIT_PRIMITIVE_SHADING_RATE) {
         rates = outputs[VARYING_SLOT_PRIMITIVE_SHADING_RATE][0];
      } else if (force_vrs) {
         /* If Pos.W != 1 (typical for non-GUI elements), use coarse shading. */
-         nir_ssa_def *pos_w = nir_channel(b, pos, 3);
-         nir_ssa_def *cond = nir_fneu_imm(b, pos_w, 1);
+         nir_def *pos_w = nir_channel(b, pos, 3);
+         nir_def *cond = nir_fneu_imm(b, pos_w, 1);
         rates = nir_bcsel(b, cond, nir_load_force_vrs_rates_amd(b), nir_imm_int(b, 0));
      }
 
@@ -293,7 +293,7 @@ ac_nir_export_position(nir_builder *b,
    if (outputs_written & VARYING_BIT_VIEWPORT) {
       if (gfx_level >= GFX9) {
          /* GFX9 has the layer in [10:0] and the viewport index in [19:16]. */
-         nir_ssa_def *v = nir_ishl_imm(b, outputs[VARYING_SLOT_VIEWPORT][0], 16);
+         nir_def *v = nir_ishl_imm(b, outputs[VARYING_SLOT_VIEWPORT][0], 16);
          vec[2] = nir_ior(b, vec[2], v);
          write_mask |= BITFIELD_BIT(2);
       } else {
@@ -322,12 +322,12 @@ ac_nir_export_position(nir_builder *b,
    }
 
    if (outputs_written & VARYING_BIT_CLIP_VERTEX) {
-      nir_ssa_def *vtx = get_export_output(b, outputs[VARYING_SLOT_CLIP_VERTEX]);
+      nir_def *vtx = get_export_output(b, outputs[VARYING_SLOT_CLIP_VERTEX]);
 
      /* Clip distance for clip vertex to each user clip plane. */
-      nir_ssa_def *clip_dist[8] = {0};
+      nir_def *clip_dist[8] = {0};
      u_foreach_bit (i, clip_cull_mask) {
-         nir_ssa_def *ucp = nir_load_user_clip_plane(b, .ucp_id = i);
+         nir_def *ucp = nir_load_user_clip_plane(b, .ucp_id = i);
         clip_dist[i] = nir_fdot4(b, vtx, ucp);
      }
 
@@ -365,9 +365,9 @@ ac_nir_export_parameters(nir_builder *b,
                          const uint8_t *param_offsets,
                          uint64_t outputs_written,
                          uint16_t outputs_written_16bit,
-                         nir_ssa_def *(*outputs)[4],
-                         nir_ssa_def *(*outputs_16bit_lo)[4],
-                         nir_ssa_def *(*outputs_16bit_hi)[4])
+                         nir_def *(*outputs)[4],
+                         nir_def *(*outputs_16bit_lo)[4],
+                         nir_def *(*outputs_16bit_hi)[4])
 {
    uint32_t exported_params = 0;
 
@@ -422,11 +422,11 @@ ac_nir_export_parameters(nir_builder *b,
      if (exported_params & BITFIELD_BIT(offset))
         continue;
 
-      nir_ssa_def *vec[4];
-      nir_ssa_def *undef = nir_ssa_undef(b, 1, 16);
+      nir_def *vec[4];
+      nir_def *undef = nir_undef(b, 1, 16);
      for (int i = 0; i < 4; i++) {
-         nir_ssa_def *lo = outputs_16bit_lo[slot][i] ? outputs_16bit_lo[slot][i] : undef;
-         nir_ssa_def *hi = outputs_16bit_hi[slot][i] ? outputs_16bit_hi[slot][i] : undef;
+         nir_def *lo = outputs_16bit_lo[slot][i] ? outputs_16bit_lo[slot][i] : undef;
+         nir_def *hi = outputs_16bit_hi[slot][i] ? outputs_16bit_hi[slot][i] : undef;
         vec[i] = nir_pack_32_2x16_split(b, lo, hi);
      }
 
@@ -443,10 +443,10 @@ ac_nir_export_parameters(nir_builder *b,
  * and emits a sequence that calculates the full offset of that instruction,
  * including a stride to the base and component offsets.
  */
-nir_ssa_def *
+nir_def *
 ac_nir_calc_io_offset(nir_builder *b,
                       nir_intrinsic_instr *intrin,
-                      nir_ssa_def *base_stride,
+                      nir_def *base_stride,
                       unsigned component_stride,
                       ac_nir_map_io_driver_location map_io)
 {
@@ -455,13 +455,13 @@ ac_nir_calc_io_offset(nir_builder *b,
    unsigned mapped_driver_location = map_io ? map_io(semantic) : base;
 
    /* base is the driver_location, which is in slots (1 slot = 4x4 bytes) */
-   nir_ssa_def *base_op = nir_imul_imm(b, base_stride, mapped_driver_location);
+   nir_def *base_op = nir_imul_imm(b, base_stride, mapped_driver_location);
 
    /* offset should be interpreted in relation to the base,
    * so the instruction effectively reads/writes another input/output
    * when it has an offset
    */
-   nir_ssa_def *offset_op = nir_imul(b, base_stride, nir_ssa_for_src(b, *nir_get_io_offset_src(intrin), 1));
+   nir_def *offset_op = nir_imul(b, base_stride, nir_ssa_for_src(b, *nir_get_io_offset_src(intrin), 1));
 
    /* component is in bytes */
    unsigned const_op = nir_intrinsic_component(intrin) * component_stride;
@@ -513,19 +513,19 @@ ac_nir_lower_indirect_derefs(nir_shader *shader,
 }
 
 struct shader_outputs {
-   nir_ssa_def *data[VARYING_SLOT_MAX][4];
-   nir_ssa_def *data_16bit_lo[16][4];
-   nir_ssa_def *data_16bit_hi[16][4];
+   nir_def *data[VARYING_SLOT_MAX][4];
+   nir_def *data_16bit_lo[16][4];
+   nir_def *data_16bit_hi[16][4];
 
    nir_alu_type (*type_16bit_lo)[4];
    nir_alu_type (*type_16bit_hi)[4];
 };
 
-static nir_ssa_def **
+static nir_def **
 get_output_and_type(struct shader_outputs *outputs, unsigned slot, bool high_16bits,
                     nir_alu_type **types)
 {
-   nir_ssa_def **data;
+   nir_def **data;
    nir_alu_type *type;
 
    /* Only VARYING_SLOT_VARn_16BIT slots need output type to convert 16bit output
@@ -554,38 +554,38 @@ static void
 emit_streamout(nir_builder *b, unsigned stream, nir_xfb_info *info,
                struct shader_outputs *outputs)
 {
-   nir_ssa_def *so_vtx_count = nir_ubfe_imm(b, nir_load_streamout_config_amd(b), 16, 7);
-   nir_ssa_def *tid = nir_load_subgroup_invocation(b);
+   nir_def *so_vtx_count = nir_ubfe_imm(b, nir_load_streamout_config_amd(b), 16, 7);
+   nir_def *tid = nir_load_subgroup_invocation(b);
 
    nir_push_if(b, nir_ilt(b, tid, so_vtx_count));
-   nir_ssa_def *so_write_index = nir_load_streamout_write_index_amd(b);
+   nir_def *so_write_index = nir_load_streamout_write_index_amd(b);
 
-   nir_ssa_def *so_buffers[NIR_MAX_XFB_BUFFERS];
-   nir_ssa_def *so_write_offset[NIR_MAX_XFB_BUFFERS];
+   nir_def *so_buffers[NIR_MAX_XFB_BUFFERS];
+   nir_def *so_write_offset[NIR_MAX_XFB_BUFFERS];
    u_foreach_bit(i, info->buffers_written) {
       so_buffers[i] = nir_load_streamout_buffer_amd(b, i);
 
      unsigned stride = info->buffers[i].stride;
-      nir_ssa_def *offset = nir_load_streamout_offset_amd(b, i);
+      nir_def *offset = nir_load_streamout_offset_amd(b, i);
      offset = nir_iadd(b, nir_imul_imm(b, nir_iadd(b, so_write_index, tid), stride),
                        nir_imul_imm(b, offset, 4));
      so_write_offset[i] = offset;
   }
 
-   nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
+   nir_def *undef = nir_undef(b, 1, 32);
    for (unsigned i = 0; i < info->output_count; i++) {
      const nir_xfb_output_info *output = info->outputs + i;
      if (stream != info->buffer_to_stream[output->buffer])
         continue;
 
      nir_alu_type *output_type;
-      nir_ssa_def **output_data =
+      nir_def **output_data =
         get_output_and_type(outputs, output->location, output->high_16bits, &output_type);
 
-      nir_ssa_def *vec[4] = {undef, undef, undef, undef};
+      nir_def *vec[4] = {undef, undef, undef, undef};
      uint8_t mask = 0;
      u_foreach_bit(j, output->component_mask) {
-         nir_ssa_def *data = output_data[j];
+         nir_def *data = output_data[j];
 
         if (data) {
            if (data->bit_size < 32) {
@@ -606,8 +606,8 @@ emit_streamout(nir_builder *b, unsigned stream, nir_xfb_info *info,
        continue;
 
      unsigned buffer = output->buffer;
-      nir_ssa_def *data = nir_vec(b, vec, util_last_bit(mask));
-      nir_ssa_def *zero = nir_imm_int(b, 0);
+      nir_def *data = nir_vec(b, vec, util_last_bit(mask));
+      nir_def *zero = nir_imm_int(b, 0);
      nir_store_buffer_amd(b, data, so_buffers[buffer], so_write_offset[buffer], zero, zero,
                           .base = output->offset, .write_mask = mask,
                           .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL);
@@ -636,15 +636,15 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
    b.shader->info.outputs_written = gs_nir->info.outputs_written;
    b.shader->info.outputs_written_16bit = gs_nir->info.outputs_written_16bit;
 
-   nir_ssa_def *gsvs_ring = nir_load_ring_gsvs_amd(&b);
+   nir_def *gsvs_ring = nir_load_ring_gsvs_amd(&b);
 
    nir_xfb_info *info = gs_nir->xfb_info;
-   nir_ssa_def *stream_id = NULL;
+   nir_def *stream_id = NULL;
    if (!disable_streamout && info)
       stream_id = nir_ubfe_imm(&b, nir_load_streamout_config_amd(&b), 24, 2);
 
-   nir_ssa_def *vtx_offset = nir_imul_imm(&b, nir_load_vertex_id_zero_base(&b), 4);
-   nir_ssa_def *zero = nir_imm_zero(&b, 1, 32);
+   nir_def *vtx_offset = nir_imul_imm(&b, nir_load_vertex_id_zero_base(&b), 4);
+   nir_def *zero = nir_imm_zero(&b, 1, 32);
 
    for (unsigned stream = 0; stream < 4; stream++) {
      if (stream > 0 && (!stream_id || !(info->streams_written & BITFIELD_BIT(stream))))
@@ -672,8 +672,8 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
         /* clamp legacy color output */
         if (i == VARYING_SLOT_COL0 || i == VARYING_SLOT_COL1 ||
            i == VARYING_SLOT_BFC0 || i == VARYING_SLOT_BFC1) {
-            nir_ssa_def *color = outputs.data[i][j];
-            nir_ssa_def *clamp = nir_load_clamp_vertex_color_amd(&b);
+            nir_def *color = outputs.data[i][j];
+            nir_def *clamp = nir_load_clamp_vertex_color_amd(&b);
           outputs.data[i][j] = nir_bcsel(&b, clamp, nir_fsat(&b, color), color);
         }
 
@@ -690,7 +690,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
      if (!has_lo_16bit && !has_hi_16bit)
         continue;
 
-      nir_ssa_def *data =
+      nir_def *data =
         nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero,
                             .base = offset,
                             .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL);
@@ -759,7 +759,7 @@ gather_outputs(nir_builder *b, nir_function_impl *impl, struct shader_outputs *o
      nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
 
      nir_alu_type *output_type;
-      nir_ssa_def **output_data =
+      nir_def **output_data =
         get_output_and_type(outputs, sem.location, sem.high_16bits, &output_type);
 
      u_foreach_bit (i, nir_intrinsic_write_mask(intrin)) {
@@ -841,12 +841,12 @@ ac_nir_gs_shader_query(nir_builder *b,
                        bool has_pipeline_stats_query,
                        unsigned num_vertices_per_primitive,
                        unsigned wave_size,
-                       nir_ssa_def *vertex_count[4],
-                       nir_ssa_def *primitive_count[4])
+                       nir_def *vertex_count[4],
+                       nir_def *primitive_count[4])
 {
-   nir_ssa_def *pipeline_query_enabled = NULL;
-   nir_ssa_def *prim_gen_query_enabled = NULL;
-   nir_ssa_def *shader_query_enabled = NULL;
+   nir_def *pipeline_query_enabled = NULL;
+   nir_def *prim_gen_query_enabled = NULL;
+   nir_def *shader_query_enabled = NULL;
    if (has_gen_prim_query) {
      prim_gen_query_enabled = nir_load_prim_gen_query_enabled_amd(b);
      if (has_pipeline_stats_query) {
@@ -865,31 +865,31 @@ ac_nir_gs_shader_query(nir_builder *b,
 
    nir_if *if_shader_query = nir_push_if(b, shader_query_enabled);
 
-   nir_ssa_def *active_threads_mask = nir_ballot(b, 1, wave_size, nir_imm_true(b));
-   nir_ssa_def *num_active_threads = nir_bit_count(b, active_threads_mask);
+   nir_def *active_threads_mask = nir_ballot(b, 1, wave_size, nir_imm_true(b));
+   nir_def *num_active_threads = nir_bit_count(b, active_threads_mask);
 
    /* Calculate the "real" number of emitted primitives from the emitted GS vertices and primitives.
    * GS emits points, line strips or triangle strips.
    * Real primitives are points, lines or triangles.
    */
-   nir_ssa_def *num_prims_in_wave[4] = {0};
+   nir_def *num_prims_in_wave[4] = {0};
    u_foreach_bit (i, b->shader->info.gs.active_stream_mask) {
      assert(vertex_count[i] && primitive_count[i]);
 
-      nir_ssa_scalar vtx_cnt = nir_get_ssa_scalar(vertex_count[i], 0);
-      nir_ssa_scalar prm_cnt = nir_get_ssa_scalar(primitive_count[i], 0);
+      nir_scalar vtx_cnt = nir_get_ssa_scalar(vertex_count[i], 0);
+      nir_scalar prm_cnt = nir_get_ssa_scalar(primitive_count[i], 0);
 
-      if (nir_ssa_scalar_is_const(vtx_cnt) && nir_ssa_scalar_is_const(prm_cnt)) {
-         unsigned gs_vtx_cnt = nir_ssa_scalar_as_uint(vtx_cnt);
-         unsigned gs_prm_cnt = nir_ssa_scalar_as_uint(prm_cnt);
+      if (nir_scalar_is_const(vtx_cnt) && nir_scalar_is_const(prm_cnt)) {
+         unsigned gs_vtx_cnt = nir_scalar_as_uint(vtx_cnt);
+         unsigned gs_prm_cnt = nir_scalar_as_uint(prm_cnt);
         unsigned total_prm_cnt = gs_vtx_cnt - gs_prm_cnt * (num_vertices_per_primitive - 1u);
         if (total_prm_cnt == 0)
            continue;
 
        num_prims_in_wave[i] = nir_imul_imm(b, num_active_threads, total_prm_cnt);
      } else {
-         nir_ssa_def *gs_vtx_cnt = vtx_cnt.def;
-         nir_ssa_def *gs_prm_cnt = prm_cnt.def;
+         nir_def *gs_vtx_cnt = vtx_cnt.def;
+         nir_def *gs_prm_cnt = prm_cnt.def;
        if (num_vertices_per_primitive > 1)
           gs_prm_cnt = nir_iadd(b, nir_imul_imm(b, gs_prm_cnt, -1u * (num_vertices_per_primitive - 1)), gs_vtx_cnt);
        num_prims_in_wave[i] = nir_reduce(b, gs_prm_cnt, .reduction_op = nir_op_iadd);
@@ -902,7 +902,7 @@ ac_nir_gs_shader_query(nir_builder *b,
    if (has_pipeline_stats_query) {
      nir_if *if_pipeline_query = nir_push_if(b, pipeline_query_enabled);
      {
-         nir_ssa_def *count = NULL;
+         nir_def *count = NULL;
 
        /* Add all streams' number to the same counter. */
        for (int i = 0; i < 4; i++) {
@@ -941,14 +941,14 @@ ac_nir_gs_shader_query(nir_builder *b,
 }
 
 typedef struct {
-   nir_ssa_def *outputs[64][4];
-   nir_ssa_def *outputs_16bit_lo[16][4];
-   nir_ssa_def *outputs_16bit_hi[16][4];
+   nir_def *outputs[64][4];
+   nir_def *outputs_16bit_lo[16][4];
+   nir_def *outputs_16bit_hi[16][4];
 
    ac_nir_gs_output_info *info;
 
-   nir_ssa_def *vertex_count[4];
-   nir_ssa_def *primitive_count[4];
+   nir_def *vertex_count[4];
+   nir_def *primitive_count[4];
 } lower_legacy_gs_state;
 
 static bool
@@ -968,7 +968,7 @@ lower_legacy_gs_store_output(nir_builder *b, nir_intrinsic_instr *intrin,
    unsigned write_mask = nir_intrinsic_write_mask(intrin);
    nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
 
-   nir_ssa_def **outputs;
+   nir_def **outputs;
    if (sem.location < VARYING_SLOT_VAR0_16BIT) {
      outputs = s->outputs[sem.location];
    } else {
@@ -979,7 +979,7 @@ lower_legacy_gs_store_output(nir_builder *b, nir_intrinsic_instr *intrin,
        outputs = s->outputs_16bit_lo[index];
    }
 
-   nir_ssa_def *store_val = intrin->src[0].ssa;
+   nir_def *store_val = intrin->src[0].ssa;
    /* 64bit output has been lowered to 32bit */
    assert(store_val->bit_size <= 32);
 
@@ -999,15 +999,15 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in
    b->cursor = nir_before_instr(&intrin->instr);
 
    unsigned stream = nir_intrinsic_stream_id(intrin);
-   nir_ssa_def *vtxidx = intrin->src[0].ssa;
+   nir_def *vtxidx = intrin->src[0].ssa;
 
-   nir_ssa_def *gsvs_ring = nir_load_ring_gsvs_amd(b, .stream_id = stream);
-   nir_ssa_def *soffset = nir_load_ring_gs2vs_offset_amd(b);
+   nir_def *gsvs_ring = nir_load_ring_gsvs_amd(b, .stream_id = stream);
+   nir_def *soffset = nir_load_ring_gs2vs_offset_amd(b);
 
    unsigned offset = 0;
    u_foreach_bit64 (i, b->shader->info.outputs_written) {
      for (unsigned j = 0; j < 4; j++) {
-         nir_ssa_def *output = s->outputs[i][j];
+         nir_def *output = s->outputs[i][j];
        /* Next vertex emit need a new value, reset all outputs. */
        s->outputs[i][j] = NULL;
 
@@ -1022,10 +1022,10 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in
        if (!output)
          continue;
 
-         nir_ssa_def *voffset = nir_ishl_imm(b, vtxidx, 2);
+         nir_def *voffset = nir_ishl_imm(b, vtxidx, 2);
 
        /* extend 8/16 bit to 32 bit, 64 bit has been lowered */
-         nir_ssa_def *data = nir_u2uN(b, output, 32);
+         nir_def *data = nir_u2uN(b, output, 32);
 
        nir_store_buffer_amd(b, data, gsvs_ring, voffset, soffset, nir_imm_int(b, 0),
                             .access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL |
@@ -1038,8 +1038,8 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in
 
    u_foreach_bit (i, b->shader->info.outputs_written_16bit) {
      for (unsigned j = 0; j < 4; j++) {
-         nir_ssa_def *output_lo = s->outputs_16bit_lo[i][j];
-         nir_ssa_def *output_hi = s->outputs_16bit_hi[i][j];
+         nir_def *output_lo = s->outputs_16bit_lo[i][j];
+         nir_def *output_hi = s->outputs_16bit_hi[i][j];
        /* Next vertex emit need a new value, reset all outputs. */
        s->outputs_16bit_lo[i][j] = NULL;
        s->outputs_16bit_hi[i][j] = NULL;
@@ -1062,12 +1062,12 @@ lower_legacy_gs_emit_vertex_with_counter(nir_builder *b, nir_intrinsic_instr *in
          continue;
 
        if (!has_lo_16bit_out)
-            output_lo = nir_ssa_undef(b, 1, 16);
+            output_lo = nir_undef(b, 1, 16);
 
        if (!has_hi_16bit_out)
-            output_hi = nir_ssa_undef(b, 1, 16);
+            output_hi = nir_undef(b, 1, 16);
 
-         nir_ssa_def *voffset = nir_iadd_imm(b, vtxidx, base);
+         nir_def *voffset = nir_iadd_imm(b, vtxidx, base);
        voffset = nir_ishl_imm(b, voffset, 2);
 
        nir_store_buffer_amd(b, nir_pack_32_2x16_split(b, output_lo, output_hi),
@@ -47,20 +47,20 @@ typedef struct nir_builder nir_builder;
 /* Executed by ac_nir_cull when the current primitive is accepted. */
 typedef void (*ac_nir_cull_accepted)(nir_builder *b, void *state);
 
-nir_ssa_def *
+nir_def *
 ac_nir_load_arg_at_offset(nir_builder *b, const struct ac_shader_args *ac_args,
                           struct ac_arg arg, unsigned relative_index);
 
-static inline nir_ssa_def *
+static inline nir_def *
 ac_nir_load_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg)
 {
    return ac_nir_load_arg_at_offset(b, ac_args, arg, 0);
 }
 
 void ac_nir_store_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
-                      nir_ssa_def *val);
+                      nir_def *val);
 
-nir_ssa_def *
+nir_def *
 ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
                   unsigned rshift, unsigned bitwidth);
 
@@ -71,11 +71,11 @@ bool ac_nir_lower_intrinsics_to_args(nir_shader *shader, const enum amd_gfx_leve
                                      const struct ac_shader_args *ac_args);
 
 void
-ac_nir_store_var_components(nir_builder *b, nir_variable *var, nir_ssa_def *value,
+ac_nir_store_var_components(nir_builder *b, nir_variable *var, nir_def *value,
                             unsigned component, unsigned writemask);
 
 void
-ac_nir_export_primitive(nir_builder *b, nir_ssa_def *prim);
+ac_nir_export_primitive(nir_builder *b, nir_def *prim);
 
 void
 ac_nir_export_position(nir_builder *b,
@@ -84,21 +84,21 @@ ac_nir_export_position(nir_builder *b,
                        bool no_param_export,
                        bool force_vrs,
                        uint64_t outputs_written,
-                       nir_ssa_def *(*outputs)[4]);
+                       nir_def *(*outputs)[4]);
 
 void
 ac_nir_export_parameters(nir_builder *b,
                          const uint8_t *param_offsets,
                          uint64_t outputs_written,
                          uint16_t outputs_written_16bit,
-                         nir_ssa_def *(*outputs)[4],
-                         nir_ssa_def *(*outputs_16bit_lo)[4],
-                         nir_ssa_def *(*outputs_16bit_hi)[4]);
+                         nir_def *(*outputs)[4],
+                         nir_def *(*outputs_16bit_lo)[4],
+                         nir_def *(*outputs_16bit_hi)[4]);
 
-nir_ssa_def *
+nir_def *
 ac_nir_calc_io_offset(nir_builder *b,
                       nir_intrinsic_instr *intrin,
-                      nir_ssa_def *base_stride,
+                      nir_def *base_stride,
                       unsigned component_stride,
                       ac_nir_map_io_driver_location map_io);
 
@@ -206,10 +206,10 @@ ac_nir_lower_mesh_inputs_to_mem(nir_shader *shader,
                                 unsigned task_payload_entry_bytes,
                                 unsigned task_num_entries);
 
-nir_ssa_def *
+nir_def *
 ac_nir_cull_primitive(nir_builder *b,
-                      nir_ssa_def *initially_accepted,
-                      nir_ssa_def *pos[3][4],
+                      nir_def *initially_accepted,
+                      nir_def *pos[3][4],
                       unsigned num_vertices,
                       ac_nir_cull_accepted accept_func,
                       void *state);
@@ -262,8 +262,8 @@ ac_nir_gs_shader_query(nir_builder *b,
                        bool has_pipeline_stats_query,
                        unsigned num_vertices_per_primitive,
                        unsigned wave_size,
-                       nir_ssa_def *vertex_count[4],
-                       nir_ssa_def *primitive_count[4]);
+                       nir_def *vertex_count[4],
+                       nir_def *primitive_count[4]);
 
 void
 ac_nir_lower_legacy_gs(nir_shader *nir,
@@ -12,13 +12,13 @@
 
 typedef struct
 {
-   nir_ssa_def *w_reflection;
-   nir_ssa_def *all_w_negative;
-   nir_ssa_def *any_w_negative;
+   nir_def *w_reflection;
+   nir_def *all_w_negative;
+   nir_def *any_w_negative;
 } position_w_info;
 
 static void
-analyze_position_w(nir_builder *b, nir_ssa_def *pos[][4], unsigned num_vertices,
+analyze_position_w(nir_builder *b, nir_def *pos[][4], unsigned num_vertices,
                    position_w_info *w_info)
 {
    w_info->all_w_negative = nir_imm_true(b);
@@ -26,34 +26,34 @@ analyze_position_w(nir_builder *b, nir_ssa_def *pos[][4], unsigned num_vertices,
    w_info->any_w_negative = nir_imm_false(b);
 
    for (unsigned i = 0; i < num_vertices; ++i) {
-      nir_ssa_def *neg_w = nir_flt_imm(b, pos[i][3], 0.0f);
+      nir_def *neg_w = nir_flt_imm(b, pos[i][3], 0.0f);
      w_info->w_reflection = nir_ixor(b, neg_w, w_info->w_reflection);
      w_info->any_w_negative = nir_ior(b, neg_w, w_info->any_w_negative);
      w_info->all_w_negative = nir_iand(b, neg_w, w_info->all_w_negative);
    }
 }
 
-static nir_ssa_def *
-cull_face_triangle(nir_builder *b, nir_ssa_def *pos[3][4], const position_w_info *w_info)
+static nir_def *
+cull_face_triangle(nir_builder *b, nir_def *pos[3][4], const position_w_info *w_info)
 {
-   nir_ssa_def *det_t0 = nir_fsub(b, pos[2][0], pos[0][0]);
-   nir_ssa_def *det_t1 = nir_fsub(b, pos[1][1], pos[0][1]);
-   nir_ssa_def *det_t2 = nir_fsub(b, pos[0][0], pos[1][0]);
-   nir_ssa_def *det_t3 = nir_fsub(b, pos[0][1], pos[2][1]);
-   nir_ssa_def *det_p0 = nir_fmul(b, det_t0, det_t1);
-   nir_ssa_def *det_p1 = nir_fmul(b, det_t2, det_t3);
-   nir_ssa_def *det = nir_fsub(b, det_p0, det_p1);
+   nir_def *det_t0 = nir_fsub(b, pos[2][0], pos[0][0]);
+   nir_def *det_t1 = nir_fsub(b, pos[1][1], pos[0][1]);
+   nir_def *det_t2 = nir_fsub(b, pos[0][0], pos[1][0]);
+   nir_def *det_t3 = nir_fsub(b, pos[0][1], pos[2][1]);
+   nir_def *det_p0 = nir_fmul(b, det_t0, det_t1);
+   nir_def *det_p1 = nir_fmul(b, det_t2, det_t3);
+   nir_def *det = nir_fsub(b, det_p0, det_p1);
 
    det = nir_bcsel(b, w_info->w_reflection, nir_fneg(b, det), det);
 
-   nir_ssa_def *front_facing_ccw = nir_fgt_imm(b, det, 0.0f);
-   nir_ssa_def *zero_area = nir_feq_imm(b, det, 0.0f);
-   nir_ssa_def *ccw = nir_load_cull_ccw_amd(b);
-   nir_ssa_def *front_facing = nir_ieq(b, front_facing_ccw, ccw);
-   nir_ssa_def *cull_front = nir_load_cull_front_face_enabled_amd(b);
-   nir_ssa_def *cull_back = nir_load_cull_back_face_enabled_amd(b);
+   nir_def *front_facing_ccw = nir_fgt_imm(b, det, 0.0f);
+   nir_def *zero_area = nir_feq_imm(b, det, 0.0f);
+   nir_def *ccw = nir_load_cull_ccw_amd(b);
+   nir_def *front_facing = nir_ieq(b, front_facing_ccw, ccw);
+   nir_def *cull_front = nir_load_cull_front_face_enabled_amd(b);
+   nir_def *cull_back = nir_load_cull_back_face_enabled_amd(b);
 
-   nir_ssa_def *face_culled = nir_bcsel(b, front_facing, cull_front, cull_back);
+   nir_def *face_culled = nir_bcsel(b, front_facing, cull_front, cull_back);
    face_culled = nir_ior(b, face_culled, zero_area);
 
    /* Don't reject NaN and +/-infinity, these are tricky.
@@ -63,7 +63,7 @@ cull_face_triangle(nir_builder *b, nir_ssa_def *pos[3][4], const position_w_info
 }
 
 static void
-calc_bbox_triangle(nir_builder *b, nir_ssa_def *pos[3][4], nir_ssa_def *bbox_min[2], nir_ssa_def *bbox_max[2])
+calc_bbox_triangle(nir_builder *b, nir_def *pos[3][4], nir_def *bbox_min[2], nir_def *bbox_max[2])
 {
    for (unsigned chan = 0; chan < 2; ++chan) {
      bbox_min[chan] = nir_fmin(b, pos[0][chan], nir_fmin(b, pos[1][chan], pos[2][chan]));
@@ -71,10 +71,10 @@ calc_bbox_triangle(nir_builder *b, nir_ssa_def *pos[3][4], nir_ssa_def *bbox_min
    }
 }
 
-static nir_ssa_def *
-cull_frustrum(nir_builder *b, nir_ssa_def *bbox_min[2], nir_ssa_def *bbox_max[2])
+static nir_def *
+cull_frustrum(nir_builder *b, nir_def *bbox_min[2], nir_def *bbox_max[2])
 {
-   nir_ssa_def *prim_outside_view = nir_imm_false(b);
+   nir_def *prim_outside_view = nir_imm_false(b);
 
    for (unsigned chan = 0; chan < 2; ++chan) {
      prim_outside_view = nir_ior(b, prim_outside_view, nir_flt_imm(b, bbox_max[chan], -1.0f));
@@ -84,25 +84,25 @@ cull_frustrum(nir_builder *b, nir_ssa_def *bbox_min[2], nir_ssa_def *bbox_max[2]
    return prim_outside_view;
 }
 
-static nir_ssa_def *
-cull_small_primitive_triangle(nir_builder *b, nir_ssa_def *bbox_min[2], nir_ssa_def *bbox_max[2],
-                              nir_ssa_def *prim_is_small_else)
+static nir_def *
+cull_small_primitive_triangle(nir_builder *b, nir_def *bbox_min[2], nir_def *bbox_max[2],
+                              nir_def *prim_is_small_else)
 {
-   nir_ssa_def *prim_is_small = NULL;
+   nir_def *prim_is_small = NULL;
 
    nir_if *if_cull_small_prims = nir_push_if(b, nir_load_cull_small_primitives_enabled_amd(b));
    {
-      nir_ssa_def *vp = nir_load_viewport_xy_scale_and_offset(b);
-      nir_ssa_def *small_prim_precision = nir_load_cull_small_prim_precision_amd(b);
+      nir_def *vp = nir_load_viewport_xy_scale_and_offset(b);
+      nir_def *small_prim_precision = nir_load_cull_small_prim_precision_amd(b);
      prim_is_small = prim_is_small_else;
 
      for (unsigned chan = 0; chan < 2; ++chan) {
-         nir_ssa_def *vp_scale = nir_channel(b, vp, chan);
-         nir_ssa_def *vp_translate = nir_channel(b, vp, 2 + chan);
+         nir_def *vp_scale = nir_channel(b, vp, chan);
+         nir_def *vp_translate = nir_channel(b, vp, 2 + chan);
 
        /* Convert the position to screen-space coordinates. */
-         nir_ssa_def *min = nir_ffma(b, bbox_min[chan], vp_scale, vp_translate);
-         nir_ssa_def *max = nir_ffma(b, bbox_max[chan], vp_scale, vp_translate);
+         nir_def *min = nir_ffma(b, bbox_min[chan], vp_scale, vp_translate);
+         nir_def *max = nir_ffma(b, bbox_max[chan], vp_scale, vp_translate);
 
        /* Scale the bounding box according to precision. */
        min = nir_fsub(b, min, small_prim_precision);
@@ -112,7 +112,7 @@ cull_small_primitive_triangle(nir_builder *b, nir_ssa_def *bbox_min[2], nir_ssa_
        min = nir_fround_even(b, min);
        max = nir_fround_even(b, max);
 
-         nir_ssa_def *rounded_to_eq = nir_feq(b, min, max);
+         nir_def *rounded_to_eq = nir_feq(b, min, max);
        prim_is_small = nir_ior(b, prim_is_small, rounded_to_eq);
      }
    }
@@ -121,27 +121,27 @@ cull_small_primitive_triangle(nir_builder *b, nir_ssa_def *bbox_min[2], nir_ssa_
    return nir_if_phi(b, prim_is_small, prim_is_small_else);
 }
 
-static nir_ssa_def *
+static nir_def *
 ac_nir_cull_triangle(nir_builder *b,
-                     nir_ssa_def *initially_accepted,
-                     nir_ssa_def *pos[3][4],
+                     nir_def *initially_accepted,
+                     nir_def *pos[3][4],
                      position_w_info *w_info,
                      ac_nir_cull_accepted accept_func,
                      void *state)
 {
-   nir_ssa_def *accepted = initially_accepted;
+   nir_def *accepted = initially_accepted;
    accepted = nir_iand(b, accepted, nir_inot(b, w_info->all_w_negative));
    accepted = nir_iand(b, accepted, nir_inot(b, cull_face_triangle(b, pos, w_info)));
 
-   nir_ssa_def *bbox_accepted = NULL;
+   nir_def *bbox_accepted = NULL;
 
    nir_if *if_accepted = nir_push_if(b, accepted);
    {
-      nir_ssa_def *bbox_min[2] = {0}, *bbox_max[2] = {0};
+      nir_def *bbox_min[2] = {0}, *bbox_max[2] = {0};
      calc_bbox_triangle(b, pos, bbox_min, bbox_max);
 
-      nir_ssa_def *prim_outside_view = cull_frustrum(b, bbox_min, bbox_max);
-      nir_ssa_def *prim_invisible =
+      nir_def *prim_outside_view = cull_frustrum(b, bbox_min, bbox_max);
+      nir_def *prim_invisible =
        cull_small_primitive_triangle(b, bbox_min, bbox_max, prim_outside_view);
 
      bbox_accepted = nir_ior(b, nir_inot(b, prim_invisible), w_info->any_w_negative);
@@ -162,18 +162,18 @@ ac_nir_cull_triangle(nir_builder *b,
 }
 
 static void
-rotate_45degrees(nir_builder *b, nir_ssa_def *v[2])
+rotate_45degrees(nir_builder *b, nir_def *v[2])
 {
    /* sin(45) == cos(45) */
-   nir_ssa_def *sincos45 = nir_imm_float(b, 0.707106781);
+   nir_def *sincos45 = nir_imm_float(b, 0.707106781);
 
    /* x2 = x*cos45 - y*sin45 = x*sincos45 - y*sincos45
    * y2 = x*sin45 + y*cos45 = x*sincos45 + y*sincos45
    */
-   nir_ssa_def *first = nir_fmul(b, v[0], sincos45);
+   nir_def *first = nir_fmul(b, v[0], sincos45);
 
    /* Doing 2x ffma while duplicating the multiplication is 33% faster than fmul+fadd+fadd. */
-   nir_ssa_def *result[2] = {
+   nir_def *result[2] = {
      nir_ffma(b, nir_fneg(b, v[1]), sincos45, first),
      nir_ffma(b, v[1], sincos45, first),
    };
@@ -182,26 +182,26 @@ rotate_45degrees(nir_builder *b, nir_ssa_def *v[2])
 }
 
 static void
-calc_bbox_line(nir_builder *b, nir_ssa_def *pos[3][4], nir_ssa_def *bbox_min[2], nir_ssa_def *bbox_max[2])
+calc_bbox_line(nir_builder *b, nir_def *pos[3][4], nir_def *bbox_min[2], nir_def *bbox_max[2])
 {
-   nir_ssa_def *clip_half_line_width = nir_load_clip_half_line_width_amd(b);
+   nir_def *clip_half_line_width = nir_load_clip_half_line_width_amd(b);
 
    for (unsigned chan = 0; chan < 2; ++chan) {
      bbox_min[chan] = nir_fmin(b, pos[0][chan], pos[1][chan]);
      bbox_max[chan] = nir_fmax(b, pos[0][chan], pos[1][chan]);
 
-      nir_ssa_def *width = nir_channel(b, clip_half_line_width, chan);
+      nir_def *width = nir_channel(b, clip_half_line_width, chan);
      bbox_min[chan] = nir_fsub(b, bbox_min[chan], width);
      bbox_max[chan] = nir_fadd(b, bbox_max[chan], width);
    }
 }
 
-static nir_ssa_def *
-cull_small_primitive_line(nir_builder *b, nir_ssa_def *pos[3][4],
-                          nir_ssa_def *bbox_min[2], nir_ssa_def *bbox_max[2],
-                          nir_ssa_def *prim_is_small_else)
+static nir_def *
+cull_small_primitive_line(nir_builder *b, nir_def *pos[3][4],
+                          nir_def *bbox_min[2], nir_def *bbox_max[2],
+                          nir_def *prim_is_small_else)
 {
-   nir_ssa_def *prim_is_small = NULL;
+   nir_def *prim_is_small = NULL;
 
    /* Small primitive filter - eliminate lines that are too small to affect a sample. */
    nir_if *if_cull_small_prims = nir_push_if(b, nir_load_cull_small_primitives_enabled_amd(b));
@@ -234,13 +234,13 @@ cull_small_primitive_line(nir_builder *b, nir_ssa_def *pos[3][4],
      * A good test is piglit/lineloop because it draws 10k subpixel lines in a circle.
      * It should contain no holes if this matches hw behavior.
      */
-      nir_ssa_def *v0[2], *v1[2];
-      nir_ssa_def *vp = nir_load_viewport_xy_scale_and_offset(b);
+      nir_def *v0[2], *v1[2];
+      nir_def *vp = nir_load_viewport_xy_scale_and_offset(b);
 
      /* Get vertex positions in pixels. */
      for (unsigned chan = 0; chan < 2; chan++) {
-         nir_ssa_def *vp_scale = nir_channel(b, vp, chan);
-         nir_ssa_def *vp_translate = nir_channel(b, vp, 2 + chan);
+         nir_def *vp_scale = nir_channel(b, vp, chan);
+         nir_def *vp_translate = nir_channel(b, vp, 2 + chan);
 
        v0[chan] = nir_ffma(b, pos[0][chan], vp_scale, vp_translate);
        v1[chan] = nir_ffma(b, pos[1][chan], vp_scale, vp_translate);
@@ -250,9 +250,9 @@ cull_small_primitive_line(nir_builder *b, nir_ssa_def *pos[3][4],
      rotate_45degrees(b, v0);
      rotate_45degrees(b, v1);
 
-      nir_ssa_def *small_prim_precision = nir_load_cull_small_prim_precision_amd(b);
+      nir_def *small_prim_precision = nir_load_cull_small_prim_precision_amd(b);
 
-      nir_ssa_def *rounded_to_eq[2];
+      nir_def *rounded_to_eq[2];
      for (unsigned chan = 0; chan < 2; chan++) {
        /* The width of each square is sqrt(0.5), so scale it to 1 because we want
        * round() to give us the position of the closest center of a square (diamond).
@@ -263,8 +263,8 @@ cull_small_primitive_line(nir_builder *b, nir_ssa_def *pos[3][4],
        /* Compute the bounding box around both vertices. We do this because we must
        * enlarge the line area by the precision of the rasterizer.
        */
-         nir_ssa_def *min = nir_fmin(b, v0[chan], v1[chan]);
-         nir_ssa_def *max = nir_fmax(b, v0[chan], v1[chan]);
+         nir_def *min = nir_fmin(b, v0[chan], v1[chan]);
+         nir_def *max = nir_fmax(b, v0[chan], v1[chan]);
 
        /* Enlarge the bounding box by the precision of the rasterizer. */
        min = nir_fsub(b, min, small_prim_precision);
@@ -287,27 +287,27 @@ cull_small_primitive_line(nir_builder *b, nir_ssa_def *pos[3][4],
    return nir_if_phi(b, prim_is_small, prim_is_small_else);
 }
 
-static nir_ssa_def *
+static nir_def *
 ac_nir_cull_line(nir_builder *b,
-                 nir_ssa_def *initially_accepted,
-                 nir_ssa_def *pos[3][4],
+                 nir_def *initially_accepted,
+                 nir_def *pos[3][4],
                  position_w_info *w_info,
                  ac_nir_cull_accepted accept_func,
                  void *state)
 {
-   nir_ssa_def *accepted = initially_accepted;
+   nir_def *accepted = initially_accepted;
    accepted = nir_iand(b, accepted, nir_inot(b, w_info->all_w_negative));
 
-   nir_ssa_def *bbox_accepted = NULL;
+   nir_def *bbox_accepted = NULL;
 
    nir_if *if_accepted = nir_push_if(b, accepted);
    {
-      nir_ssa_def *bbox_min[2] = {0}, *bbox_max[2] = {0};
+      nir_def *bbox_min[2] = {0}, *bbox_max[2] = {0};
      calc_bbox_line(b, pos, bbox_min, bbox_max);
 
     /* Frustrum culling - eliminate lines that are fully outside the view. */
-      nir_ssa_def *prim_outside_view = cull_frustrum(b, bbox_min, bbox_max);
-      nir_ssa_def *prim_invisible =
+      nir_def *prim_outside_view = cull_frustrum(b, bbox_min, bbox_max);
+      nir_def *prim_invisible =
        cull_small_primitive_line(b, pos, bbox_min, bbox_max, prim_outside_view);
 
      bbox_accepted = nir_ior(b, nir_inot(b, prim_invisible), w_info->any_w_negative);
@@ -326,10 +326,10 @@ ac_nir_cull_line(nir_builder *b,
    return nir_if_phi(b, bbox_accepted, accepted);
 }
 
-nir_ssa_def *
+nir_def *
 ac_nir_cull_primitive(nir_builder *b,
-                      nir_ssa_def *initially_accepted,
-                      nir_ssa_def *pos[3][4],
+                      nir_def *initially_accepted,
+                      nir_def *pos[3][4],
                       unsigned num_vertices,
                       ac_nir_cull_accepted accept_func,
                       void *state)
@@ -36,8 +36,8 @@ typedef struct {
    bool gs_triangle_strip_adjacency_fix;
 } lower_esgs_io_state;
 
-static nir_ssa_def *
-emit_split_buffer_load(nir_builder *b, nir_ssa_def *desc, nir_ssa_def *v_off, nir_ssa_def *s_off,
+static nir_def *
+emit_split_buffer_load(nir_builder *b, nir_def *desc, nir_def *v_off, nir_def *s_off,
                        unsigned component_stride, unsigned num_components, unsigned bit_size)
 {
    unsigned total_bytes = num_components * bit_size / 8u;
@@ -45,7 +45,7 @@ emit_split_buffer_load(nir_builder *b, nir_ssa_def *desc, nir_ssa_def *v_off, ni
    unsigned remaining_bytes = total_bytes - full_dwords * 4u;
 
    /* Accommodate max number of split 64-bit loads */
-   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS * 2u];
+   nir_def *comps[NIR_MAX_VEC_COMPONENTS * 2u];
 
    /* Assume that 1x32-bit load is better than 1x16-bit + 1x8-bit */
    if (remaining_bytes == 3) {
@@ -53,7 +53,7 @@ emit_split_buffer_load(nir_builder *b, nir_ssa_def *desc, nir_ssa_def *v_off, ni
      full_dwords++;
    }
 
-   nir_ssa_def *zero = nir_imm_int(b, 0);
+   nir_def *zero = nir_imm_int(b, 0);
 
    for (unsigned i = 0; i < full_dwords; ++i)
      comps[i] = nir_load_buffer_amd(b, 1, 32, desc, v_off, s_off, zero,
@@ -70,11 +70,11 @@ emit_split_buffer_load(nir_builder *b, nir_ssa_def *desc, nir_ssa_def *v_off, ni
 }
 
 static void
-emit_split_buffer_store(nir_builder *b, nir_ssa_def *d, nir_ssa_def *desc, nir_ssa_def *v_off, nir_ssa_def *s_off,
+emit_split_buffer_store(nir_builder *b, nir_def *d, nir_def *desc, nir_def *v_off, nir_def *s_off,
                         unsigned component_stride, unsigned num_components, unsigned bit_size,
                         unsigned writemask, bool swizzled, bool slc)
 {
-   nir_ssa_def *zero = nir_imm_int(b, 0);
+   nir_def *zero = nir_imm_int(b, 0);
 
    while (writemask) {
      int start, count;
@@ -91,7 +91,7 @@ emit_split_buffer_store(nir_builder *b, nir_ssa_def *d, nir_ssa_def *desc, nir_s
      else if ((start_byte % 4) == 2)
        store_bytes = MIN2(store_bytes, 2);
 
-      nir_ssa_def *store_val = nir_extract_bits(b, &d, 1, start_byte * 8u, 1, store_bytes * 8u);
+      nir_def *store_val = nir_extract_bits(b, &d, 1, start_byte * 8u, 1, store_bytes * 8u);
      nir_store_buffer_amd(b, store_val, desc, v_off, s_off, zero,
                           .base = start_byte, .memory_modes = nir_var_shader_out,
                           .access = ACCESS_COHERENT |
@@ -153,19 +153,19 @@ lower_es_output_store(nir_builder *b,
    unsigned write_mask = nir_intrinsic_write_mask(intrin);
 
    b->cursor = nir_before_instr(instr);
-   nir_ssa_def *io_off = ac_nir_calc_io_offset(b, intrin, nir_imm_int(b, 16u), 4u, st->map_io);
+   nir_def *io_off = ac_nir_calc_io_offset(b, intrin, nir_imm_int(b, 16u), 4u, st->map_io);
 
    if (st->gfx_level <= GFX8) {
      /* GFX6-8: ES is a separate HW stage, data is passed from ES to GS in VRAM. */
-      nir_ssa_def *ring = nir_load_ring_esgs_amd(b);
-      nir_ssa_def *es2gs_off = nir_load_ring_es2gs_offset_amd(b);
+      nir_def *ring = nir_load_ring_esgs_amd(b);
+      nir_def *es2gs_off = nir_load_ring_es2gs_offset_amd(b);
      emit_split_buffer_store(b, intrin->src[0].ssa, ring, io_off, es2gs_off, 4u,
                              intrin->src[0].ssa->num_components, intrin->src[0].ssa->bit_size,
                              write_mask, true, true);
    } else {
     /* GFX9+: ES is merged into GS, data is passed through LDS. */
-      nir_ssa_def *vertex_idx = nir_load_local_invocation_index(b);
-      nir_ssa_def *off = nir_iadd(b, nir_imul_imm(b, vertex_idx, st->esgs_itemsize), io_off);
+      nir_def *vertex_idx = nir_load_local_invocation_index(b);
+      nir_def *off = nir_iadd(b, nir_imul_imm(b, vertex_idx, st->esgs_itemsize), io_off);
      nir_store_shared(b, intrin->src[0].ssa, off, .write_mask = write_mask);
    }
 
@@ -173,10 +173,10 @@ lower_es_output_store(nir_builder *b,
    return true;
 }
 
-static nir_ssa_def *
+static nir_def *
 gs_get_vertex_offset(nir_builder *b, lower_esgs_io_state *st, unsigned vertex_index)
 {
-   nir_ssa_def *origin = nir_load_gs_vertex_offset_amd(b, .base = vertex_index);
+   nir_def *origin = nir_load_gs_vertex_offset_amd(b, .base = vertex_index);
    if (!st->gs_triangle_strip_adjacency_fix)
      return origin;
 
@@ -190,33 +190,33 @@ gs_get_vertex_offset(nir_builder *b, lower_esgs_io_state *st, unsigned vertex_in
      /* 6 vertex offset are packed to 3 vgprs for GFX9+ */
      fixed_index = (vertex_index + 2) % 3;
    }
-   nir_ssa_def *fixed = nir_load_gs_vertex_offset_amd(b, .base = fixed_index);
+   nir_def *fixed = nir_load_gs_vertex_offset_amd(b, .base = fixed_index);
 
-   nir_ssa_def *prim_id = nir_load_primitive_id(b);
+   nir_def *prim_id = nir_load_primitive_id(b);
    /* odd primitive id use fixed offset */
-   nir_ssa_def *cond = nir_i2b(b, nir_iand_imm(b, prim_id, 1));
+   nir_def *cond = nir_i2b(b, nir_iand_imm(b, prim_id, 1));
    return nir_bcsel(b, cond, fixed, origin);
 }
 
-static nir_ssa_def *
+static nir_def *
 gs_per_vertex_input_vertex_offset_gfx6(nir_builder *b, lower_esgs_io_state *st,
                                        nir_src *vertex_src)
 {
    if (nir_src_is_const(*vertex_src))
      return gs_get_vertex_offset(b, st, nir_src_as_uint(*vertex_src));
 
-   nir_ssa_def *vertex_offset = gs_get_vertex_offset(b, st, 0);
+   nir_def *vertex_offset = gs_get_vertex_offset(b, st, 0);
 
    for (unsigned i = 1; i < b->shader->info.gs.vertices_in; ++i) {
-      nir_ssa_def *cond = nir_ieq_imm(b, vertex_src->ssa, i);
-      nir_ssa_def *elem = gs_get_vertex_offset(b, st, i);
+      nir_def *cond = nir_ieq_imm(b, vertex_src->ssa, i);
+      nir_def *elem = gs_get_vertex_offset(b, st, i);
      vertex_offset = nir_bcsel(b, cond, elem, vertex_offset);
    }
 
    return vertex_offset;
 }
 
-static nir_ssa_def *
+static nir_def *
 gs_per_vertex_input_vertex_offset_gfx9(nir_builder *b, lower_esgs_io_state *st,
                                        nir_src *vertex_src)
 {
@@ -226,11 +226,11 @@ gs_per_vertex_input_vertex_offset_gfx9(nir_builder *b, lower_esgs_io_state *st,
                             (vertex & 1u) * 16u, 16u);
    }
 
-   nir_ssa_def *vertex_offset = gs_get_vertex_offset(b, st, 0);
+   nir_def *vertex_offset = gs_get_vertex_offset(b, st, 0);
 
    for (unsigned i = 1; i < b->shader->info.gs.vertices_in; i++) {
-      nir_ssa_def *cond = nir_ieq_imm(b, vertex_src->ssa, i);
-      nir_ssa_def *elem = gs_get_vertex_offset(b, st, i / 2u * 2u);
+      nir_def *cond = nir_ieq_imm(b, vertex_src->ssa, i);
+      nir_def *elem = gs_get_vertex_offset(b, st, i / 2u * 2u);
      if (i % 2u)
        elem = nir_ishr_imm(b, elem, 16u);
 
@@ -240,13 +240,13 @@ gs_per_vertex_input_vertex_offset_gfx9(nir_builder *b, lower_esgs_io_state *st,
    return nir_iand_imm(b, vertex_offset, 0xffffu);
 }
 
-static nir_ssa_def *
+static nir_def *
 gs_per_vertex_input_offset(nir_builder *b,
                            lower_esgs_io_state *st,
                            nir_intrinsic_instr *instr)
 {
    nir_src *vertex_src = nir_get_io_arrayed_index_src(instr);
-   nir_ssa_def *vertex_offset = st->gfx_level >= GFX9
+   nir_def *vertex_offset = st->gfx_level >= GFX9
      ? gs_per_vertex_input_vertex_offset_gfx9(b, st, vertex_src)
      : gs_per_vertex_input_vertex_offset_gfx6(b, st, vertex_src);
 
@@ -257,25 +257,25 @@ gs_per_vertex_input_offset(nir_builder *b,
      vertex_offset = nir_imul(b, vertex_offset, nir_load_esgs_vertex_stride_amd(b));
 
    unsigned base_stride = st->gfx_level >= GFX9 ? 1 : 64 /* Wave size on GFX6-8 */;
-   nir_ssa_def *io_off = ac_nir_calc_io_offset(b, instr, nir_imm_int(b, base_stride * 4u), base_stride, st->map_io);
-   nir_ssa_def *off = nir_iadd(b, io_off, vertex_offset);
+   nir_def *io_off = ac_nir_calc_io_offset(b, instr, nir_imm_int(b, base_stride * 4u), base_stride, st->map_io);
+   nir_def *off = nir_iadd(b, io_off, vertex_offset);
    return nir_imul_imm(b, off, 4u);
 }
 
-static nir_ssa_def *
+static nir_def *
 lower_gs_per_vertex_input_load(nir_builder *b,
                                nir_instr *instr,
                                void *state)
 {
    lower_esgs_io_state *st = (lower_esgs_io_state *) state;
    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-   nir_ssa_def *off = gs_per_vertex_input_offset(b, st, intrin);
+   nir_def *off = gs_per_vertex_input_offset(b, st, intrin);
 
    if (st->gfx_level >= GFX9)
      return nir_load_shared(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, off);
 
    unsigned wave_size = 64u; /* GFX6-8 only support wave64 */
-   nir_ssa_def *ring = nir_load_ring_esgs_amd(b);
+   nir_def *ring = nir_load_ring_esgs_amd(b);
    return emit_split_buffer_load(b, ring, off, nir_imm_zero(b, 1, 32), 4u * wave_size,
                                  intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size);
 }
@@ -8,24 +8,24 @@
 #include "nir.h"
 #include "nir_builder.h"
 
-static nir_ssa_def *
-try_extract_additions(nir_builder *b, nir_ssa_scalar scalar, uint64_t *out_const,
-                      nir_ssa_def **out_offset)
+static nir_def *
+try_extract_additions(nir_builder *b, nir_scalar scalar, uint64_t *out_const,
+                      nir_def **out_offset)
 {
-   if (!nir_ssa_scalar_is_alu(scalar) || nir_ssa_scalar_alu_op(scalar) != nir_op_iadd)
+   if (!nir_scalar_is_alu(scalar) || nir_scalar_alu_op(scalar) != nir_op_iadd)
      return NULL;
 
    nir_alu_instr *alu = nir_instr_as_alu(scalar.def->parent_instr);
-   nir_ssa_scalar src0 = nir_ssa_scalar_chase_alu_src(scalar, 0);
-   nir_ssa_scalar src1 = nir_ssa_scalar_chase_alu_src(scalar, 1);
+   nir_scalar src0 = nir_scalar_chase_alu_src(scalar, 0);
+   nir_scalar src1 = nir_scalar_chase_alu_src(scalar, 1);
 
    for (unsigned i = 0; i < 2; ++i) {
-      nir_ssa_scalar src = i ? src1 : src0;
-      if (nir_ssa_scalar_is_const(src)) {
-         *out_const += nir_ssa_scalar_as_uint(src);
-      } else if (nir_ssa_scalar_is_alu(src) && nir_ssa_scalar_alu_op(src) == nir_op_u2u64) {
-         nir_ssa_scalar offset_scalar = nir_ssa_scalar_chase_alu_src(src, 0);
-         nir_ssa_def *offset = nir_channel(b, offset_scalar.def, offset_scalar.comp);
+      nir_scalar src = i ? src1 : src0;
+      if (nir_scalar_is_const(src)) {
+         *out_const += nir_scalar_as_uint(src);
+      } else if (nir_scalar_is_alu(src) && nir_scalar_alu_op(src) == nir_op_u2u64) {
+         nir_scalar offset_scalar = nir_scalar_chase_alu_src(src, 0);
+         nir_def *offset = nir_channel(b, offset_scalar.def, offset_scalar.comp);
        if (*out_offset)
          *out_offset = nir_iadd(b, *out_offset, offset);
        else
@@ -34,13 +34,13 @@ try_extract_additions(nir_builder *b, nir_ssa_scalar scalar, uint64_t *out_const
        continue;
      }
 
-      nir_ssa_def *replace_src =
+      nir_def *replace_src =
        try_extract_additions(b, i == 1 ? src0 : src1, out_const, out_offset);
      return replace_src ? replace_src : nir_ssa_for_alu_src(b, alu, 1 - i);
    }
 
-   nir_ssa_def *replace_src0 = try_extract_additions(b, src0, out_const, out_offset);
-   nir_ssa_def *replace_src1 = try_extract_additions(b, src1, out_const, out_offset);
+   nir_def *replace_src0 = try_extract_additions(b, src0, out_const, out_offset);
+   nir_def *replace_src1 = try_extract_additions(b, src1, out_const, out_offset);
    if (!replace_src0 && !replace_src1)
      return NULL;
 
@@ -80,10 +80,10 @@ process_instr(nir_builder *b, nir_instr *instr, void *_)
    nir_src *addr_src = &intrin->src[addr_src_idx];
 
    uint64_t off_const = 0;
-   nir_ssa_def *offset = NULL;
-   nir_ssa_scalar src = {addr_src->ssa, 0};
+   nir_def *offset = NULL;
+   nir_scalar src = {addr_src->ssa, 0};
    b->cursor = nir_after_instr(addr_src->ssa->parent_instr);
-   nir_ssa_def *addr = try_extract_additions(b, src, &off_const, &offset);
+   nir_def *addr = try_extract_additions(b, src, &off_const, &offset);
    addr = addr ? addr : addr_src->ssa;
 
    b->cursor = nir_before_instr(&intrin->instr);
@@ -122,7 +122,7 @@ process_instr(nir_builder *b, nir_instr *instr, void *_)
 
    nir_builder_instr_insert(b, &new_intrin->instr);
    if (op != nir_intrinsic_store_global_amd)
-      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, &new_intrin->dest.ssa);
+      nir_def_rewrite_uses(&intrin->dest.ssa, &new_intrin->dest.ssa);
    nir_instr_remove(&intrin->instr);
 
    return true;
@@ -23,7 +23,7 @@
 #include "nir_builder.h"
 #include "amdgfxregs.h"

-static nir_ssa_def *get_field(nir_builder *b, nir_ssa_def *desc, unsigned index, unsigned mask)
+static nir_def *get_field(nir_builder *b, nir_def *desc, unsigned index, unsigned mask)
 {
    return nir_ubfe_imm(b, nir_channel(b, desc, index), ffs(mask) - 1, util_bitcount(mask));
 }
@@ -46,17 +46,17 @@ static unsigned get_coord_components(enum glsl_sampler_dim dim, bool is_array)
 /* Lower image coordinates to a buffer element index. Return UINT_MAX if the image coordinates
  * are out of bounds.
  */
-static nir_ssa_def *lower_image_coords(nir_builder *b, nir_ssa_def *desc, nir_ssa_def *coord,
+static nir_def *lower_image_coords(nir_builder *b, nir_def *desc, nir_def *coord,
                                    enum glsl_sampler_dim dim, bool is_array,
                                    bool handle_out_of_bounds)
 {
    unsigned num_coord_components = get_coord_components(dim, is_array);
-   nir_ssa_def *zero = nir_imm_int(b, 0);
+   nir_def *zero = nir_imm_int(b, 0);

    /* Get coordinates. */
-   nir_ssa_def *x = nir_channel(b, coord, 0);
-   nir_ssa_def *y = num_coord_components >= 2 ? nir_channel(b, coord, 1) : NULL;
-   nir_ssa_def *z = num_coord_components >= 3 ? nir_channel(b, coord, 2) : NULL;
+   nir_def *x = nir_channel(b, coord, 0);
+   nir_def *y = num_coord_components >= 2 ? nir_channel(b, coord, 1) : NULL;
+   nir_def *z = num_coord_components >= 3 ? nir_channel(b, coord, 2) : NULL;

    if (dim == GLSL_SAMPLER_DIM_1D && is_array) {
       z = y;
@@ -64,35 +64,35 @@ static nir_ssa_def *lower_image_coords(nir_builder *b, nir_ssa_def *desc, nir_ss
    }

    if (is_array) {
-      nir_ssa_def *first_layer = get_field(b, desc, 5, 0xffff0000);
+      nir_def *first_layer = get_field(b, desc, 5, 0xffff0000);
       z = nir_iadd(b, z, first_layer);
    }

    /* Compute the buffer element index. */
-   nir_ssa_def *index = x;
+   nir_def *index = x;
    if (y) {
-      nir_ssa_def *pitch = nir_channel(b, desc, 6);
+      nir_def *pitch = nir_channel(b, desc, 6);
       index = nir_iadd(b, index, nir_imul(b, pitch, y));
    }
    if (z) {
-      nir_ssa_def *slice_elements = nir_channel(b, desc, 7);
+      nir_def *slice_elements = nir_channel(b, desc, 7);
       index = nir_iadd(b, index, nir_imul(b, slice_elements, z));
    }

    /* Determine whether the coordinates are out of bounds. */
-   nir_ssa_def *out_of_bounds = NULL;
+   nir_def *out_of_bounds = NULL;

    if (handle_out_of_bounds) {
-      nir_ssa_def *width = get_field(b, desc, 4, 0xffff);
+      nir_def *width = get_field(b, desc, 4, 0xffff);
       out_of_bounds = nir_ior(b, nir_ilt(b, x, zero), nir_ige(b, x, width));

       if (y) {
-         nir_ssa_def *height = get_field(b, desc, 4, 0xffff0000);
+         nir_def *height = get_field(b, desc, 4, 0xffff0000);
          out_of_bounds = nir_ior(b, out_of_bounds,
                                  nir_ior(b, nir_ilt(b, y, zero), nir_ige(b, y, height)));
       }
       if (z) {
-         nir_ssa_def *depth = get_field(b, desc, 5, 0xffff);
+         nir_def *depth = get_field(b, desc, 5, 0xffff);
          out_of_bounds = nir_ior(b, out_of_bounds,
                                  nir_ior(b, nir_ilt(b, z, zero), nir_ige(b, z, depth)));
       }
@@ -104,12 +104,12 @@ static nir_ssa_def *lower_image_coords(nir_builder *b, nir_ssa_def *desc, nir_ss
    return index;
 }

-static nir_ssa_def *emulated_image_load(nir_builder *b, unsigned num_components, unsigned bit_size,
-                                        nir_ssa_def *desc, nir_ssa_def *coord,
+static nir_def *emulated_image_load(nir_builder *b, unsigned num_components, unsigned bit_size,
+                                    nir_def *desc, nir_def *coord,
                                     enum gl_access_qualifier access, enum glsl_sampler_dim dim,
                                     bool is_array, bool handle_out_of_bounds)
 {
-   nir_ssa_def *zero = nir_imm_int(b, 0);
+   nir_def *zero = nir_imm_int(b, 0);

    return nir_load_buffer_amd(b, num_components, bit_size, nir_channels(b, desc, 0xf),
                               zero, zero,
@@ -120,11 +120,11 @@ static nir_ssa_def *emulated_image_load(nir_builder *b, unsigned num_components,
                               .access = access | ACCESS_USES_FORMAT_AMD);
 }

-static void emulated_image_store(nir_builder *b, nir_ssa_def *desc, nir_ssa_def *coord,
-                                 nir_ssa_def *data, enum gl_access_qualifier access,
+static void emulated_image_store(nir_builder *b, nir_def *desc, nir_def *coord,
+                                 nir_def *data, enum gl_access_qualifier access,
                                  enum glsl_sampler_dim dim, bool is_array)
 {
-   nir_ssa_def *zero = nir_imm_int(b, 0);
+   nir_def *zero = nir_imm_int(b, 0);

    nir_store_buffer_amd(b, data, nir_channels(b, desc, 0xf), zero, zero,
                         lower_image_coords(b, desc, coord, dim, is_array, true),
@@ -134,7 +134,7 @@ static void emulated_image_store(nir_builder *b, nir_ssa_def *desc, nir_ssa_def
 }

 /* Return the width, height, or depth for dim=0,1,2. */
-static nir_ssa_def *get_dim(nir_builder *b, nir_ssa_def *desc, unsigned dim)
+static nir_def *get_dim(nir_builder *b, nir_def *desc, unsigned dim)
 {
    return get_field(b, desc, 4 + dim / 2, 0xffff << (16 * (dim % 2)));
 }
@@ -142,9 +142,9 @@ static nir_ssa_def *get_dim(nir_builder *b, nir_ssa_def *desc, unsigned dim)
 /* Lower txl with lod=0 to typed buffer loads. This is based on the equations in the GL spec.
  * This basically converts the tex opcode into 1 or more image_load opcodes.
  */
-static nir_ssa_def *emulated_tex_level_zero(nir_builder *b, unsigned num_components,
-                                            unsigned bit_size, nir_ssa_def *desc,
-                                            nir_ssa_def *sampler_desc, nir_ssa_def *coord_vec,
+static nir_def *emulated_tex_level_zero(nir_builder *b, unsigned num_components,
+                                        unsigned bit_size, nir_def *desc,
+                                        nir_def *sampler_desc, nir_def *coord_vec,
                                         enum glsl_sampler_dim sampler_dim, bool is_array)
 {
    const enum gl_access_qualifier access =
@@ -153,9 +153,9 @@ static nir_ssa_def *emulated_tex_level_zero(nir_builder *b, unsigned num_compone
    const unsigned num_dim_coords = num_coord_components - is_array;
    const unsigned array_comp = num_coord_components - 1;

-   nir_ssa_def *zero = nir_imm_int(b, 0);
-   nir_ssa_def *fp_one = nir_imm_floatN_t(b, 1, bit_size);
-   nir_ssa_def *coord[3] = {0};
+   nir_def *zero = nir_imm_int(b, 0);
+   nir_def *fp_one = nir_imm_floatN_t(b, 1, bit_size);
+   nir_def *coord[3] = {0};

    assert(num_coord_components <= 3);
    for (unsigned i = 0; i < num_coord_components; i++)
@@ -179,14 +179,14 @@ static nir_ssa_def *emulated_tex_level_zero(nir_builder *b, unsigned num_compone
     *
     * We assume that XY_MIN_FILTER and Z_FILTER are identical.
     */
-   nir_ssa_def *is_nearest =
+   nir_def *is_nearest =
       nir_ieq_imm(b, nir_iand_imm(b, nir_channel(b, sampler_desc, 2), 1 << 20), 0);
-   nir_ssa_def *result_nearest, *result_linear;
+   nir_def *result_nearest, *result_linear;

    nir_if *if_nearest = nir_push_if(b, is_nearest);
    {
       /* Nearest filter. */
-      nir_ssa_def *coord0[3] = {0};
+      nir_def *coord0[3] = {0};
       memcpy(coord0, coord, sizeof(coord));

       for (unsigned dim = 0; dim < num_dim_coords; dim++) {
@@ -205,9 +205,9 @@ static nir_ssa_def *emulated_tex_level_zero(nir_builder *b, unsigned num_compone
    nir_push_else(b, if_nearest);
    {
       /* Linear filter. */
-      nir_ssa_def *coord0[3] = {0};
-      nir_ssa_def *coord1[3] = {0};
-      nir_ssa_def *weight[3] = {0};
+      nir_def *coord0[3] = {0};
+      nir_def *coord1[3] = {0};
+      nir_def *weight[3] = {0};

       memcpy(coord0, coord, sizeof(coord));

@@ -231,10 +231,10 @@ static nir_ssa_def *emulated_tex_level_zero(nir_builder *b, unsigned num_compone
       /* Load all texels for the linear filter.
        * This is 2 texels for 1D, 4 texels for 2D, and 8 texels for 3D.
        */
-      nir_ssa_def *texel[8];
+      nir_def *texel[8];

       for (unsigned i = 0; i < (1 << num_dim_coords); i++) {
-         nir_ssa_def *texel_coord[3];
+         nir_def *texel_coord[3];

          /* Determine whether the current texel should use channels from coord0
           * or coord1. The i-th bit of the texel index determines that.
@@ -247,7 +247,7 @@ static nir_ssa_def *emulated_tex_level_zero(nir_builder *b, unsigned num_compone
            texel_coord[array_comp] = coord0[array_comp];

         /* Compute how much the texel contributes to the final result. */
-         nir_ssa_def *texel_weight = fp_one;
+         nir_def *texel_weight = fp_one;
         for (unsigned dim = 0; dim < num_dim_coords; dim++) {
            /* Let's see what "i" represents:
             * Texel i=0 = 000
@@ -296,10 +296,10 @@ static bool lower_image_opcodes(nir_builder *b, nir_instr *instr, void *data)
       enum gl_access_qualifier access;
       enum glsl_sampler_dim dim;
       bool is_array;
-      nir_ssa_def *desc = NULL, *result = NULL;
+      nir_def *desc = NULL, *result = NULL;
       ASSERTED const char *intr_name;

-      nir_ssa_def *dst = &intr->dest.ssa;
+      nir_def *dst = &intr->dest.ssa;
       b->cursor = nir_before_instr(instr);

       switch (intr->intrinsic) {
@@ -359,7 +359,7 @@ static bool lower_image_opcodes(nir_builder *b, nir_instr *instr, void *data)
       case nir_intrinsic_bindless_image_load:
          result = emulated_image_load(b, intr->dest.ssa.num_components, intr->dest.ssa.bit_size,
                                       desc, intr->src[1].ssa, access, dim, is_array, true);
-         nir_ssa_def_rewrite_uses_after(dst, result, instr);
+         nir_def_rewrite_uses_after(dst, result, instr);
          nir_instr_remove(instr);
          return true;

@@ -376,9 +376,9 @@ static bool lower_image_opcodes(nir_builder *b, nir_instr *instr, void *data)
    } else if (instr->type == nir_instr_type_tex) {
       nir_tex_instr *tex = nir_instr_as_tex(instr);
       nir_tex_instr *new_tex;
-      nir_ssa_def *coord = NULL, *desc = NULL, *sampler_desc = NULL, *result = NULL;
+      nir_def *coord = NULL, *desc = NULL, *sampler_desc = NULL, *result = NULL;

-      nir_ssa_def *dst = &tex->dest.ssa;
+      nir_def *dst = &tex->dest.ssa;
       b->cursor = nir_before_instr(instr);

       switch (tex->op) {
@@ -447,7 +447,7 @@ static bool lower_image_opcodes(nir_builder *b, nir_instr *instr, void *data)
                                       desc, coord,
                                       ACCESS_RESTRICT | ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER,
                                       tex->sampler_dim, tex->is_array, true);
-         nir_ssa_def_rewrite_uses_after(dst, result, instr);
+         nir_def_rewrite_uses_after(dst, result, instr);
          nir_instr_remove(instr);
          return true;

@@ -455,7 +455,7 @@ static bool lower_image_opcodes(nir_builder *b, nir_instr *instr, void *data)
       case nir_texop_txl:
          result = emulated_tex_level_zero(b, tex->dest.ssa.num_components, tex->dest.ssa.bit_size,
                                           desc, sampler_desc, coord, tex->sampler_dim, tex->is_array);
-         nir_ssa_def_rewrite_uses_after(dst, result, instr);
+         nir_def_rewrite_uses_after(dst, result, instr);
          nir_instr_remove(instr);
          return true;

File diff suppressed because it is too large

@@ -21,7 +21,7 @@ typedef struct {
    bool lower_load_barycentric;

    /* Add one for dual source blend second output. */
-   nir_ssa_def *outputs[FRAG_RESULT_MAX + 1][4];
+   nir_def *outputs[FRAG_RESULT_MAX + 1][4];
    nir_alu_type output_types[FRAG_RESULT_MAX + 1];

    /* MAX_DRAW_BUFFERS for MRT export, 1 for MRTZ export */
@@ -85,52 +85,52 @@ init_interp_param(nir_builder *b, lower_ps_state *s)
     * contains fully-covered quads.
     */
    if (s->options->bc_optimize_for_persp || s->options->bc_optimize_for_linear) {
-      nir_ssa_def *bc_optimize = nir_load_barycentric_optimize_amd(b);
+      nir_def *bc_optimize = nir_load_barycentric_optimize_amd(b);

       if (s->options->bc_optimize_for_persp) {
-         nir_ssa_def *center =
+         nir_def *center =
             nir_load_barycentric_pixel(b, 32, .interp_mode = INTERP_MODE_SMOOTH);
-         nir_ssa_def *centroid =
+         nir_def *centroid =
             nir_load_barycentric_centroid(b, 32, .interp_mode = INTERP_MODE_SMOOTH);

-         nir_ssa_def *value = nir_bcsel(b, bc_optimize, center, centroid);
+         nir_def *value = nir_bcsel(b, bc_optimize, center, centroid);
          nir_store_var(b, s->persp_centroid, value, 0x3);
       }

       if (s->options->bc_optimize_for_linear) {
-         nir_ssa_def *center =
+         nir_def *center =
             nir_load_barycentric_pixel(b, 32, .interp_mode = INTERP_MODE_NOPERSPECTIVE);
-         nir_ssa_def *centroid =
+         nir_def *centroid =
             nir_load_barycentric_centroid(b, 32, .interp_mode = INTERP_MODE_NOPERSPECTIVE);

-         nir_ssa_def *value = nir_bcsel(b, bc_optimize, center, centroid);
+         nir_def *value = nir_bcsel(b, bc_optimize, center, centroid);
          nir_store_var(b, s->linear_centroid, value, 0x3);
       }
    }

    if (s->options->force_persp_sample_interp) {
-      nir_ssa_def *sample =
+      nir_def *sample =
         nir_load_barycentric_sample(b, 32, .interp_mode = INTERP_MODE_SMOOTH);
      nir_store_var(b, s->persp_center, sample, 0x3);
      nir_store_var(b, s->persp_centroid, sample, 0x3);
    }

    if (s->options->force_linear_sample_interp) {
-      nir_ssa_def *sample =
+      nir_def *sample =
         nir_load_barycentric_sample(b, 32, .interp_mode = INTERP_MODE_NOPERSPECTIVE);
      nir_store_var(b, s->linear_center, sample, 0x3);
      nir_store_var(b, s->linear_centroid, sample, 0x3);
    }

    if (s->options->force_persp_center_interp) {
-      nir_ssa_def *center =
+      nir_def *center =
         nir_load_barycentric_pixel(b, 32, .interp_mode = INTERP_MODE_SMOOTH);
      nir_store_var(b, s->persp_sample, center, 0x3);
      nir_store_var(b, s->persp_centroid, center, 0x3);
    }

    if (s->options->force_linear_center_interp) {
-      nir_ssa_def *center =
+      nir_def *center =
         nir_load_barycentric_pixel(b, 32, .interp_mode = INTERP_MODE_NOPERSPECTIVE);
      nir_store_var(b, s->linear_sample, center, 0x3);
      nir_store_var(b, s->linear_centroid, center, 0x3);
@@ -186,8 +186,8 @@ lower_ps_load_barycentric(nir_builder *b, nir_intrinsic_instr *intrin, lower_ps_

    b->cursor = nir_before_instr(&intrin->instr);

-   nir_ssa_def *replacement = nir_load_var(b, var);
-   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, replacement);
+   nir_def *replacement = nir_load_var(b, var);
+   nir_def_rewrite_uses(&intrin->dest.ssa, replacement);

    nir_instr_remove(&intrin->instr);
    return true;
@@ -200,7 +200,7 @@ gather_ps_store_output(nir_builder *b, nir_intrinsic_instr *intrin, lower_ps_sta
    unsigned write_mask = nir_intrinsic_write_mask(intrin);
    unsigned component = nir_intrinsic_component(intrin);
    nir_alu_type type = nir_intrinsic_src_type(intrin);
-   nir_ssa_def *store_val = intrin->src[0].ssa;
+   nir_def *store_val = intrin->src[0].ssa;

    b->cursor = nir_before_instr(&intrin->instr);

@@ -249,13 +249,13 @@ lower_ps_load_sample_mask_in(nir_builder *b, nir_intrinsic_instr *intrin, lower_
    b->cursor = nir_before_instr(&intrin->instr);

    uint32_t ps_iter_mask = ac_get_ps_iter_mask(s->options->ps_iter_samples);
-   nir_ssa_def *sampleid = nir_load_sample_id(b);
-   nir_ssa_def *submask = nir_ishl(b, nir_imm_int(b, ps_iter_mask), sampleid);
+   nir_def *sampleid = nir_load_sample_id(b);
+   nir_def *submask = nir_ishl(b, nir_imm_int(b, ps_iter_mask), sampleid);

-   nir_ssa_def *sample_mask = nir_load_sample_mask_in(b);
-   nir_ssa_def *replacement = nir_iand(b, sample_mask, submask);
+   nir_def *sample_mask = nir_load_sample_mask_in(b);
+   nir_def *replacement = nir_iand(b, sample_mask, submask);

-   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, replacement);
+   nir_def_rewrite_uses(&intrin->dest.ssa, replacement);

    nir_instr_remove(&intrin->instr);
    return true;
@@ -327,8 +327,8 @@ emit_ps_color_clamp_and_alpha_test(nir_builder *b, lower_ps_state *s)
    } else if (s->options->alpha_func == COMPARE_FUNC_NEVER) {
       nir_discard(b);
    } else if (s->outputs[slot][3]) {
-      nir_ssa_def *ref = nir_load_alpha_reference_amd(b);
-      nir_ssa_def *cond =
+      nir_def *ref = nir_load_alpha_reference_amd(b);
+      nir_def *cond =
         nir_compare_func(b, s->options->alpha_func, s->outputs[slot][3], ref);
      nir_discard_if(b, nir_inot(b, cond));
    }
@@ -341,16 +341,16 @@ emit_ps_mrtz_export(nir_builder *b, lower_ps_state *s)
 {
    uint64_t outputs_written = b->shader->info.outputs_written;

-   nir_ssa_def *mrtz_alpha = NULL;
+   nir_def *mrtz_alpha = NULL;
    if (s->options->alpha_to_coverage_via_mrtz) {
       mrtz_alpha = s->outputs[FRAG_RESULT_COLOR][3] ?
                    s->outputs[FRAG_RESULT_COLOR][3] :
                    s->outputs[FRAG_RESULT_DATA0][3];
    }

-   nir_ssa_def *depth = s->outputs[FRAG_RESULT_DEPTH][0];
-   nir_ssa_def *stencil = s->outputs[FRAG_RESULT_STENCIL][0];
-   nir_ssa_def *sample_mask = s->outputs[FRAG_RESULT_SAMPLE_MASK][0];
+   nir_def *depth = s->outputs[FRAG_RESULT_DEPTH][0];
+   nir_def *stencil = s->outputs[FRAG_RESULT_STENCIL][0];
+   nir_def *sample_mask = s->outputs[FRAG_RESULT_SAMPLE_MASK][0];

    if (s->options->kill_samplemask) {
       sample_mask = NULL;
@@ -371,8 +371,8 @@ emit_ps_mrtz_export(nir_builder *b, lower_ps_state *s)
                        outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK),
                        s->options->alpha_to_coverage_via_mrtz);

-   nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
-   nir_ssa_def *outputs[4] = {undef, undef, undef, undef};
+   nir_def *undef = nir_undef(b, 1, 32);
+   nir_def *outputs[4] = {undef, undef, undef, undef};
    unsigned write_mask = 0;
    unsigned flags = 0;

@@ -465,22 +465,22 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, gl_frag_result slot, uns
    bool enable_mrt_output_nan_fixup =
       s->options->enable_mrt_output_nan_fixup & BITFIELD_BIT(cbuf);

-   nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
-   nir_ssa_def *outputs[4] = {undef, undef, undef, undef};
+   nir_def *undef = nir_undef(b, 1, 32);
+   nir_def *outputs[4] = {undef, undef, undef, undef};
    unsigned write_mask = 0;
    unsigned flags = 0;

    nir_alu_type base_type = nir_alu_type_get_base_type(type);
    unsigned type_size = nir_alu_type_get_type_size(type);

-   nir_ssa_def *data[4];
+   nir_def *data[4];
    memcpy(data, s->outputs[slot], sizeof(data));

    /* Replace NaN by zero (for 32-bit float formats) to fix game bugs if requested. */
    if (enable_mrt_output_nan_fixup && type == nir_type_float32) {
       for (int i = 0; i < 4; i++) {
          if (data[i]) {
-            nir_ssa_def *isnan = nir_fisnan(b, data[i]);
+            nir_def *isnan = nir_fisnan(b, data[i]);
             data[i] = nir_bcsel(b, isnan, nir_imm_float(b, 0), data[i]);
          }
       }
@@ -593,14 +593,14 @@ emit_ps_color_export(nir_builder *b, lower_ps_state *s, gl_frag_result slot, uns
    }

    for (int i = 0; i < 2; i++) {
-      nir_ssa_def *lo = data[i * 2];
-      nir_ssa_def *hi = data[i * 2 + 1];
+      nir_def *lo = data[i * 2];
+      nir_def *hi = data[i * 2 + 1];
      if (!lo && !hi)
        continue;

-      lo = lo ? lo : nir_ssa_undef(b, 1, type_size);
-      hi = hi ? hi : nir_ssa_undef(b, 1, type_size);
-      nir_ssa_def *vec = nir_vec2(b, lo, hi);
+      lo = lo ? lo : nir_undef(b, 1, type_size);
+      hi = hi ? hi : nir_undef(b, 1, type_size);
+      nir_def *vec = nir_vec2(b, lo, hi);

      outputs[i] = nir_build_alu1(b, pack_op, vec);

@@ -657,8 +657,8 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first
    uint32_t mrt1_write_mask = nir_intrinsic_write_mask(mrt1_exp);
    uint32_t write_mask = mrt0_write_mask | mrt1_write_mask;

-   nir_ssa_def *mrt0_arg = mrt0_exp->src[0].ssa;
-   nir_ssa_def *mrt1_arg = mrt1_exp->src[0].ssa;
+   nir_def *mrt0_arg = mrt0_exp->src[0].ssa;
+   nir_def *mrt1_arg = mrt1_exp->src[0].ssa;

    /* Swizzle code is right before mrt0_exp. */
    b->cursor = nir_before_instr(&mrt0_exp->instr);
@@ -671,9 +671,9 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first
       return;
    }

-   nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
-   nir_ssa_def *arg0_vec[4] = {undef, undef, undef, undef};
-   nir_ssa_def *arg1_vec[4] = {undef, undef, undef, undef};
+   nir_def *undef = nir_undef(b, 1, 32);
+   nir_def *arg0_vec[4] = {undef, undef, undef, undef};
+   nir_def *arg1_vec[4] = {undef, undef, undef, undef};

    /* For illustration, originally
     * lane0 export arg00 and arg01
@@ -684,17 +684,17 @@ emit_ps_dual_src_blend_swizzle(nir_builder *b, lower_ps_state *s, unsigned first
    * lane1 export arg01 and arg11.
    */
   u_foreach_bit (i, write_mask) {
-      nir_ssa_def *arg0 = nir_channel(b, mrt0_arg, i);
-      nir_ssa_def *arg1 = nir_channel(b, mrt1_arg, i);
+      nir_def *arg0 = nir_channel(b, mrt0_arg, i);
+      nir_def *arg1 = nir_channel(b, mrt1_arg, i);

      /* swap odd,even lanes of arg0 */
      arg0 = nir_quad_swizzle_amd(b, arg0, .swizzle_mask = 0b10110001);

      /* swap even lanes between arg0 and arg1 */
-      nir_ssa_def *tid = nir_load_subgroup_invocation(b);
-      nir_ssa_def *is_even = nir_ieq_imm(b, nir_iand_imm(b, tid, 1), 0);
+      nir_def *tid = nir_load_subgroup_invocation(b);
+      nir_def *is_even = nir_ieq_imm(b, nir_iand_imm(b, tid, 1), 0);

-      nir_ssa_def *tmp = arg0;
+      nir_def *tmp = arg0;
      arg0 = nir_bcsel(b, is_even, arg1, arg0);
      arg1 = nir_bcsel(b, is_even, tmp, arg1);

@@ -741,7 +741,7 @@ emit_ps_null_export(nir_builder *b, lower_ps_state *s)
      V_008DFC_SQ_EXP_MRT : V_008DFC_SQ_EXP_NULL;

   nir_intrinsic_instr *intrin =
-      nir_export_amd(b, nir_ssa_undef(b, 4, 32),
+      nir_export_amd(b, nir_undef(b, 4, 32),
                     .base = target,
                     .flags = AC_EXP_FLAG_VALID_MASK | AC_EXP_FLAG_DONE);
   /* To avoid builder set write mask to 0xf. */
@@ -798,7 +798,7 @@ export_ps_outputs(nir_builder *b, lower_ps_state *s)
         unsigned target = get_ps_color_export_target(s);

         s->exp[s->exp_num++] =
-            nir_export_amd(b, nir_ssa_undef(b, 4, 32), .base = target);
+            nir_export_amd(b, nir_undef(b, 4, 32), .base = target);
      }
   } else {
      if (s->output_types[FRAG_RESULT_COLOR] != nir_type_invalid) {

@@ -12,20 +12,20 @@
 #include "nir_builder.h"
 #include "amdgfxregs.h"

-static nir_ssa_def *get_field(nir_builder *b, nir_ssa_def *desc, unsigned index, unsigned mask)
+static nir_def *get_field(nir_builder *b, nir_def *desc, unsigned index, unsigned mask)
 {
    return nir_ubfe_imm(b, nir_channel(b, desc, index), ffs(mask) - 1, util_bitcount(mask));
 }

-static nir_ssa_def *handle_null_desc(nir_builder *b, nir_ssa_def *desc, nir_ssa_def *value)
+static nir_def *handle_null_desc(nir_builder *b, nir_def *desc, nir_def *value)
 {
-   nir_ssa_def *is_null = nir_ieq_imm(b, nir_channel(b, desc, 1), 0);
+   nir_def *is_null = nir_ieq_imm(b, nir_channel(b, desc, 1), 0);
    return nir_bcsel(b, is_null, nir_imm_int(b, 0), value);
 }

-static nir_ssa_def *query_samples(nir_builder *b, nir_ssa_def *desc, enum glsl_sampler_dim dim)
+static nir_def *query_samples(nir_builder *b, nir_def *desc, enum glsl_sampler_dim dim)
 {
-   nir_ssa_def *samples;
+   nir_def *samples;

    if (dim == GLSL_SAMPLER_DIM_MS) {
       /* LAST_LEVEL contains log2(num_samples). */
@@ -38,22 +38,22 @@ static nir_ssa_def *query_samples(nir_builder *b, nir_ssa_def *desc, enum glsl_s
    return handle_null_desc(b, desc, samples);
 }

-static nir_ssa_def *query_levels(nir_builder *b, nir_ssa_def *desc)
+static nir_def *query_levels(nir_builder *b, nir_def *desc)
 {
-   nir_ssa_def *base_level = get_field(b, desc, 3, ~C_00A00C_BASE_LEVEL);
-   nir_ssa_def *last_level = get_field(b, desc, 3, ~C_00A00C_LAST_LEVEL);
+   nir_def *base_level = get_field(b, desc, 3, ~C_00A00C_BASE_LEVEL);
+   nir_def *last_level = get_field(b, desc, 3, ~C_00A00C_LAST_LEVEL);

-   nir_ssa_def *levels = nir_iadd_imm(b, nir_isub(b, last_level, base_level), 1);
+   nir_def *levels = nir_iadd_imm(b, nir_isub(b, last_level, base_level), 1);

    return handle_null_desc(b, desc, levels);
 }

-static nir_ssa_def *
-lower_query_size(nir_builder *b, nir_ssa_def *desc, nir_src *lod,
+static nir_def *
+lower_query_size(nir_builder *b, nir_def *desc, nir_src *lod,
                 enum glsl_sampler_dim dim, bool is_array, enum amd_gfx_level gfx_level)
 {
    if (dim == GLSL_SAMPLER_DIM_BUF) {
-      nir_ssa_def *size = nir_channel(b, desc, 2);
+      nir_def *size = nir_channel(b, desc, 2);

       if (gfx_level == GFX8) {
          /* On GFX8, the descriptor contains the size in bytes,
@@ -72,14 +72,14 @@ lower_query_size(nir_builder *b, nir_ssa_def *desc, nir_src *lod,
    bool has_width = dim != GLSL_SAMPLER_DIM_CUBE;
    bool has_height = dim != GLSL_SAMPLER_DIM_1D;
    bool has_depth = dim == GLSL_SAMPLER_DIM_3D;
-   nir_ssa_def *width = NULL, *height = NULL, *layers = NULL, *base_array = NULL;
-   nir_ssa_def *last_array = NULL, *depth = NULL;
+   nir_def *width = NULL, *height = NULL, *layers = NULL, *base_array = NULL;
+   nir_def *last_array = NULL, *depth = NULL;

    /* Get the width, height, depth, layers. */
    if (gfx_level >= GFX10) {
       if (has_width) {
-         nir_ssa_def *width_lo = get_field(b, desc, 1, ~C_00A004_WIDTH_LO);
-         nir_ssa_def *width_hi = get_field(b, desc, 2, ~C_00A008_WIDTH_HI);
+         nir_def *width_lo = get_field(b, desc, 1, ~C_00A004_WIDTH_LO);
+         nir_def *width_hi = get_field(b, desc, 2, ~C_00A008_WIDTH_HI);
          /* Use iadd to get s_lshl2_add_u32 in the end. */
          width = nir_iadd(b, width_lo, nir_ishl_imm(b, width_hi, 2));
       }
@@ -115,8 +115,8 @@ lower_query_size(nir_builder *b, nir_ssa_def *desc, nir_src *lod,
    * the pitch for 2D. We need to set depth and last_array to 0 in that case.
    */
   if (gfx_level >= GFX10_3 && (has_depth || is_array)) {
-      nir_ssa_def *type = get_field(b, desc, 3, ~C_00A00C_TYPE);
-      nir_ssa_def *is_2d = nir_ieq_imm(b, type, V_008F1C_SQ_RSRC_IMG_2D);
+      nir_def *type = get_field(b, desc, 3, ~C_00A00C_TYPE);
+      nir_def *is_2d = nir_ieq_imm(b, type, V_008F1C_SQ_RSRC_IMG_2D);

      if (has_depth)
        depth = nir_bcsel(b, is_2d, nir_imm_int(b, 0), depth);
@@ -139,8 +139,8 @@ lower_query_size(nir_builder *b, nir_ssa_def *desc, nir_src *lod,

   /* Minify the dimensions according to base_level + lod. */
   if (dim != GLSL_SAMPLER_DIM_MS && dim != GLSL_SAMPLER_DIM_RECT) {
-      nir_ssa_def *base_level = get_field(b, desc, 3, ~C_00A00C_BASE_LEVEL);
-      nir_ssa_def *level = lod ? nir_iadd(b, base_level, lod->ssa) : base_level;
+      nir_def *base_level = get_field(b, desc, 3, ~C_00A00C_BASE_LEVEL);
+      nir_def *level = lod ? nir_iadd(b, base_level, lod->ssa) : base_level;

      if (has_width)
        width = nir_ushr(b, width, level);
@@ -165,16 +165,16 @@ lower_query_size(nir_builder *b, nir_ssa_def *desc, nir_src *lod,

   /* Special case for sliced storage 3D views which shouldn't be minified. */
   if (gfx_level >= GFX10 && has_depth) {
-      nir_ssa_def *uav3d =
+      nir_def *uav3d =
        nir_ieq_imm(b, get_field(b, desc, 5, ~C_00A014_ARRAY_PITCH), 1);
-      nir_ssa_def *layers_3d =
+      nir_def *layers_3d =
        nir_isub(b, get_field(b, desc, 4, ~C_00A010_DEPTH),
                 get_field(b, desc, 4, ~C_00A010_BASE_ARRAY));
      layers_3d = nir_iadd_imm(b, layers_3d, 1);
      depth = nir_bcsel(b, uav3d, layers_3d, depth);
   }

-   nir_ssa_def *result = NULL;
+   nir_def *result = NULL;

   /* Construct the result. */
   switch (dim) {
@@ -203,14 +203,14 @@ lower_query_size(nir_builder *b, nir_ssa_def *desc, nir_src *lod,
 static bool lower_resinfo(nir_builder *b, nir_instr *instr, void *data)
 {
    enum amd_gfx_level gfx_level = *(enum amd_gfx_level*)data;
-   nir_ssa_def *result = NULL, *dst = NULL;
+   nir_def *result = NULL, *dst = NULL;

    if (instr->type == nir_instr_type_intrinsic) {
       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
       const struct glsl_type *type;
       enum glsl_sampler_dim dim;
       bool is_array;
-      nir_ssa_def *desc = NULL;
+      nir_def *desc = NULL;

       dst = &intr->dest.ssa;
       b->cursor = nir_before_instr(instr);
@@ -265,7 +265,7 @@ static bool lower_resinfo(nir_builder *b, nir_instr *instr, void *data)
    } else if (instr->type == nir_instr_type_tex) {
       nir_tex_instr *tex = nir_instr_as_tex(instr);
       nir_tex_instr *new_tex;
-      nir_ssa_def *desc = NULL;
+      nir_def *desc = NULL;
       nir_src *lod = NULL;

       dst = &tex->dest.ssa;
@@ -326,7 +326,7 @@ static bool lower_resinfo(nir_builder *b, nir_instr *instr, void *data)
    if (!result)
       return false;

-   nir_ssa_def_rewrite_uses_after(dst, result, instr);
+   nir_def_rewrite_uses_after(dst, result, instr);
    nir_instr_remove(instr);
    return true;
 }

@@ -69,8 +69,8 @@ lower_subdword_loads(nir_builder *b, nir_instr *instr, void *data)
    unsigned align_offset = nir_intrinsic_align_offset(intr) % align_mul;

    nir_src *src_offset = nir_get_io_offset_src(intr);
-   nir_ssa_def *offset = src_offset->ssa;
-   nir_ssa_def *result = &intr->dest.ssa;
+   nir_def *offset = src_offset->ssa;
+   nir_def *result = &intr->dest.ssa;

    /* Change the load to 32 bits per channel, update the channel count,
     * and increase the declared load alignment.
@@ -87,7 +87,7 @@ lower_subdword_loads(nir_builder *b, nir_instr *instr, void *data)
       b->cursor = nir_after_instr(instr);
       result = nir_extract_bits(b, &result, 1, 0, num_components, bit_size);

-      nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, result,
+      nir_def_rewrite_uses_after(&intr->dest.ssa, result,
                                     result->parent_instr);
       return true;
    }
@@ -121,7 +121,7 @@ lower_subdword_loads(nir_builder *b, nir_instr *instr, void *data)
       result = nir_extract_bits(b, &result, 1, comp_offset * bit_size,
                                 num_components, bit_size);

-      nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, result,
+      nir_def_rewrite_uses_after(&intr->dest.ssa, result,
                                     result->parent_instr);
       return true;
    }
@@ -138,10 +138,10 @@ lower_subdword_loads(nir_builder *b, nir_instr *instr, void *data)

    /* We need to shift bits in the loaded vector by this number. */
    b->cursor = nir_after_instr(instr);
-   nir_ssa_def *shift = nir_ishl_imm(b, nir_iand_imm(b, offset, 0x3), 3);
-   nir_ssa_def *rev_shift32 = nir_isub_imm(b, 32, shift);
+   nir_def *shift = nir_ishl_imm(b, nir_iand_imm(b, offset, 0x3), 3);
+   nir_def *rev_shift32 = nir_isub_imm(b, 32, shift);

-   nir_ssa_def *elems[NIR_MAX_VEC_COMPONENTS];
+   nir_def *elems[NIR_MAX_VEC_COMPONENTS];

    /* "shift" can be only be one of: 0, 8, 16, 24
     *
@@ -170,7 +170,7 @@ lower_subdword_loads(nir_builder *b, nir_instr *instr, void *data)
    if (intr->num_components >= 2) {
       /* Use the 64-bit algorithm as described above. */
       for (i = 0; i < intr->num_components / 2 - 1; i++) {
-         nir_ssa_def *qword1, *dword2;
+         nir_def *qword1, *dword2;

          qword1 = nir_pack_64_2x32_split(b,
                                          nir_channel(b, result, i * 2 + 0),
@@ -203,7 +203,7 @@ lower_subdword_loads(nir_builder *b, nir_instr *instr, void *data)
    result = nir_vec(b, elems, intr->num_components);
    result = nir_extract_bits(b, &result, 1, 0, num_components, bit_size);

-   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, result,
+   nir_def_rewrite_uses_after(&intr->dest.ssa, result,
                                  result->parent_instr);
    return true;
 }

@@ -22,25 +22,25 @@ typedef struct {
    unsigned num_entries;
 } lower_tsms_io_state;

-static nir_ssa_def *
+static nir_def *
 task_workgroup_index(nir_builder *b,
                      lower_tsms_io_state *s)
 {
-   nir_ssa_def *id = nir_load_workgroup_id(b, 32);
+   nir_def *id = nir_load_workgroup_id(b, 32);

-   nir_ssa_def *x = nir_channel(b, id, 0);
-   nir_ssa_def *y = nir_channel(b, id, 1);
-   nir_ssa_def *z = nir_channel(b, id, 2);
+   nir_def *x = nir_channel(b, id, 0);
+   nir_def *y = nir_channel(b, id, 1);
+   nir_def *z = nir_channel(b, id, 2);

-   nir_ssa_def *grid_size = nir_load_num_workgroups(b, 32);
-   nir_ssa_def *grid_size_x = nir_channel(b, grid_size, 0);
-   nir_ssa_def *grid_size_y = nir_channel(b, grid_size, 1);
+   nir_def *grid_size = nir_load_num_workgroups(b, 32);
+   nir_def *grid_size_x = nir_channel(b, grid_size, 0);
+   nir_def *grid_size_y = nir_channel(b, grid_size, 1);

    return nir_iadd(b, nir_imul(b, nir_imul(b, grid_size_x, grid_size_y), z),
                       nir_iadd(b, nir_imul(b, grid_size_x, y), x));
 }

-static nir_ssa_def *
+static nir_def *
 task_ring_entry_index(nir_builder *b,
                       lower_tsms_io_state *s)
 {
@@ -54,12 +54,12 @@ task_ring_entry_index(nir_builder *b,
    * AND with num_entries - 1 to get the correct meaning.
    * Note that num_entries must be a power of two.
    */
-   nir_ssa_def *ring_entry = nir_load_task_ring_entry_amd(b);
-   nir_ssa_def *idx = nir_iadd_nuw(b, ring_entry, task_workgroup_index(b, s));
+   nir_def *ring_entry = nir_load_task_ring_entry_amd(b);
+   nir_def *idx = nir_iadd_nuw(b, ring_entry, task_workgroup_index(b, s));
    return nir_iand_imm(b, idx, s->num_entries - 1);
 }

-static nir_ssa_def *
+static nir_def *
 task_draw_ready_bit(nir_builder *b,
                     lower_tsms_io_state *s)
 {
@@ -86,14 +86,14 @@ task_draw_ready_bit(nir_builder *b,
    * If the task shader doesn't write this bit, the HW hangs.
    */

-   nir_ssa_def *ring_entry = nir_load_task_ring_entry_amd(b);
-   nir_ssa_def *workgroup_index = task_workgroup_index(b, s);
+   nir_def *ring_entry = nir_load_task_ring_entry_amd(b);
+   nir_def *workgroup_index = task_workgroup_index(b, s);

-   nir_ssa_def *idx = nir_iadd_nuw(b, ring_entry, workgroup_index);
+   nir_def *idx = nir_iadd_nuw(b, ring_entry, workgroup_index);
    return nir_u2u8(b, nir_ubfe_imm(b, idx, util_bitcount(s->num_entries - 1), 1));
 }

-static nir_ssa_def *
+static nir_def *
 mesh_ring_entry_index(nir_builder *b,
                       lower_tsms_io_state *s)
 {
@@ -111,15 +111,15 @@ mesh_ring_entry_index(nir_builder *b,

 static void
 task_write_draw_ring(nir_builder *b,
-                     nir_ssa_def *store_val,
+                     nir_def *store_val,
                      unsigned const_off,
                      lower_tsms_io_state *s)
 {
-   nir_ssa_def *ptr = task_ring_entry_index(b, s);
-   nir_ssa_def *ring = nir_load_ring_task_draw_amd(b);
-   nir_ssa_def *scalar_off = nir_imul_imm(b, ptr, s->draw_entry_bytes);
-   nir_ssa_def *vector_off = nir_imm_int(b, 0);
-   nir_ssa_def *zero = nir_imm_int(b, 0);
+   nir_def *ptr = task_ring_entry_index(b, s);
+   nir_def *ring = nir_load_ring_task_draw_amd(b);
+   nir_def *scalar_off = nir_imul_imm(b, ptr, s->draw_entry_bytes);
+   nir_def *vector_off = nir_imm_int(b, 0);
+   nir_def *zero = nir_imm_int(b, 0);

    nir_store_buffer_amd(b, store_val, ring, vector_off, scalar_off, zero,
                         .base = const_off, .memory_modes = nir_var_shader_out,
@@ -139,7 +139,7 @@ filter_task_intrinsics(const nir_instr *instr,
           intrin->intrinsic == nir_intrinsic_load_task_payload;
 }

-static nir_ssa_def *
+static nir_def *
 lower_task_launch_mesh_workgroups(nir_builder *b,
                                   nir_intrinsic_instr *intrin,
                                   lower_tsms_io_state *s)
@@ -160,13 +160,13 @@ lower_task_launch_mesh_workgroups(nir_builder *b,
                      nir_var_mem_ssbo | nir_var_mem_global);

    /* On the first invocation, write the full draw ring entry. */
-   nir_ssa_def *invocation_index = nir_load_local_invocation_index(b);
+   nir_def *invocation_index = nir_load_local_invocation_index(b);
    nir_if *if_invocation_index_zero = nir_push_if(b, nir_ieq_imm(b, invocation_index, 0));
    {
-      nir_ssa_def *dimensions = intrin->src[0].ssa;
-      nir_ssa_def *x = nir_channel(b, dimensions, 0);
-      nir_ssa_def *y = nir_channel(b, dimensions, 1);
-      nir_ssa_def *z = nir_channel(b, dimensions, 2);
+      nir_def *dimensions = intrin->src[0].ssa;
+      nir_def *x = nir_channel(b, dimensions, 0);
+      nir_def *y = nir_channel(b, dimensions, 1);
+      nir_def *z = nir_channel(b, dimensions, 2);

       /* When either Y or Z are 0, also set X to 0.
        * Not necessary, but speeds up the job of the CP.
@@ -185,7 +185,7 @@ lower_task_launch_mesh_workgroups(nir_builder *b,
    return NIR_LOWER_INSTR_PROGRESS_REPLACE;
 }

-static nir_ssa_def *
+static nir_def *
 lower_task_payload_store(nir_builder *b,
                          nir_intrinsic_instr *intrin,
                          lower_tsms_io_state *s)
@@ -193,12 +193,12 @@ lower_task_payload_store(nir_builder *b,
    unsigned write_mask = nir_intrinsic_write_mask(intrin);
    unsigned base = nir_intrinsic_base(intrin);

-   nir_ssa_def *store_val = intrin->src[0].ssa;
-   nir_ssa_def *addr = intrin->src[1].ssa;
-   nir_ssa_def *ring = nir_load_ring_task_payload_amd(b);
-   nir_ssa_def *ptr = task_ring_entry_index(b, s);
-   nir_ssa_def *ring_off = nir_imul_imm(b, ptr, s->payload_entry_bytes);
-   nir_ssa_def *zero = nir_imm_int(b, 0);
+   nir_def *store_val = intrin->src[0].ssa;
+   nir_def *addr = intrin->src[1].ssa;
+   nir_def *ring = nir_load_ring_task_payload_amd(b);
+   nir_def *ptr = task_ring_entry_index(b, s);
+   nir_def *ring_off = nir_imul_imm(b, ptr, s->payload_entry_bytes);
+   nir_def *zero = nir_imm_int(b, 0);

    nir_store_buffer_amd(b, store_val, ring, addr, ring_off, zero, .base = base,
                         .write_mask = write_mask,
@@ -208,7 +208,7 @@ lower_task_payload_store(nir_builder *b,
    return NIR_LOWER_INSTR_PROGRESS_REPLACE;
 }

-static nir_ssa_def *
+static nir_def *
 lower_taskmesh_payload_load(nir_builder *b,
                             nir_intrinsic_instr *intrin,
                             lower_tsms_io_state *s)
@@ -217,22 +217,22 @@ lower_taskmesh_payload_load(nir_builder *b,
    unsigned num_components = intrin->dest.ssa.num_components;
    unsigned bit_size = intrin->dest.ssa.bit_size;

-   nir_ssa_def *ptr =
+   nir_def *ptr =
       b->shader->info.stage == MESA_SHADER_TASK ?
       task_ring_entry_index(b, s) :
       mesh_ring_entry_index(b, s);

-   nir_ssa_def *addr = intrin->src[0].ssa;
-   nir_ssa_def *ring = nir_load_ring_task_payload_amd(b);
-   nir_ssa_def *ring_off = nir_imul_imm(b, ptr, s->payload_entry_bytes);
-   nir_ssa_def *zero = nir_imm_int(b, 0);
+   nir_def *addr = intrin->src[0].ssa;
+   nir_def *ring = nir_load_ring_task_payload_amd(b);
+   nir_def *ring_off = nir_imul_imm(b, ptr, s->payload_entry_bytes);
+   nir_def *zero = nir_imm_int(b, 0);

    return nir_load_buffer_amd(b, num_components, bit_size, ring, addr, ring_off, zero, .base = base,
                               .memory_modes = nir_var_mem_task_payload,
                               .access = ACCESS_COHERENT);
 }

-static nir_ssa_def *
+static nir_def *
 lower_task_intrinsics(nir_builder *b,
                       nir_instr *instr,
                       void *state)
@@ -293,7 +293,7 @@ filter_mesh_input_load(const nir_instr *instr,
    return intrin->intrinsic == nir_intrinsic_load_task_payload;
 }

-static nir_ssa_def *
+static nir_def *
 lower_mesh_intrinsics(nir_builder *b,
                       nir_instr *instr,
                       void *state)

@ -238,13 +238,13 @@ lower_ls_output_store(nir_builder *b,
|
|||
|
||||
b->cursor = nir_before_instr(instr);
|
||||
|
||||
nir_ssa_def *vertex_idx = nir_load_local_invocation_index(b);
|
||||
nir_ssa_def *base_off_var = nir_imul(b, vertex_idx, nir_load_lshs_vertex_stride_amd(b));
|
||||
nir_def *vertex_idx = nir_load_local_invocation_index(b);
|
||||
nir_def *base_off_var = nir_imul(b, vertex_idx, nir_load_lshs_vertex_stride_amd(b));
|
||||
|
||||
nir_ssa_def *io_off = ac_nir_calc_io_offset(b, intrin, nir_imm_int(b, 16u), 4u, st->map_io);
|
||||
nir_def *io_off = ac_nir_calc_io_offset(b, intrin, nir_imm_int(b, 16u), 4u, st->map_io);
|
||||
unsigned write_mask = nir_intrinsic_write_mask(intrin);
|
||||
|
||||
nir_ssa_def *off = nir_iadd_nuw(b, base_off_var, io_off);
|
||||
nir_def *off = nir_iadd_nuw(b, base_off_var, io_off);
|
||||
nir_store_shared(b, intrin->src[0].ssa, off, .write_mask = write_mask);
|
||||
|
||||
/* NOTE: don't remove the store_output intrinsic on GFX9+ when tcs_in_out_eq,
|
||||
|
|
@ -285,27 +285,27 @@ filter_load_tcs_per_vertex_input(const nir_instr *instr,
|
|||
return !can_use_temps;
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
static nir_def *
|
||||
hs_per_vertex_input_lds_offset(nir_builder *b,
|
||||
lower_tess_io_state *st,
|
||||
nir_intrinsic_instr *instr)
|
||||
{
|
||||
nir_ssa_def *tcs_in_vtxcnt = nir_load_patch_vertices_in(b);
|
||||
nir_ssa_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
|
||||
nir_ssa_def *vertex_index = nir_get_io_arrayed_index_src(instr)->ssa;
|
||||
nir_def *tcs_in_vtxcnt = nir_load_patch_vertices_in(b);
|
||||
nir_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
|
||||
nir_def *vertex_index = nir_get_io_arrayed_index_src(instr)->ssa;
|
||||
|
||||
nir_ssa_def *stride = nir_load_lshs_vertex_stride_amd(b);
|
||||
nir_ssa_def *tcs_in_patch_stride = nir_imul(b, tcs_in_vtxcnt, stride);
|
||||
nir_ssa_def *vertex_index_off = nir_imul(b, vertex_index, stride);
|
||||
nir_def *stride = nir_load_lshs_vertex_stride_amd(b);
|
||||
nir_def *tcs_in_patch_stride = nir_imul(b, tcs_in_vtxcnt, stride);
|
||||
nir_def *vertex_index_off = nir_imul(b, vertex_index, stride);
|
||||
|
||||
nir_ssa_def *tcs_in_current_patch_offset = nir_imul(b, rel_patch_id, tcs_in_patch_stride);
|
||||
nir_def *tcs_in_current_patch_offset = nir_imul(b, rel_patch_id, tcs_in_patch_stride);
|
||||
|
||||
nir_ssa_def *io_offset = ac_nir_calc_io_offset(b, instr, nir_imm_int(b, 16u), 4u, st->map_io);
|
||||
nir_def *io_offset = ac_nir_calc_io_offset(b, instr, nir_imm_int(b, 16u), 4u, st->map_io);
|
||||
|
||||
return nir_iadd_nuw(b, nir_iadd_nuw(b, tcs_in_current_patch_offset, vertex_index_off), io_offset);
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
static nir_def *
|
||||
hs_output_lds_offset(nir_builder *b,
|
||||
lower_tess_io_state *st,
|
||||
nir_intrinsic_instr *intrin)
|
||||
|
|
@ -318,28 +318,28 @@ hs_output_lds_offset(nir_builder *b,
|
|||
unsigned pervertex_output_patch_size = b->shader->info.tess.tcs_vertices_out * output_vertex_size;
|
||||
unsigned output_patch_stride = pervertex_output_patch_size + st->tcs_num_reserved_patch_outputs * 16u;
|
||||
|
||||
nir_ssa_def *off = intrin
|
||||
nir_def *off = intrin
|
||||
? ac_nir_calc_io_offset(b, intrin, nir_imm_int(b, 16u), 4u, st->map_io)
|
||||
: nir_imm_int(b, 0);
|
||||
|
||||
nir_ssa_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
|
||||
nir_ssa_def *patch_offset = nir_imul_imm(b, rel_patch_id, output_patch_stride);
|
||||
nir_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
|
||||
nir_def *patch_offset = nir_imul_imm(b, rel_patch_id, output_patch_stride);
|
||||
|
||||
nir_ssa_def *output_patch_offset;
|
||||
nir_def *output_patch_offset;
|
||||
if (st->tcs_no_inputs_in_lds)
|
||||
output_patch_offset = patch_offset;
|
||||
else {
|
||||
nir_ssa_def *tcs_in_vtxcnt = nir_load_patch_vertices_in(b);
|
||||
nir_ssa_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b);
|
||||
nir_ssa_def *input_patch_size =
|
||||
nir_def *tcs_in_vtxcnt = nir_load_patch_vertices_in(b);
|
||||
nir_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b);
|
||||
nir_def *input_patch_size =
|
||||
nir_imul(b, tcs_in_vtxcnt, nir_load_lshs_vertex_stride_amd(b));
|
||||
nir_ssa_def *output_patch0_offset = nir_imul(b, input_patch_size, tcs_num_patches);
|
||||
nir_def *output_patch0_offset = nir_imul(b, input_patch_size, tcs_num_patches);
|
||||
output_patch_offset = nir_iadd_nuw(b, patch_offset, output_patch0_offset);
|
||||
}
|
||||
|
||||
if (per_vertex) {
|
||||
nir_ssa_def *vertex_index = nir_ssa_for_src(b, *nir_get_io_arrayed_index_src(intrin), 1);
|
||||
nir_ssa_def *vertex_index_off = nir_imul_imm(b, vertex_index, output_vertex_size);
|
||||
nir_def *vertex_index = nir_ssa_for_src(b, *nir_get_io_arrayed_index_src(intrin), 1);
|
||||
nir_def *vertex_index_off = nir_imul_imm(b, vertex_index, output_vertex_size);
|
||||
|
||||
off = nir_iadd_nuw(b, off, vertex_index_off);
|
||||
return nir_iadd_nuw(b, off, output_patch_offset);
|
||||
|
|
@ -349,51 +349,51 @@ hs_output_lds_offset(nir_builder *b,
|
|||
}
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
static nir_def *
|
||||
hs_per_vertex_output_vmem_offset(nir_builder *b,
|
||||
lower_tess_io_state *st,
|
||||
nir_intrinsic_instr *intrin)
|
||||
{
|
||||
nir_ssa_def *out_vertices_per_patch = b->shader->info.stage == MESA_SHADER_TESS_CTRL
|
||||
nir_def *out_vertices_per_patch = b->shader->info.stage == MESA_SHADER_TESS_CTRL
|
||||
? nir_imm_int(b, b->shader->info.tess.tcs_vertices_out)
|
||||
: nir_load_patch_vertices_in(b);
|
||||
|
||||
nir_ssa_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b);
|
||||
nir_ssa_def *attr_stride = nir_imul(b, tcs_num_patches, nir_imul_imm(b, out_vertices_per_patch, 16u));
|
||||
nir_ssa_def *io_offset = ac_nir_calc_io_offset(b, intrin, attr_stride, 4u, st->map_io);
|
||||
nir_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b);
|
||||
nir_def *attr_stride = nir_imul(b, tcs_num_patches, nir_imul_imm(b, out_vertices_per_patch, 16u));
|
||||
nir_def *io_offset = ac_nir_calc_io_offset(b, intrin, attr_stride, 4u, st->map_io);
|
||||
|
||||
nir_ssa_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
|
||||
nir_ssa_def *patch_offset = nir_imul(b, rel_patch_id, nir_imul_imm(b, out_vertices_per_patch, 16u));
|
||||
nir_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
|
||||
nir_def *patch_offset = nir_imul(b, rel_patch_id, nir_imul_imm(b, out_vertices_per_patch, 16u));
|
||||
|
||||
nir_ssa_def *vertex_index = nir_ssa_for_src(b, *nir_get_io_arrayed_index_src(intrin), 1);
|
||||
nir_ssa_def *vertex_index_off = nir_imul_imm(b, vertex_index, 16u);
|
||||
nir_def *vertex_index = nir_ssa_for_src(b, *nir_get_io_arrayed_index_src(intrin), 1);
|
||||
nir_def *vertex_index_off = nir_imul_imm(b, vertex_index, 16u);
|
||||
|
||||
return nir_iadd_nuw(b, nir_iadd_nuw(b, patch_offset, vertex_index_off), io_offset);
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
static nir_def *
|
||||
hs_per_patch_output_vmem_offset(nir_builder *b,
|
||||
lower_tess_io_state *st,
|
||||
nir_intrinsic_instr *intrin,
|
||||
unsigned const_base_offset)
|
||||
{
|
||||
nir_ssa_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b);
|
||||
nir_ssa_def *per_patch_data_offset = nir_load_hs_out_patch_data_offset_amd(b);
|
||||
nir_def *tcs_num_patches = nir_load_tcs_num_patches_amd(b);
|
||||
nir_def *per_patch_data_offset = nir_load_hs_out_patch_data_offset_amd(b);
|
||||
|
||||
nir_ssa_def * off = intrin
|
||||
nir_def * off = intrin
|
||||
? ac_nir_calc_io_offset(b, intrin, nir_imul_imm(b, tcs_num_patches, 16u), 4u, st->map_io)
|
||||
: nir_imm_int(b, 0);
|
||||
|
||||
if (const_base_offset)
|
||||
off = nir_iadd_nuw(b, off, nir_imul_imm(b, tcs_num_patches, const_base_offset));
|
||||
|
||||
nir_ssa_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
|
||||
nir_ssa_def *patch_offset = nir_imul_imm(b, rel_patch_id, 16u);
|
||||
nir_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
|
||||
nir_def *patch_offset = nir_imul_imm(b, rel_patch_id, 16u);
|
||||
off = nir_iadd_nuw(b, off, per_patch_data_offset);
|
||||
return nir_iadd_nuw(b, off, patch_offset);
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
static nir_def *
|
||||
lower_hs_per_vertex_input_load(nir_builder *b,
|
||||
nir_instr *instr,
|
||||
void *state)
|
||||
|
|
@ -401,11 +401,11 @@ lower_hs_per_vertex_input_load(nir_builder *b,
|
|||
lower_tess_io_state *st = (lower_tess_io_state *) state;
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
|
||||
nir_ssa_def *off = hs_per_vertex_input_lds_offset(b, st, intrin);
|
||||
nir_def *off = hs_per_vertex_input_lds_offset(b, st, intrin);
|
||||
return nir_load_shared(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, off);
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
static nir_def *
|
||||
lower_hs_output_store(nir_builder *b,
|
||||
nir_intrinsic_instr *intrin,
|
||||
lower_tess_io_state *st)
|
||||
|
|
@ -414,7 +414,7 @@ lower_hs_output_store(nir_builder *b,
|
|||
intrin->intrinsic == nir_intrinsic_store_output);
|
||||
|
||||
nir_io_semantics semantics = nir_intrinsic_io_semantics(intrin);
|
||||
nir_ssa_def *store_val = intrin->src[0].ssa;
|
||||
nir_def *store_val = intrin->src[0].ssa;
|
||||
unsigned component = nir_intrinsic_component(intrin);
|
||||
unsigned write_mask = nir_intrinsic_write_mask(intrin);
|
||||
bool is_tess_factor = semantics.location == VARYING_SLOT_TESS_LEVEL_INNER ||
|
||||
|
|
@ -437,24 +437,24 @@ lower_hs_output_store(nir_builder *b,
|
|||
}
|
||||
|
||||
if (write_to_vmem) {
|
||||
nir_ssa_def *vmem_off = intrin->intrinsic == nir_intrinsic_store_per_vertex_output
|
||||
nir_def *vmem_off = intrin->intrinsic == nir_intrinsic_store_per_vertex_output
|
||||
? hs_per_vertex_output_vmem_offset(b, st, intrin)
|
||||
: hs_per_patch_output_vmem_offset(b, st, intrin, 0);
|
||||
|
||||
nir_ssa_def *hs_ring_tess_offchip = nir_load_ring_tess_offchip_amd(b);
|
||||
nir_ssa_def *offchip_offset = nir_load_ring_tess_offchip_offset_amd(b);
|
||||
nir_ssa_def *zero = nir_imm_int(b, 0);
|
||||
nir_def *hs_ring_tess_offchip = nir_load_ring_tess_offchip_amd(b);
|
||||
nir_def *offchip_offset = nir_load_ring_tess_offchip_offset_amd(b);
|
||||
nir_def *zero = nir_imm_int(b, 0);
|
||||
nir_store_buffer_amd(b, store_val, hs_ring_tess_offchip, vmem_off, offchip_offset, zero,
|
||||
.write_mask = write_mask, .memory_modes = nir_var_shader_out,
|
||||
.access = ACCESS_COHERENT);
|
||||
}
|
||||
|
||||
if (write_to_lds) {
|
||||
nir_ssa_def *lds_off = hs_output_lds_offset(b, st, intrin);
|
||||
nir_def *lds_off = hs_output_lds_offset(b, st, intrin);
|
||||
nir_store_shared(b, store_val, lds_off, .write_mask = write_mask);
|
||||
}
|
||||
|
||||
nir_ssa_def *ret = NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
nir_def *ret = NIR_LOWER_INSTR_PROGRESS_REPLACE;
|
||||
|
||||
if (is_tess_factor && st->tcs_pass_tessfactors_by_reg) {
|
||||
if (st->tcs_emit_tess_factor_write) {
|
||||
|
|
@ -474,12 +474,12 @@ lower_hs_output_store(nir_builder *b,
|
|||
return ret;
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
static nir_def *
|
||||
lower_hs_output_load(nir_builder *b,
|
||||
nir_intrinsic_instr *intrin,
|
||||
lower_tess_io_state *st)
|
||||
{
|
||||
nir_ssa_def *off = hs_output_lds_offset(b, st, intrin);
|
||||
nir_def *off = hs_output_lds_offset(b, st, intrin);
|
||||
return nir_load_shared(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size, off);
|
||||
}
|
||||
|
||||
|
|
@ -505,7 +505,7 @@ update_hs_barrier(nir_intrinsic_instr *intrin, lower_tess_io_state *st)
|
|||
nir_intrinsic_set_memory_scope(intrin, SCOPE_SUBGROUP);
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
static nir_def *
|
||||
lower_hs_output_access(nir_builder *b,
|
||||
nir_instr *instr,
|
||||
void *state)
|
||||
|
|
@ -571,7 +571,7 @@ hs_emit_write_tess_factors(nir_shader *shader,
|
|||
.memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_shared);
}

-nir_ssa_def *invocation_id = nir_load_invocation_id(b);
+nir_def *invocation_id = nir_load_invocation_id(b);

/* Only the 1st invocation of each patch needs to do this. */
nir_if *invocation_id_zero = nir_push_if(b, nir_ieq_imm(b, invocation_id, 0));

@@ -586,8 +586,8 @@ hs_emit_write_tess_factors(nir_shader *shader,
const bool tess_lvl_in_written = st->tcs_tess_lvl_in_loc >= 0;
const bool tess_lvl_out_written = st->tcs_tess_lvl_out_loc >= 0;

-nir_ssa_def *tessfactors_outer = NULL;
-nir_ssa_def *tessfactors_inner = NULL;
+nir_def *tessfactors_outer = NULL;
+nir_def *tessfactors_inner = NULL;
if (st->tcs_pass_tessfactors_by_reg) {
if (tess_lvl_out_written) {
tessfactors_outer = nir_load_var(b, st->tcs_tess_level_outer);

@@ -600,7 +600,7 @@ hs_emit_write_tess_factors(nir_shader *shader,
}
} else {
/* Base LDS address of per-patch outputs in the current patch. */
-nir_ssa_def *lds_base = hs_output_lds_offset(b, st, NULL);
+nir_def *lds_base = hs_output_lds_offset(b, st, NULL);

/* Load all tessellation factors (aka. tess levels) from LDS. */
if (tess_lvl_out_written) {

@@ -621,18 +621,18 @@ hs_emit_write_tess_factors(nir_shader *shader,
tessfactors_inner = nir_imm_zero(b, inner_comps, 32);

/* The descriptor where tess factors have to be stored by the shader. */
-nir_ssa_def *tessfactor_ring = nir_load_ring_tess_factors_amd(b);
+nir_def *tessfactor_ring = nir_load_ring_tess_factors_amd(b);

-nir_ssa_def *zero = nir_imm_int(b, 0);
-nir_ssa_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
-nir_ssa_def *tess_factors_base = nir_load_ring_tess_factors_offset_amd(b);
-nir_ssa_def *tess_factors_offset = nir_imul_imm(b, rel_patch_id, (inner_comps + outer_comps) * 4u);
+nir_def *zero = nir_imm_int(b, 0);
+nir_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
+nir_def *tess_factors_base = nir_load_ring_tess_factors_offset_amd(b);
+nir_def *tess_factors_offset = nir_imul_imm(b, rel_patch_id, (inner_comps + outer_comps) * 4u);
unsigned tess_factors_const_offset = 0;

if (st->gfx_level <= GFX8) {
/* Store the dynamic HS control word. */
nir_if *rel_patch_id_zero = nir_push_if(b, nir_ieq_imm(b, rel_patch_id, 0));
-nir_ssa_def *ctrlw = nir_imm_int(b, 0x80000000u);
+nir_def *ctrlw = nir_imm_int(b, 0x80000000u);
nir_store_buffer_amd(b, ctrlw, tessfactor_ring, zero, tess_factors_base, zero,
.access = ACCESS_COHERENT);
tess_factors_const_offset += 4;

@@ -642,11 +642,11 @@ hs_emit_write_tess_factors(nir_shader *shader,
/* Store tess factors for the tessellator */
if (shader->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) {
/* LINES reversal */
-nir_ssa_def *t = nir_vec2(b, nir_channel(b, tessfactors_outer, 1), nir_channel(b, tessfactors_outer, 0));
+nir_def *t = nir_vec2(b, nir_channel(b, tessfactors_outer, 1), nir_channel(b, tessfactors_outer, 0));
nir_store_buffer_amd(b, t, tessfactor_ring, tess_factors_offset, tess_factors_base, zero,
.base = tess_factors_const_offset, .access = ACCESS_COHERENT);
} else if (shader->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES) {
-nir_ssa_def *t = nir_vec4(b, nir_channel(b, tessfactors_outer, 0), nir_channel(b, tessfactors_outer, 1),
+nir_def *t = nir_vec4(b, nir_channel(b, tessfactors_outer, 0), nir_channel(b, tessfactors_outer, 1),
nir_channel(b, tessfactors_outer, 2), nir_channel(b, tessfactors_inner, 0));
nir_store_buffer_amd(b, t, tessfactor_ring, tess_factors_offset, tess_factors_base, zero,
.base = tess_factors_const_offset, .access = ACCESS_COHERENT);

@@ -659,11 +659,11 @@ hs_emit_write_tess_factors(nir_shader *shader,

if (st->tes_reads_tessfactors) {
/* Store to offchip for TES to read - only if TES actually reads them */
-nir_ssa_def *hs_ring_tess_offchip = nir_load_ring_tess_offchip_amd(b);
-nir_ssa_def *offchip_offset = nir_load_ring_tess_offchip_offset_amd(b);
+nir_def *hs_ring_tess_offchip = nir_load_ring_tess_offchip_amd(b);
+nir_def *offchip_offset = nir_load_ring_tess_offchip_offset_amd(b);

if (tess_lvl_out_written) {
-nir_ssa_def *vmem_off_outer =
+nir_def *vmem_off_outer =
hs_per_patch_output_vmem_offset(b, st, NULL, st->tcs_tess_lvl_out_loc);

nir_store_buffer_amd(b, tessfactors_outer, hs_ring_tess_offchip,

@@ -673,7 +673,7 @@ hs_emit_write_tess_factors(nir_shader *shader,
}

if (inner_comps && tess_lvl_in_written) {
-nir_ssa_def *vmem_off_inner =
+nir_def *vmem_off_inner =
hs_per_patch_output_vmem_offset(b, st, NULL, st->tcs_tess_lvl_in_loc);

nir_store_buffer_amd(b, tessfactors_inner, hs_ring_tess_offchip,

@@ -688,7 +688,7 @@ hs_emit_write_tess_factors(nir_shader *shader,
nir_metadata_preserve(impl, nir_metadata_none);
}

-static nir_ssa_def *
+static nir_def *
lower_tes_input_load(nir_builder *b,
nir_instr *instr,
void *state)

@@ -696,13 +696,13 @@ lower_tes_input_load(nir_builder *b,
lower_tess_io_state *st = (lower_tess_io_state *) state;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

-nir_ssa_def *offchip_ring = nir_load_ring_tess_offchip_amd(b);
-nir_ssa_def *offchip_offset = nir_load_ring_tess_offchip_offset_amd(b);
-nir_ssa_def *off = intrin->intrinsic == nir_intrinsic_load_per_vertex_input
+nir_def *offchip_ring = nir_load_ring_tess_offchip_amd(b);
+nir_def *offchip_offset = nir_load_ring_tess_offchip_offset_amd(b);
+nir_def *off = intrin->intrinsic == nir_intrinsic_load_per_vertex_input
? hs_per_vertex_output_vmem_offset(b, st, intrin)
: hs_per_patch_output_vmem_offset(b, st, intrin, 0);

-nir_ssa_def *zero = nir_imm_int(b, 0);
+nir_def *zero = nir_imm_int(b, 0);

return nir_load_buffer_amd(b, intrin->dest.ssa.num_components,
intrin->dest.ssa.bit_size, offchip_ring,

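The hunks above only swap type names; the builder calls themselves are unchanged. A minimal sketch of the post-rename style (hypothetical helper; nir_imul_imm and nir_load_tess_rel_patch_id_amd as used above):

   static nir_def *
   build_tess_factors_offset(nir_builder *b, unsigned inner_comps, unsigned outer_comps)
   {
      /* Byte offset of this patch's factors: rel_patch_id * (inner + outer) * 4. */
      nir_def *rel_patch_id = nir_load_tess_rel_patch_id_amd(b);
      return nir_imul_imm(b, rel_patch_id, (inner_comps + outer_comps) * 4u);
   }
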
@@ -35,26 +35,26 @@
* the selcoords major axis.
*/
static void
-build_cube_select(nir_builder *b, nir_ssa_def *ma, nir_ssa_def *id, nir_ssa_def *deriv,
-nir_ssa_def **out_ma, nir_ssa_def **out_sc, nir_ssa_def **out_tc)
+build_cube_select(nir_builder *b, nir_def *ma, nir_def *id, nir_def *deriv,
+nir_def **out_ma, nir_def **out_sc, nir_def **out_tc)
{
-nir_ssa_def *deriv_x = nir_channel(b, deriv, 0);
-nir_ssa_def *deriv_y = nir_channel(b, deriv, 1);
-nir_ssa_def *deriv_z = nir_channel(b, deriv, 2);
+nir_def *deriv_x = nir_channel(b, deriv, 0);
+nir_def *deriv_y = nir_channel(b, deriv, 1);
+nir_def *deriv_z = nir_channel(b, deriv, 2);

-nir_ssa_def *is_ma_positive = nir_fge_imm(b, ma, 0.0);
-nir_ssa_def *sgn_ma =
+nir_def *is_ma_positive = nir_fge_imm(b, ma, 0.0);
+nir_def *sgn_ma =
nir_bcsel(b, is_ma_positive, nir_imm_float(b, 1.0), nir_imm_float(b, -1.0));
-nir_ssa_def *neg_sgn_ma = nir_fneg(b, sgn_ma);
+nir_def *neg_sgn_ma = nir_fneg(b, sgn_ma);

-nir_ssa_def *is_ma_z = nir_fge_imm(b, id, 4.0);
-nir_ssa_def *is_ma_y = nir_fge_imm(b, id, 2.0);
+nir_def *is_ma_z = nir_fge_imm(b, id, 4.0);
+nir_def *is_ma_y = nir_fge_imm(b, id, 2.0);
is_ma_y = nir_iand(b, is_ma_y, nir_inot(b, is_ma_z));
-nir_ssa_def *is_not_ma_x = nir_ior(b, is_ma_z, is_ma_y);
+nir_def *is_not_ma_x = nir_ior(b, is_ma_z, is_ma_y);

/* Select sc */
-nir_ssa_def *tmp = nir_bcsel(b, is_not_ma_x, deriv_x, deriv_z);
-nir_ssa_def *sgn =
+nir_def *tmp = nir_bcsel(b, is_not_ma_x, deriv_x, deriv_z);
+nir_def *sgn =
nir_bcsel(b, is_ma_y, nir_imm_float(b, 1.0), nir_bcsel(b, is_ma_z, sgn_ma, neg_sgn_ma));
*out_sc = nir_fmul(b, tmp, sgn);

@@ -69,10 +69,10 @@ build_cube_select(nir_builder *b, nir_ssa_def *ma, nir_ssa_def *id, nir_ssa_def
}

static void
-prepare_cube_coords(nir_builder *b, nir_tex_instr *tex, nir_ssa_def **coord, nir_src *ddx,
+prepare_cube_coords(nir_builder *b, nir_tex_instr *tex, nir_def **coord, nir_src *ddx,
nir_src *ddy, const ac_nir_lower_tex_options *options)
{
-nir_ssa_def *coords[NIR_MAX_VEC_COMPONENTS] = {0};
+nir_def *coords[NIR_MAX_VEC_COMPONENTS] = {0};
for (unsigned i = 0; i < (*coord)->num_components; i++)
coords[i] = nir_channel(b, *coord, i);

@@ -98,12 +98,12 @@ prepare_cube_coords(nir_builder *b, nir_tex_instr *tex, nir_ssa_def **coord, nir
if (tex->is_array && options->gfx_level <= GFX8 && coords[3])
coords[3] = nir_fmax(b, coords[3], nir_imm_float(b, 0.0));

-nir_ssa_def *cube_coords = nir_cube_amd(b, nir_vec(b, coords, 3));
-nir_ssa_def *sc = nir_channel(b, cube_coords, 1);
-nir_ssa_def *tc = nir_channel(b, cube_coords, 0);
-nir_ssa_def *ma = nir_channel(b, cube_coords, 2);
-nir_ssa_def *invma = nir_frcp(b, nir_fabs(b, ma));
-nir_ssa_def *id = nir_channel(b, cube_coords, 3);
+nir_def *cube_coords = nir_cube_amd(b, nir_vec(b, coords, 3));
+nir_def *sc = nir_channel(b, cube_coords, 1);
+nir_def *tc = nir_channel(b, cube_coords, 0);
+nir_def *ma = nir_channel(b, cube_coords, 2);
+nir_def *invma = nir_frcp(b, nir_fabs(b, ma));
+nir_def *id = nir_channel(b, cube_coords, 3);

if (ddx || ddy) {
sc = nir_fmul(b, sc, invma);

@@ -132,13 +132,13 @@ prepare_cube_coords(nir_builder *b, nir_tex_instr *tex, nir_ssa_def **coord, nir
* seems awfully quiet about how textureGrad for cube
* maps should be handled.
*/
-nir_ssa_def *deriv_ma, *deriv_sc, *deriv_tc;
+nir_def *deriv_ma, *deriv_sc, *deriv_tc;
build_cube_select(b, ma, id, i ? ddy->ssa : ddx->ssa, &deriv_ma, &deriv_sc, &deriv_tc);

deriv_ma = nir_fmul(b, deriv_ma, invma);

-nir_ssa_def *x = nir_fsub(b, nir_fmul(b, deriv_sc, invma), nir_fmul(b, deriv_ma, sc));
-nir_ssa_def *y = nir_fsub(b, nir_fmul(b, deriv_tc, invma), nir_fmul(b, deriv_ma, tc));
+nir_def *x = nir_fsub(b, nir_fmul(b, deriv_sc, invma), nir_fmul(b, deriv_ma, sc));
+nir_def *y = nir_fsub(b, nir_fmul(b, deriv_tc, invma), nir_fmul(b, deriv_ma, tc));

nir_instr_rewrite_src_ssa(&tex->instr, i ? ddy : ddx, nir_vec2(b, x, y));
}

@@ -159,20 +159,20 @@ prepare_cube_coords(nir_builder *b, nir_tex_instr *tex, nir_ssa_def **coord, nir
}

static bool
-lower_array_layer_round_even(nir_builder *b, nir_tex_instr *tex, nir_ssa_def **coords)
+lower_array_layer_round_even(nir_builder *b, nir_tex_instr *tex, nir_def **coords)
{
int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
if (coord_index < 0 || nir_tex_instr_src_type(tex, coord_index) != nir_type_float)
return false;

unsigned layer = tex->coord_components - 1;
-nir_ssa_def *rounded_layer = nir_fround_even(b, nir_channel(b, *coords, layer));
+nir_def *rounded_layer = nir_fround_even(b, nir_channel(b, *coords, layer));
*coords = nir_vector_insert_imm(b, *coords, rounded_layer, layer);
return true;
}

static bool
-lower_tex_coords(nir_builder *b, nir_tex_instr *tex, nir_ssa_def **coords,
+lower_tex_coords(nir_builder *b, nir_tex_instr *tex, nir_def **coords,
const ac_nir_lower_tex_options *options)
{
bool progress = false;

@@ -190,11 +190,11 @@ lower_tex_coords(nir_builder *b, nir_tex_instr *tex, nir_ssa_def **coords,
nir_src *ddy = ddy_idx >= 0 ? &tex->src[ddy_idx].src : NULL;

if (tex->sampler_dim == GLSL_SAMPLER_DIM_1D) {
-nir_ssa_def *y =
+nir_def *y =
nir_imm_floatN_t(b, tex->op == nir_texop_txf ? 0.0 : 0.5, (*coords)->bit_size);
if (tex->is_array && (*coords)->num_components > 1) {
-nir_ssa_def *x = nir_channel(b, *coords, 0);
-nir_ssa_def *idx = nir_channel(b, *coords, 1);
+nir_def *x = nir_channel(b, *coords, 0);
+nir_def *idx = nir_channel(b, *coords, 1);
*coords = nir_vec3(b, x, y, idx);
} else {
*coords = nir_vec2(b, *coords, y);

@@ -203,12 +203,12 @@ lower_tex_coords(nir_builder *b, nir_tex_instr *tex, nir_ssa_def **coords,
int offset_src = nir_tex_instr_src_index(tex, nir_tex_src_offset);
if (offset_src >= 0) {
nir_src *offset = &tex->src[offset_src].src;
-nir_ssa_def *zero = nir_imm_intN_t(b, 0, offset->ssa->bit_size);
+nir_def *zero = nir_imm_intN_t(b, 0, offset->ssa->bit_size);
nir_instr_rewrite_src_ssa(&tex->instr, offset, nir_vec2(b, offset->ssa, zero));
}

if (ddx || ddy) {
-nir_ssa_def *def = nir_vec2(b, ddx->ssa, nir_imm_floatN_t(b, 0.0, ddx->ssa->bit_size));
+nir_def *def = nir_vec2(b, ddx->ssa, nir_imm_floatN_t(b, 0.0, ddx->ssa->bit_size));
nir_instr_rewrite_src_ssa(&tex->instr, ddx, def);
def = nir_vec2(b, ddy->ssa, nir_imm_floatN_t(b, 0.0, ddy->ssa->bit_size));
nir_instr_rewrite_src_ssa(&tex->instr, ddy, def);

@@ -233,7 +233,7 @@ lower_tex(nir_builder *b, nir_instr *instr, void *options_)
return false;

b->cursor = nir_before_instr(instr);
-nir_ssa_def *coords = tex->src[coord_idx].src.ssa;
+nir_def *coords = tex->src[coord_idx].src.ssa;
if (lower_tex_coords(b, tex, &coords, options)) {
tex->coord_components = coords->num_components;
nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[coord_idx].src, coords);

@@ -249,12 +249,12 @@ typedef struct {
} coord_info;

static bool
-can_move_coord(nir_ssa_scalar scalar, coord_info *info)
+can_move_coord(nir_scalar scalar, coord_info *info)
{
if (scalar.def->bit_size != 32)
return false;

-if (nir_ssa_scalar_is_const(scalar))
+if (nir_scalar_is_const(scalar))
return true;

if (scalar.def->parent_instr->type != nir_instr_type_intrinsic)

@@ -270,8 +270,8 @@ can_move_coord(nir_ssa_scalar scalar, coord_info *info)
if (intrin->intrinsic != nir_intrinsic_load_interpolated_input)
return false;

-nir_ssa_scalar coord_x = nir_ssa_scalar_resolved(intrin->src[0].ssa, 0);
-nir_ssa_scalar coord_y = nir_ssa_scalar_resolved(intrin->src[0].ssa, 1);
+nir_scalar coord_x = nir_scalar_resolved(intrin->src[0].ssa, 0);
+nir_scalar coord_y = nir_scalar_resolved(intrin->src[0].ssa, 1);
if (coord_x.def->parent_instr->type != nir_instr_type_intrinsic || coord_x.comp != 0 ||
coord_y.def->parent_instr->type != nir_instr_type_intrinsic || coord_y.comp != 1)
return false;

@@ -297,22 +297,22 @@ struct move_tex_coords_state {
nir_builder toplevel_b;
};

-static nir_ssa_def *
-build_coordinate(struct move_tex_coords_state *state, nir_ssa_scalar scalar, coord_info info)
+static nir_def *
+build_coordinate(struct move_tex_coords_state *state, nir_scalar scalar, coord_info info)
{
nir_builder *b = &state->toplevel_b;

-if (nir_ssa_scalar_is_const(scalar))
-return nir_imm_intN_t(b, nir_ssa_scalar_as_uint(scalar), scalar.def->bit_size);
+if (nir_scalar_is_const(scalar))
+return nir_imm_intN_t(b, nir_scalar_as_uint(scalar), scalar.def->bit_size);

ASSERTED nir_src offset = *nir_get_io_offset_src(info.load);
assert(nir_src_is_const(offset) && !nir_src_as_uint(offset));

-nir_ssa_def *zero = nir_imm_int(b, 0);
-nir_ssa_def *res;
+nir_def *zero = nir_imm_int(b, 0);
+nir_def *res;
if (info.bary) {
enum glsl_interp_mode interp_mode = nir_intrinsic_interp_mode(info.bary);
-nir_ssa_def *bary = nir_load_system_value(b, info.bary->intrinsic, interp_mode, 2, 32);
+nir_def *bary = nir_load_system_value(b, info.bary->intrinsic, interp_mode, 2, 32);
res = nir_load_interpolated_input(b, 1, 32, bary, zero);
} else {
res = nir_load_input(b, 1, 32, zero);

@@ -351,11 +351,11 @@ move_tex_coords(struct move_tex_coords_state *state, nir_function_impl *impl, ni
return false;

nir_tex_src *src = &tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)];
-nir_ssa_scalar components[NIR_MAX_VEC_COMPONENTS];
+nir_scalar components[NIR_MAX_VEC_COMPONENTS];
coord_info infos[NIR_MAX_VEC_COMPONENTS];
bool can_move_all = true;
for (unsigned i = 0; i < tex->coord_components; i++) {
-components[i] = nir_ssa_scalar_resolved(src->src.ssa, i);
+components[i] = nir_scalar_resolved(src->src.ssa, i);
can_move_all &= can_move_coord(components[i], &infos[i]);
}
if (!can_move_all)

@@ -386,7 +386,7 @@ move_tex_coords(struct move_tex_coords_state *state, nir_function_impl *impl, ni
for (unsigned i = 0; i < tex->coord_components; i++)
components[i] = nir_get_ssa_scalar(build_coordinate(state, components[i], infos[i]), 0);

-nir_ssa_def *linear_vgpr = nir_vec_scalars(&state->toplevel_b, components, tex->coord_components);
+nir_def *linear_vgpr = nir_vec_scalars(&state->toplevel_b, components, tex->coord_components);
lower_tex_coords(&state->toplevel_b, tex, &linear_vgpr, state->options);

linear_vgpr = nir_strict_wqm_coord_amd(&state->toplevel_b, linear_vgpr, coord_base * 4);

@@ -421,25 +421,25 @@ move_fddxy(struct move_tex_coords_state *state, nir_function_impl *impl, nir_alu
}

unsigned num_components = instr->dest.dest.ssa.num_components;
-nir_ssa_scalar components[NIR_MAX_VEC_COMPONENTS];
+nir_scalar components[NIR_MAX_VEC_COMPONENTS];
coord_info infos[NIR_MAX_VEC_COMPONENTS];
bool can_move_all = true;
for (unsigned i = 0; i < num_components; i++) {
-components[i] = nir_ssa_scalar_chase_alu_src(nir_get_ssa_scalar(&instr->dest.dest.ssa, i), 0);
-components[i] = nir_ssa_scalar_chase_movs(components[i]);
+components[i] = nir_scalar_chase_alu_src(nir_get_ssa_scalar(&instr->dest.dest.ssa, i), 0);
+components[i] = nir_scalar_chase_movs(components[i]);
can_move_all &= can_move_coord(components[i], &infos[i]);
}
if (!can_move_all || state->num_wqm_vgprs + num_components > state->options->max_wqm_vgprs)
return false;

for (unsigned i = 0; i < num_components; i++) {
-nir_ssa_def *def = build_coordinate(state, components[i], infos[i]);
+nir_def *def = build_coordinate(state, components[i], infos[i]);
components[i] = nir_get_ssa_scalar(def, 0);
}

-nir_ssa_def *def = nir_vec_scalars(&state->toplevel_b, components, num_components);
+nir_def *def = nir_vec_scalars(&state->toplevel_b, components, num_components);
def = nir_build_alu1(&state->toplevel_b, instr->op, def);
-nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, def);
+nir_def_rewrite_uses(&instr->dest.dest.ssa, def);

state->num_wqm_vgprs += num_components;

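The nir_scalar helpers keep their old semantics under the new names. A minimal sketch (hypothetical predicate, built only from the helpers used in the hunks above):

   static bool
   component_is_const_zero(nir_def *def, unsigned comp)
   {
      /* Chase vecs/movs to the producing scalar, then test for a literal 0. */
      nir_scalar s = nir_scalar_resolved(def, comp);
      return nir_scalar_is_const(s) && nir_scalar_as_uint(s) == 0;
   }
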
@@ -3312,16 +3312,16 @@ void ac_surface_print_info(FILE *out, const struct radeon_info *info,
}
}

-static nir_ssa_def *gfx10_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info,
+static nir_def *gfx10_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info,
struct gfx9_meta_equation *equation,
int blkSizeBias, unsigned blkStart,
-nir_ssa_def *meta_pitch, nir_ssa_def *meta_slice_size,
-nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
-nir_ssa_def *pipe_xor,
-nir_ssa_def **bit_position)
+nir_def *meta_pitch, nir_def *meta_slice_size,
+nir_def *x, nir_def *y, nir_def *z,
+nir_def *pipe_xor,
+nir_def **bit_position)
{
-nir_ssa_def *zero = nir_imm_int(b, 0);
-nir_ssa_def *one = nir_imm_int(b, 1);
+nir_def *zero = nir_imm_int(b, 0);
+nir_def *one = nir_imm_int(b, 1);

assert(info->gfx_level >= GFX10);

@@ -3329,17 +3329,17 @@ static nir_ssa_def *gfx10_nir_meta_addr_from_coord(nir_builder *b, const struct
unsigned meta_block_height_log2 = util_logbase2(equation->meta_block_height);
unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 + blkSizeBias;

-nir_ssa_def *coord[] = {x, y, z, 0};
-nir_ssa_def *address = zero;
+nir_def *coord[] = {x, y, z, 0};
+nir_def *address = zero;

for (unsigned i = blkStart; i < blkSizeLog2 + 1; i++) {
-nir_ssa_def *v = zero;
+nir_def *v = zero;

for (unsigned c = 0; c < 4; c++) {
unsigned index = i * 4 + c - (blkStart * 4);
if (equation->u.gfx10_bits[index]) {
unsigned mask = equation->u.gfx10_bits[index];
-nir_ssa_def *bits = coord[c];
+nir_def *bits = coord[c];

while (mask)
v = nir_ixor(b, v, nir_iand(b, nir_ushr_imm(b, bits, u_bit_scan(&mask)), one));

@@ -3352,11 +3352,11 @@ static nir_ssa_def *gfx10_nir_meta_addr_from_coord(nir_builder *b, const struct
unsigned blkMask = (1 << blkSizeLog2) - 1;
unsigned pipeMask = (1 << G_0098F8_NUM_PIPES(info->gb_addr_config)) - 1;
unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
-nir_ssa_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);
-nir_ssa_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);
-nir_ssa_def *pb = nir_ushr_imm(b, meta_pitch, meta_block_width_log2);
-nir_ssa_def *blkIndex = nir_iadd(b, nir_imul(b, yb, pb), xb);
-nir_ssa_def *pipeXor = nir_iand_imm(b, nir_ishl_imm(b, nir_iand_imm(b, pipe_xor, pipeMask),
+nir_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);
+nir_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);
+nir_def *pb = nir_ushr_imm(b, meta_pitch, meta_block_width_log2);
+nir_def *blkIndex = nir_iadd(b, nir_imul(b, yb, pb), xb);
+nir_def *pipeXor = nir_iand_imm(b, nir_ishl_imm(b, nir_iand_imm(b, pipe_xor, pipeMask),
m_pipeInterleaveLog2), blkMask);

if (bit_position)

@@ -3367,15 +3367,15 @@ static nir_ssa_def *gfx10_nir_meta_addr_from_coord(nir_builder *b, const struct
nir_ixor(b, nir_ushr(b, address, one), pipeXor));
}

-static nir_ssa_def *gfx9_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info,
+static nir_def *gfx9_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info,
struct gfx9_meta_equation *equation,
-nir_ssa_def *meta_pitch, nir_ssa_def *meta_height,
-nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
-nir_ssa_def *sample, nir_ssa_def *pipe_xor,
-nir_ssa_def **bit_position)
+nir_def *meta_pitch, nir_def *meta_height,
+nir_def *x, nir_def *y, nir_def *z,
+nir_def *sample, nir_def *pipe_xor,
+nir_def **bit_position)
{
-nir_ssa_def *zero = nir_imm_int(b, 0);
-nir_ssa_def *one = nir_imm_int(b, 1);
+nir_def *zero = nir_imm_int(b, 0);
+nir_def *one = nir_imm_int(b, 1);

assert(info->gfx_level >= GFX9);

@@ -3385,32 +3385,32 @@ static nir_ssa_def *gfx9_nir_meta_addr_from_coord(nir_builder *b, const struct r

unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
unsigned numPipeBits = equation->u.gfx9.num_pipe_bits;
-nir_ssa_def *pitchInBlock = nir_ushr_imm(b, meta_pitch, meta_block_width_log2);
-nir_ssa_def *sliceSizeInBlock = nir_imul(b, nir_ushr_imm(b, meta_height, meta_block_height_log2),
+nir_def *pitchInBlock = nir_ushr_imm(b, meta_pitch, meta_block_width_log2);
+nir_def *sliceSizeInBlock = nir_imul(b, nir_ushr_imm(b, meta_height, meta_block_height_log2),
pitchInBlock);

-nir_ssa_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);
-nir_ssa_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);
-nir_ssa_def *zb = nir_ushr_imm(b, z, meta_block_depth_log2);
+nir_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);
+nir_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);
+nir_def *zb = nir_ushr_imm(b, z, meta_block_depth_log2);

-nir_ssa_def *blockIndex = nir_iadd(b, nir_iadd(b, nir_imul(b, zb, sliceSizeInBlock),
+nir_def *blockIndex = nir_iadd(b, nir_iadd(b, nir_imul(b, zb, sliceSizeInBlock),
nir_imul(b, yb, pitchInBlock)), xb);
-nir_ssa_def *coords[] = {x, y, z, sample, blockIndex};
+nir_def *coords[] = {x, y, z, sample, blockIndex};

-nir_ssa_def *address = zero;
+nir_def *address = zero;
unsigned num_bits = equation->u.gfx9.num_bits;
assert(num_bits <= 32);

/* Compute the address up until the last bit that doesn't use the block index. */
for (unsigned i = 0; i < num_bits - 1; i++) {
-nir_ssa_def *xor = zero;
+nir_def *xor = zero;

for (unsigned c = 0; c < 5; c++) {
if (equation->u.gfx9.bit[i].coord[c].dim >= 5)
continue;

assert(equation->u.gfx9.bit[i].coord[c].ord < 32);
-nir_ssa_def *ison =
+nir_def *ison =
nir_iand(b, nir_ushr_imm(b, coords[equation->u.gfx9.bit[i].coord[c].dim],
equation->u.gfx9.bit[i].coord[c].ord), one);

@@ -3429,17 +3429,17 @@ static nir_ssa_def *gfx9_nir_meta_addr_from_coord(nir_builder *b, const struct r
if (bit_position)
*bit_position = nir_ishl_imm(b, nir_iand_imm(b, address, 1), 2);

-nir_ssa_def *pipeXor = nir_iand_imm(b, pipe_xor, (1 << numPipeBits) - 1);
+nir_def *pipeXor = nir_iand_imm(b, pipe_xor, (1 << numPipeBits) - 1);
return nir_ixor(b, nir_ushr(b, address, one),
nir_ishl_imm(b, pipeXor, m_pipeInterleaveLog2));
}

-nir_ssa_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info *info,
+nir_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info *info,
unsigned bpe, struct gfx9_meta_equation *equation,
-nir_ssa_def *dcc_pitch, nir_ssa_def *dcc_height,
-nir_ssa_def *dcc_slice_size,
-nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
-nir_ssa_def *sample, nir_ssa_def *pipe_xor)
+nir_def *dcc_pitch, nir_def *dcc_height,
+nir_def *dcc_slice_size,
+nir_def *x, nir_def *y, nir_def *z,
+nir_def *sample, nir_def *pipe_xor)
{
if (info->gfx_level >= GFX10) {
unsigned bpp_log2 = util_logbase2(bpe);

@@ -3454,15 +3454,15 @@ nir_ssa_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info
}
}

-nir_ssa_def *ac_nir_cmask_addr_from_coord(nir_builder *b, const struct radeon_info *info,
+nir_def *ac_nir_cmask_addr_from_coord(nir_builder *b, const struct radeon_info *info,
struct gfx9_meta_equation *equation,
-nir_ssa_def *cmask_pitch, nir_ssa_def *cmask_height,
-nir_ssa_def *cmask_slice_size,
-nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
-nir_ssa_def *pipe_xor,
-nir_ssa_def **bit_position)
+nir_def *cmask_pitch, nir_def *cmask_height,
+nir_def *cmask_slice_size,
+nir_def *x, nir_def *y, nir_def *z,
+nir_def *pipe_xor,
+nir_def **bit_position)
{
-nir_ssa_def *zero = nir_imm_int(b, 0);
+nir_def *zero = nir_imm_int(b, 0);

if (info->gfx_level >= GFX10) {
return gfx10_nir_meta_addr_from_coord(b, info, equation, -7, 1,

@@ -3475,12 +3475,12 @@ nir_ssa_def *ac_nir_cmask_addr_from_coord(nir_builder *b, const struct radeon_in
}
}

-nir_ssa_def *ac_nir_htile_addr_from_coord(nir_builder *b, const struct radeon_info *info,
+nir_def *ac_nir_htile_addr_from_coord(nir_builder *b, const struct radeon_info *info,
struct gfx9_meta_equation *equation,
-nir_ssa_def *htile_pitch,
-nir_ssa_def *htile_slice_size,
-nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
-nir_ssa_def *pipe_xor)
+nir_def *htile_pitch,
+nir_def *htile_slice_size,
+nir_def *x, nir_def *y, nir_def *z,
+nir_def *pipe_xor)
{
return gfx10_nir_meta_addr_from_coord(b, info, equation, -4, 2,
htile_pitch, htile_slice_size,

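In the address equations above, every output bit is an XOR of selected coordinate bits. A minimal sketch of that inner pattern (hypothetical helper; same calls as the gfx10 loop):

   static nir_def *
   xor_masked_bits(nir_builder *b, nir_def *coord, unsigned mask, nir_def *acc)
   {
      nir_def *one = nir_imm_int(b, 1);
      /* Fold each set bit of the equation mask into the accumulator. */
      while (mask)
         acc = nir_ixor(b, acc, nir_iand(b, nir_ushr_imm(b, coord, u_bit_scan(&mask)), one));
      return acc;
   }
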
@@ -490,27 +490,27 @@ unsigned ac_get_cb_number_type(enum pipe_format format);
unsigned ac_get_cb_format(enum amd_gfx_level gfx_level, enum pipe_format format);

#ifdef AC_SURFACE_INCLUDE_NIR
-nir_ssa_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info *info,
+nir_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info *info,
unsigned bpe, struct gfx9_meta_equation *equation,
-nir_ssa_def *dcc_pitch, nir_ssa_def *dcc_height,
-nir_ssa_def *dcc_slice_size,
-nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
-nir_ssa_def *sample, nir_ssa_def *pipe_xor);
+nir_def *dcc_pitch, nir_def *dcc_height,
+nir_def *dcc_slice_size,
+nir_def *x, nir_def *y, nir_def *z,
+nir_def *sample, nir_def *pipe_xor);

-nir_ssa_def *ac_nir_cmask_addr_from_coord(nir_builder *b, const struct radeon_info *info,
+nir_def *ac_nir_cmask_addr_from_coord(nir_builder *b, const struct radeon_info *info,
struct gfx9_meta_equation *equation,
-nir_ssa_def *cmask_pitch, nir_ssa_def *cmask_height,
-nir_ssa_def *cmask_slice_size,
-nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
-nir_ssa_def *pipe_xor,
-nir_ssa_def **bit_position);
+nir_def *cmask_pitch, nir_def *cmask_height,
+nir_def *cmask_slice_size,
+nir_def *x, nir_def *y, nir_def *z,
+nir_def *pipe_xor,
+nir_def **bit_position);

-nir_ssa_def *ac_nir_htile_addr_from_coord(nir_builder *b, const struct radeon_info *info,
+nir_def *ac_nir_htile_addr_from_coord(nir_builder *b, const struct radeon_info *info,
struct gfx9_meta_equation *equation,
-nir_ssa_def *htile_pitch,
-nir_ssa_def *htile_slice_size,
-nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
-nir_ssa_def *pipe_xor);
+nir_def *htile_pitch,
+nir_def *htile_slice_size,
+nir_def *x, nir_def *y, nir_def *z,
+nir_def *pipe_xor);
#endif

#ifdef __cplusplus

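A hedged usage sketch of the declarations above (every variable here is hypothetical; only the nir_def parameter types come from the header):

   nir_def *dcc_addr =
      ac_nir_dcc_addr_from_coord(b, info, bpe, equation,
                                 dcc_pitch, dcc_height, dcc_slice_size,
                                 x, y, z, sample, pipe_xor);
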
@@ -128,7 +128,7 @@ append_logical_end(Block* b)
}

Temp
-get_ssa_temp(struct isel_context* ctx, nir_ssa_def* def)
+get_ssa_temp(struct isel_context* ctx, nir_def* def)
{
uint32_t id = ctx->first_temp_id + def->index;
return Temp(id, ctx->program->temp_rc[id]);

@@ -576,7 +576,7 @@ byte_align_vector(isel_context* ctx, Temp vec, Operand offset, Temp dst, unsigne
}

Temp
-get_ssa_temp_tex(struct isel_context* ctx, nir_ssa_def* def, bool is_16bit)
+get_ssa_temp_tex(struct isel_context* ctx, nir_def* def, bool is_16bit)
{
RegClass rc = RegClass::get(RegType::vgpr, (is_16bit ? 2 : 4) * def->num_components);
Temp tmp = get_ssa_temp(ctx, def);

@@ -806,8 +806,7 @@ get_alu_src_vop3p(struct isel_context* ctx, nir_alu_src src)
uint32_t
get_alu_src_ub(isel_context* ctx, nir_alu_instr* instr, int src_idx)
{
-nir_ssa_scalar scalar =
-nir_ssa_scalar{instr->src[src_idx].src.ssa, instr->src[src_idx].swizzle[0]};
+nir_scalar scalar = nir_scalar{instr->src[src_idx].src.ssa, instr->src[src_idx].swizzle[0]};
return nir_unsigned_upper_bound(ctx->shader, ctx->range_ht, scalar, &ctx->ub_config);
}

@@ -6131,7 +6130,7 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)

unsigned result_size = instr->dest.ssa.num_components - is_sparse;
unsigned expand_mask =
-nir_ssa_def_components_read(&instr->dest.ssa) & u_bit_consecutive(0, result_size);
+nir_def_components_read(&instr->dest.ssa) & u_bit_consecutive(0, result_size);
expand_mask = MAX2(expand_mask, 1); /* this can be zero in the case of sparse image loads */
if (dim == GLSL_SAMPLER_DIM_BUF)
expand_mask = (1u << util_last_bit(expand_mask)) - 1u;

@@ -6311,9 +6310,9 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
*/
if (instr->src[3].ssa->bit_size == 32 || instr->src[3].ssa->bit_size == 16) {
for (uint32_t i = 0; i < instr->num_components; i++) {
-nir_ssa_scalar comp = nir_ssa_scalar_resolved(instr->src[3].ssa, i);
-if ((nir_ssa_scalar_is_const(comp) && nir_ssa_scalar_as_uint(comp) == 0) ||
-nir_ssa_scalar_is_undef(comp))
+nir_scalar comp = nir_scalar_resolved(instr->src[3].ssa, i);
+if ((nir_scalar_is_const(comp) && nir_scalar_as_uint(comp) == 0) ||
+nir_scalar_is_undef(comp))
dmask &= ~BITFIELD_BIT(i);
}

@@ -6444,7 +6443,7 @@ translate_buffer_image_atomic_op(const nir_atomic_op op, aco_opcode* buf_op, aco
void
visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
{
-bool return_previous = !nir_ssa_def_is_unused(&instr->dest.ssa);
+bool return_previous = !nir_def_is_unused(&instr->dest.ssa);
const enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
bool is_array = nir_intrinsic_image_array(instr);
Builder bld(ctx->program, ctx->block);

@@ -6586,7 +6585,7 @@ void
visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
{
Builder bld(ctx->program, ctx->block);
-bool return_previous = !nir_ssa_def_is_unused(&instr->dest.ssa);
+bool return_previous = !nir_def_is_unused(&instr->dest.ssa);
Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[2].ssa));

const nir_atomic_op nir_op = nir_intrinsic_atomic_op(instr);

@@ -6788,7 +6787,7 @@ void
visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
{
Builder bld(ctx->program, ctx->block);
-bool return_previous = !nir_ssa_def_is_unused(&instr->dest.ssa);
+bool return_previous = !nir_def_is_unused(&instr->dest.ssa);
Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[1].ssa));

const nir_atomic_op nir_op = nir_intrinsic_atomic_op(instr);

@@ -7177,7 +7176,7 @@ emit_barrier(isel_context* ctx, nir_intrinsic_instr* instr)
void
visit_load_shared(isel_context* ctx, nir_intrinsic_instr* instr)
{
-// TODO: implement sparse reads using ds_read2_b32 and nir_ssa_def_components_read()
+// TODO: implement sparse reads using ds_read2_b32 and nir_def_components_read()
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
Builder bld(ctx->program, ctx->block);

@@ -7294,7 +7293,7 @@ visit_shared_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
default: unreachable("Unhandled shared atomic intrinsic");
}

-bool return_previous = !nir_ssa_def_is_unused(&instr->dest.ssa);
+bool return_previous = !nir_def_is_unused(&instr->dest.ssa);

aco_opcode op;
if (data.size() == 1) {

@@ -9102,7 +9101,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
}

void
-get_const_vec(nir_ssa_def* vec, nir_const_value* cv[4])
+get_const_vec(nir_def* vec, nir_const_value* cv[4])
{
if (vec->parent_instr->type != nir_instr_type_alu)
return;

@@ -9339,7 +9338,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
}

/* Build tex instruction */
-unsigned dmask = nir_ssa_def_components_read(&instr->dest.ssa) & 0xf;
+unsigned dmask = nir_def_components_read(&instr->dest.ssa) & 0xf;
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
dmask = u_bit_consecutive(0, util_last_bit(dmask));
if (instr->is_sparse)

@@ -9746,7 +9745,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
}

Operand
-get_phi_operand(isel_context* ctx, nir_ssa_def* ssa, RegClass rc, bool logical)
+get_phi_operand(isel_context* ctx, nir_def* ssa, RegClass rc, bool logical)
{
Temp tmp = get_ssa_temp(ctx, ssa);
if (ssa->parent_instr->type == nir_instr_type_ssa_undef) {

@@ -9772,7 +9771,7 @@ visit_phi(isel_context* ctx, nir_phi_instr* instr)
aco_opcode opcode = logical ? aco_opcode::p_phi : aco_opcode::p_linear_phi;

/* we want a sorted list of sources, since the predecessor list is also sorted */
-std::map<unsigned, nir_ssa_def*> phi_src;
+std::map<unsigned, nir_def*> phi_src;
nir_foreach_phi_src (src, instr)
phi_src[src->pred->index] = src->src.ssa;

@@ -9782,7 +9781,7 @@ visit_phi(isel_context* ctx, nir_phi_instr* instr)
(std::max(exec_list_length(&instr->srcs), (unsigned)preds.size()) + 1) * sizeof(Operand));
unsigned num_defined = 0;
unsigned cur_pred_idx = 0;
-for (std::pair<unsigned, nir_ssa_def*> src : phi_src) {
+for (std::pair<unsigned, nir_def*> src : phi_src) {
if (cur_pred_idx < preds.size()) {
/* handle missing preds (IF merges with discard/break) and extra preds
* (loop exit with discard) */

@@ -9857,7 +9856,7 @@ visit_phi(isel_context* ctx, nir_phi_instr* instr)
}

void
-visit_undef(isel_context* ctx, nir_ssa_undef_instr* instr)
+visit_undef(isel_context* ctx, nir_undef_instr* instr)
{
Temp dst = get_ssa_temp(ctx, &instr->def);

@@ -66,7 +66,7 @@ is_block_reachable(nir_function_impl* impl, nir_block* known_reachable, nir_bloc

/* Check whether the given SSA def is only used by cross-lane instructions. */
bool
-only_used_by_cross_lane_instrs(nir_ssa_def* ssa, bool follow_phis = true)
+only_used_by_cross_lane_instrs(nir_def* ssa, bool follow_phis = true)
{
nir_foreach_use (src, ssa) {
switch (src->parent_instr->type) {

@@ -178,13 +178,13 @@ sanitize_cf_list(nir_function_impl* impl, struct exec_list* cf_list)
}

void
-apply_nuw_to_ssa(isel_context* ctx, nir_ssa_def* ssa)
+apply_nuw_to_ssa(isel_context* ctx, nir_def* ssa)
{
-nir_ssa_scalar scalar;
+nir_scalar scalar;
scalar.def = ssa;
scalar.comp = 0;

-if (!nir_ssa_scalar_is_alu(scalar) || nir_ssa_scalar_alu_op(scalar) != nir_op_iadd)
+if (!nir_scalar_is_alu(scalar) || nir_scalar_alu_op(scalar) != nir_op_iadd)
return;

nir_alu_instr* add = nir_instr_as_alu(ssa->parent_instr);

@@ -192,11 +192,11 @@ apply_nuw_to_ssa(isel_context* ctx, nir_ssa_def* ssa)
if (add->no_unsigned_wrap)
return;

-nir_ssa_scalar src0 = nir_ssa_scalar_chase_alu_src(scalar, 0);
-nir_ssa_scalar src1 = nir_ssa_scalar_chase_alu_src(scalar, 1);
+nir_scalar src0 = nir_scalar_chase_alu_src(scalar, 0);
+nir_scalar src1 = nir_scalar_chase_alu_src(scalar, 1);

-if (nir_ssa_scalar_is_const(src0)) {
-nir_ssa_scalar tmp = src0;
+if (nir_scalar_is_const(src0)) {
+nir_scalar tmp = src0;
src0 = src1;
src1 = tmp;
}

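A minimal sketch of the chasing pattern used by apply_nuw_to_ssa above (hypothetical predicate, same nir_scalar API):

   static bool
   is_iadd_with_const_operand(nir_def *def)
   {
      nir_scalar s = {def, 0};
      if (!nir_scalar_is_alu(s) || nir_scalar_alu_op(s) != nir_op_iadd)
         return false;
      /* Either operand may hold the constant, mirroring the swap above. */
      return nir_scalar_is_const(nir_scalar_chase_alu_src(s, 0)) ||
             nir_scalar_is_const(nir_scalar_chase_alu_src(s, 1));
   }
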
@@ -41,7 +41,7 @@ struct ac_nir_context {
LLVMBasicBlockRef break_block;
};

-static LLVMTypeRef get_def_type(struct ac_nir_context *ctx, const nir_ssa_def *def)
+static LLVMTypeRef get_def_type(struct ac_nir_context *ctx, const nir_def *def)
{
LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
if (def->num_components > 1) {

@@ -1471,7 +1471,7 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, const nir_te
assert((!args->tfe || !args->d16) && "unsupported");

if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
-unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
+unsigned mask = nir_def_components_read(&instr->dest.ssa);

/* Buffers don't support A16. */
if (args->a16)

@@ -2326,7 +2326,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intri
args.tfe = instr->intrinsic == nir_intrinsic_bindless_image_sparse_load;

if (dim == GLSL_SAMPLER_DIM_BUF) {
-unsigned num_channels = util_last_bit(nir_ssa_def_components_read(&instr->dest.ssa));
+unsigned num_channels = util_last_bit(nir_def_components_read(&instr->dest.ssa));
if (instr->dest.ssa.bit_size == 64)
num_channels = num_channels < 4 ? 2 : 4;
LLVMValueRef rsrc, vindex;

@@ -4133,7 +4133,7 @@ static void phi_post_pass(struct ac_nir_context *ctx)
}
}

-static bool is_def_used_in_an_export(const nir_ssa_def *def)
+static bool is_def_used_in_an_export(const nir_def *def)
{
nir_foreach_use (use_src, def) {
if (use_src->parent_instr->type == nir_instr_type_intrinsic) {

@@ -4150,7 +4150,7 @@ static bool is_def_used_in_an_export(const nir_ssa_def *def)
return false;
}

-static void visit_ssa_undef(struct ac_nir_context *ctx, const nir_ssa_undef_instr *instr)
+static void visit_ssa_undef(struct ac_nir_context *ctx, const nir_undef_instr *instr)
{
unsigned num_components = instr->def.num_components;
LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);

@@ -618,7 +618,7 @@ radv_meta_build_nir_vs_generate_vertices(struct radv_device *dev)

nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_VERTEX, "meta_vs_gen_verts");

-nir_ssa_def *outvec = nir_gen_rect_vertices(&b, NULL, NULL);
+nir_def *outvec = nir_gen_rect_vertices(&b, NULL, NULL);

v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
v_position->data.location = VARYING_SLOT_POS;

@@ -636,10 +636,10 @@ radv_meta_build_nir_fs_noop(struct radv_device *dev)

void
radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, bool is_integer, int samples,
-nir_variable *input_img, nir_variable *color, nir_ssa_def *img_coord)
+nir_variable *input_img, nir_variable *color, nir_def *img_coord)
{
nir_deref_instr *input_img_deref = nir_build_deref_var(b, input_img);
-nir_ssa_def *sample0 = nir_txf_ms_deref(b, input_img_deref, img_coord, nir_imm_int(b, 0));
+nir_def *sample0 = nir_txf_ms_deref(b, input_img_deref, img_coord, nir_imm_int(b, 0));

if (is_integer || samples <= 1) {
nir_store_var(b, color, sample0, 0xf);

@@ -647,13 +647,13 @@ radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b,
}

if (device->physical_device->use_fmask) {
-nir_ssa_def *all_same = nir_samples_identical_deref(b, input_img_deref, img_coord);
+nir_def *all_same = nir_samples_identical_deref(b, input_img_deref, img_coord);
nir_push_if(b, nir_inot(b, all_same));
}

-nir_ssa_def *accum = sample0;
+nir_def *accum = sample0;
for (int i = 1; i < samples; i++) {
-nir_ssa_def *sample = nir_txf_ms_deref(b, input_img_deref, img_coord, nir_imm_int(b, i));
+nir_def *sample = nir_txf_ms_deref(b, input_img_deref, img_coord, nir_imm_int(b, i));
accum = nir_fadd(b, accum, sample);
}

@@ -667,21 +667,21 @@ radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b,
}
}

-nir_ssa_def *
+nir_def *
radv_meta_load_descriptor(nir_builder *b, unsigned desc_set, unsigned binding)
{
-nir_ssa_def *rsrc = nir_vulkan_resource_index(b, 3, 32, nir_imm_int(b, 0), .desc_set = desc_set, .binding = binding);
+nir_def *rsrc = nir_vulkan_resource_index(b, 3, 32, nir_imm_int(b, 0), .desc_set = desc_set, .binding = binding);
return nir_trim_vector(b, rsrc, 2);
}

-nir_ssa_def *
+nir_def *
get_global_ids(nir_builder *b, unsigned num_components)
{
unsigned mask = BITFIELD_MASK(num_components);

-nir_ssa_def *local_ids = nir_channels(b, nir_load_local_invocation_id(b), mask);
-nir_ssa_def *block_ids = nir_channels(b, nir_load_workgroup_id(b, 32), mask);
-nir_ssa_def *block_size =
+nir_def *local_ids = nir_channels(b, nir_load_local_invocation_id(b), mask);
+nir_def *block_ids = nir_channels(b, nir_load_workgroup_id(b, 32), mask);
+nir_def *block_size =
nir_channels(b,
nir_imm_ivec4(b, b->shader->info.workgroup_size[0], b->shader->info.workgroup_size[1],
b->shader->info.workgroup_size[2], 0),

@@ -691,9 +691,9 @@ get_global_ids(nir_builder *b, unsigned num_components)
}

void
-radv_break_on_count(nir_builder *b, nir_variable *var, nir_ssa_def *count)
+radv_break_on_count(nir_builder *b, nir_variable *var, nir_def *count)
{
-nir_ssa_def *counter = nir_load_var(b, var);
+nir_def *counter = nir_load_var(b, var);

nir_push_if(b, nir_uge(b, counter, count));
nir_jump(b, nir_jump_break);

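A hedged sketch of how the two helpers above combine in a meta compute shader (hypothetical loop; the hunk shows only the compare-and-break half of radv_break_on_count, so any counter increment is elided here):

   nir_variable *i = nir_local_variable_create(b.impl, glsl_uint_type(), "i");
   nir_store_var(&b, i, nir_imm_int(&b, 0), 0x1);
   nir_push_loop(&b);
   radv_break_on_count(&b, i, nir_imm_int(&b, 8));   /* leave the loop once i >= 8 */
   nir_pop_loop(&b, NULL);
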
@@ -261,13 +261,13 @@ nir_shader *radv_meta_build_nir_vs_generate_vertices(struct radv_device *dev);
nir_shader *radv_meta_build_nir_fs_noop(struct radv_device *dev);

void radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, bool is_integer, int samples,
-nir_variable *input_img, nir_variable *color, nir_ssa_def *img_coord);
+nir_variable *input_img, nir_variable *color, nir_def *img_coord);

-nir_ssa_def *radv_meta_load_descriptor(nir_builder *b, unsigned desc_set, unsigned binding);
+nir_def *radv_meta_load_descriptor(nir_builder *b, unsigned desc_set, unsigned binding);

-nir_ssa_def *get_global_ids(nir_builder *b, unsigned num_components);
+nir_def *get_global_ids(nir_builder *b, unsigned num_components);

-void radv_break_on_count(nir_builder *b, nir_variable *var, nir_ssa_def *count);
+void radv_break_on_count(nir_builder *b, nir_variable *var, nir_def *count);

#ifdef __cplusplus
}

@@ -47,14 +47,14 @@ build_nir_vertex_shader(struct radv_device *dev)
tex_pos_out->data.location = VARYING_SLOT_VAR0;
tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;

-nir_ssa_def *outvec = nir_gen_rect_vertices(&b, NULL, NULL);
+nir_def *outvec = nir_gen_rect_vertices(&b, NULL, NULL);

nir_store_var(&b, pos_out, outvec, 0xf);

-nir_ssa_def *src_box = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
-nir_ssa_def *src0_z = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
+nir_def *src_box = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
+nir_def *src0_z = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);

-nir_ssa_def *vertex_id = nir_load_vertex_id_zero_base(&b);
+nir_def *vertex_id = nir_load_vertex_id_zero_base(&b);

/* vertex 0 - src0_x, src0_y, src0_z */
/* vertex 1 - src0_x, src1_y, src0_z*/

@@ -62,16 +62,16 @@ build_nir_vertex_shader(struct radv_device *dev)
/* so channel 0 is vertex_id != 2 ? src_x : src_x + w
channel 1 is vertex id != 1 ? src_y : src_y + w */

-nir_ssa_def *c0cmp = nir_ine_imm(&b, vertex_id, 2);
-nir_ssa_def *c1cmp = nir_ine_imm(&b, vertex_id, 1);
+nir_def *c0cmp = nir_ine_imm(&b, vertex_id, 2);
+nir_def *c1cmp = nir_ine_imm(&b, vertex_id, 1);

-nir_ssa_def *comp[4];
+nir_def *comp[4];
comp[0] = nir_bcsel(&b, c0cmp, nir_channel(&b, src_box, 0), nir_channel(&b, src_box, 2));

comp[1] = nir_bcsel(&b, c1cmp, nir_channel(&b, src_box, 1), nir_channel(&b, src_box, 3));
comp[2] = src0_z;
comp[3] = nir_imm_float(&b, 1.0);
-nir_ssa_def *out_tex_vec = nir_vec(&b, comp, 4);
+nir_def *out_tex_vec = nir_vec(&b, comp, 4);
nir_store_var(&b, tex_pos_out, out_tex_vec, 0xf);
return b.shader;
}

@@ -89,7 +89,7 @@ build_nir_copy_fragment_shader(struct radv_device *dev, enum glsl_sampler_dim te
* position.
*/
unsigned swz[] = {0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2};
-nir_ssa_def *const tex_pos =
+nir_def *const tex_pos =
nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));

const struct glsl_type *sampler_type =

@@ -99,7 +99,7 @@ build_nir_copy_fragment_shader(struct radv_device *dev, enum glsl_sampler_dim te
sampler->data.binding = 0;

nir_deref_instr *tex_deref = nir_build_deref_var(&b, sampler);
-nir_ssa_def *color = nir_tex_deref(&b, tex_deref, tex_deref, tex_pos);
+nir_def *color = nir_tex_deref(&b, tex_deref, tex_deref, tex_pos);

nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
color_out->data.location = FRAG_RESULT_DATA0;

@@ -121,7 +121,7 @@ build_nir_copy_fragment_shader_depth(struct radv_device *dev, enum glsl_sampler_
* position.
*/
unsigned swz[] = {0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2};
-nir_ssa_def *const tex_pos =
+nir_def *const tex_pos =
nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));

const struct glsl_type *sampler_type =

@@ -131,7 +131,7 @@ build_nir_copy_fragment_shader_depth(struct radv_device *dev, enum glsl_sampler_
sampler->data.binding = 0;

nir_deref_instr *tex_deref = nir_build_deref_var(&b, sampler);
-nir_ssa_def *color = nir_tex_deref(&b, tex_deref, tex_deref, tex_pos);
+nir_def *color = nir_tex_deref(&b, tex_deref, tex_deref, tex_pos);

nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
color_out->data.location = FRAG_RESULT_DEPTH;

@@ -153,7 +153,7 @@ build_nir_copy_fragment_shader_stencil(struct radv_device *dev, enum glsl_sample
* position.
*/
unsigned swz[] = {0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2};
-nir_ssa_def *const tex_pos =
+nir_def *const tex_pos =
nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));

const struct glsl_type *sampler_type =

@@ -163,7 +163,7 @@ build_nir_copy_fragment_shader_stencil(struct radv_device *dev, enum glsl_sample
sampler->data.binding = 0;

nir_deref_instr *tex_deref = nir_build_deref_var(&b, sampler);
-nir_ssa_def *color = nir_tex_deref(&b, tex_deref, tex_deref, tex_pos);
+nir_def *color = nir_tex_deref(&b, tex_deref, tex_deref, tex_pos);

nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
color_out->data.location = FRAG_RESULT_STENCIL;

@@ -375,11 +375,11 @@ build_nir_vertex_shader(struct radv_device *device)
tex_pos_out->data.location = VARYING_SLOT_VAR0;
tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;

-nir_ssa_def *outvec = nir_gen_rect_vertices(&b, NULL, NULL);
+nir_def *outvec = nir_gen_rect_vertices(&b, NULL, NULL);
nir_store_var(&b, pos_out, outvec, 0xf);

-nir_ssa_def *src_box = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
-nir_ssa_def *vertex_id = nir_load_vertex_id_zero_base(&b);
+nir_def *src_box = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
+nir_def *vertex_id = nir_load_vertex_id_zero_base(&b);

/* vertex 0 - src_x, src_y */
/* vertex 1 - src_x, src_y+h */

@@ -387,22 +387,22 @@ build_nir_vertex_shader(struct radv_device *device)
/* so channel 0 is vertex_id != 2 ? src_x : src_x + w
channel 1 is vertex id != 1 ? src_y : src_y + w */

-nir_ssa_def *c0cmp = nir_ine_imm(&b, vertex_id, 2);
-nir_ssa_def *c1cmp = nir_ine_imm(&b, vertex_id, 1);
+nir_def *c0cmp = nir_ine_imm(&b, vertex_id, 2);
+nir_def *c1cmp = nir_ine_imm(&b, vertex_id, 1);

-nir_ssa_def *comp[2];
+nir_def *comp[2];
comp[0] = nir_bcsel(&b, c0cmp, nir_channel(&b, src_box, 0), nir_channel(&b, src_box, 2));

comp[1] = nir_bcsel(&b, c1cmp, nir_channel(&b, src_box, 1), nir_channel(&b, src_box, 3));
-nir_ssa_def *out_tex_vec = nir_vec(&b, comp, 2);
+nir_def *out_tex_vec = nir_vec(&b, comp, 2);
nir_store_var(&b, tex_pos_out, out_tex_vec, 0x3);
return b.shader;
}

-typedef nir_ssa_def *(*texel_fetch_build_func)(struct nir_builder *, struct radv_device *, nir_ssa_def *, bool, bool);
+typedef nir_def *(*texel_fetch_build_func)(struct nir_builder *, struct radv_device *, nir_def *, bool, bool);

-static nir_ssa_def *
-build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa_def *tex_pos, bool is_3d,
+static nir_def *
+build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device, nir_def *tex_pos, bool is_3d,
bool is_multisampled)
{
enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D

@@ -413,12 +413,12 @@ build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;

-nir_ssa_def *tex_pos_3d = NULL;
-nir_ssa_def *sample_idx = NULL;
+nir_def *tex_pos_3d = NULL;
+nir_def *sample_idx = NULL;
if (is_3d) {
-nir_ssa_def *layer = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
+nir_def *layer = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);

-nir_ssa_def *chans[3];
+nir_def *chans[3];
chans[0] = nir_channel(b, tex_pos, 0);
chans[1] = nir_channel(b, tex_pos, 1);
chans[2] = layer;

@@ -437,8 +437,8 @@ build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa
}
}

-static nir_ssa_def *
-build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa_def *tex_pos, bool is_3d,
+static nir_def *
+build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device, nir_def *tex_pos, bool is_3d,
bool is_multisampled)
{
const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);

@@ -446,10 +446,10 @@ build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device, nir_ss
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;

-nir_ssa_def *width = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
+nir_def *width = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);

-nir_ssa_def *pos_x = nir_channel(b, tex_pos, 0);
-nir_ssa_def *pos_y = nir_channel(b, tex_pos, 1);
+nir_def *pos_x = nir_channel(b, tex_pos, 0);
+nir_def *pos_y = nir_channel(b, tex_pos, 1);
pos_y = nir_imul(b, pos_y, width);
pos_x = nir_iadd(b, pos_x, pos_y);

@@ -477,10 +477,10 @@ build_nir_copy_fragment_shader(struct radv_device *device, texel_fetch_build_fun
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
color_out->data.location = FRAG_RESULT_DATA0;

-nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
-nir_ssa_def *tex_pos = nir_trim_vector(&b, pos_int, 2);
+nir_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
+nir_def *tex_pos = nir_trim_vector(&b, pos_int, 2);

-nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
+nir_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
nir_store_var(&b, color_out, color, 0xf);

b.shader->info.fs.uses_sample_shading = is_multisampled;

@@ -502,10 +502,10 @@ build_nir_copy_fragment_shader_depth(struct radv_device *device, texel_fetch_bui
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
color_out->data.location = FRAG_RESULT_DEPTH;

-nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
-nir_ssa_def *tex_pos = nir_trim_vector(&b, pos_int, 2);
+nir_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
+nir_def *tex_pos = nir_trim_vector(&b, pos_int, 2);

-nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
+nir_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
nir_store_var(&b, color_out, color, 0x1);

b.shader->info.fs.uses_sample_shading = is_multisampled;

@@ -527,10 +527,10 @@ build_nir_copy_fragment_shader_stencil(struct radv_device *device, texel_fetch_b
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
color_out->data.location = FRAG_RESULT_STENCIL;

-nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
-nir_ssa_def *tex_pos = nir_trim_vector(&b, pos_int, 2);
+nir_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
+nir_def *tex_pos = nir_trim_vector(&b, pos_int, 2);

-nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
+nir_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
nir_store_var(&b, color_out, color, 0x1);

b.shader->info.fs.uses_sample_shading = is_multisampled;

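The texel_fetch_build_func typedef above lets one fragment-shader builder serve both image and buffer sources; a hedged dispatch sketch (use_buffer is hypothetical, the call itself matches the hunks above):

   texel_fetch_build_func txf_func = use_buffer ? build_nir_buffer_fetch : build_nir_texel_fetch;
   nir_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
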
@@ -10,17 +10,17 @@ build_buffer_fill_shader(struct radv_device *dev)
 nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_buffer_fill");
 b.shader->info.workgroup_size[0] = 64;

-nir_ssa_def *pconst = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
-nir_ssa_def *buffer_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b0011));
-nir_ssa_def *max_offset = nir_channel(&b, pconst, 2);
-nir_ssa_def *data = nir_swizzle(&b, nir_channel(&b, pconst, 3), (unsigned[]){0, 0, 0, 0}, 4);
+nir_def *pconst = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
+nir_def *buffer_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b0011));
+nir_def *max_offset = nir_channel(&b, pconst, 2);
+nir_def *data = nir_swizzle(&b, nir_channel(&b, pconst, 3), (unsigned[]){0, 0, 0, 0}, 4);

-nir_ssa_def *global_id = nir_iadd(
+nir_def *global_id = nir_iadd(
 &b, nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b, 32), 0), b.shader->info.workgroup_size[0]),
 nir_load_local_invocation_index(&b));

-nir_ssa_def *offset = nir_imin(&b, nir_imul_imm(&b, global_id, 16), max_offset);
-nir_ssa_def *dst_addr = nir_iadd(&b, buffer_addr, nir_u2u64(&b, offset));
+nir_def *offset = nir_imin(&b, nir_imul_imm(&b, global_id, 16), max_offset);
+nir_def *dst_addr = nir_iadd(&b, buffer_addr, nir_u2u64(&b, offset));
 nir_build_store_global(&b, data, dst_addr, .align_mul = 4);

 return b.shader;

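The nir_imin clamp above means invocations past the end of the buffer redundantly rewrite the final 16-byte chunk instead of taking a branch. A scalar sketch of the per-invocation addressing (fill_dst is illustrative and not part of this patch; MIN2 as in Mesa's util macros):

/* Each of the 64 threads per workgroup writes one 16-byte chunk; threads
 * whose chunk would start past max_offset are clamped to it rather than
 * masked off, so the tail is simply written more than once. */
static uint64_t
fill_dst(uint64_t buffer_addr, uint32_t global_id, uint32_t max_offset)
{
   uint32_t offset = MIN2(global_id * 16u, max_offset);
   return buffer_addr + offset;
}
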
@@ -32,18 +32,18 @@ build_buffer_copy_shader(struct radv_device *dev)
 nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_buffer_copy");
 b.shader->info.workgroup_size[0] = 64;

-nir_ssa_def *pconst = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
-nir_ssa_def *max_offset = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
-nir_ssa_def *src_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b0011));
-nir_ssa_def *dst_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b1100));
+nir_def *pconst = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
+nir_def *max_offset = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
+nir_def *src_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b0011));
+nir_def *dst_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b1100));

-nir_ssa_def *global_id = nir_iadd(
+nir_def *global_id = nir_iadd(
 &b, nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b, 32), 0), b.shader->info.workgroup_size[0]),
 nir_load_local_invocation_index(&b));

-nir_ssa_def *offset = nir_u2u64(&b, nir_imin(&b, nir_imul_imm(&b, global_id, 16), max_offset));
+nir_def *offset = nir_u2u64(&b, nir_imin(&b, nir_imul_imm(&b, global_id, 16), max_offset));

-nir_ssa_def *data = nir_build_load_global(&b, 4, 32, nir_iadd(&b, src_addr, offset), .align_mul = 4);
+nir_def *data = nir_build_load_global(&b, 4, 32, nir_iadd(&b, src_addr, offset), .align_mul = 4);
 nir_build_store_global(&b, data, nir_iadd(&b, dst_addr, offset), .align_mul = 4);

 return b.shader;

@@ -46,24 +46,24 @@ build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
 output_img->data.descriptor_set = 0;
 output_img->data.binding = 1;

-nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);
+nir_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);

-nir_ssa_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
-nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
+nir_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
+nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

-nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
-nir_ssa_def *outval =
+nir_def *img_coord = nir_iadd(&b, global_id, offset);
+nir_def *outval =
 nir_txf_deref(&b, nir_build_deref_var(&b, input_img), nir_trim_vector(&b, img_coord, 2 + is_3d), NULL);

-nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
-nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
+nir_def *pos_x = nir_channel(&b, global_id, 0);
+nir_def *pos_y = nir_channel(&b, global_id, 1);

-nir_ssa_def *tmp = nir_imul(&b, pos_y, stride);
+nir_def *tmp = nir_imul(&b, pos_y, stride);
 tmp = nir_iadd(&b, tmp, pos_x);

-nir_ssa_def *coord = nir_replicate(&b, tmp, 4);
+nir_def *coord = nir_replicate(&b, tmp, 4);

-nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_ssa_undef(&b, 1, 32), outval,
+nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_undef(&b, 1, 32), outval,
 nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);

 return b.shader;

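get_global_ids() is a radv meta helper that is not itself touched by this diff; a minimal sketch of what it computes, assuming the builder calls visible above and Mesa's BITFIELD_MASK macro (the exact signature is an assumption):

/* Hypothetical sketch: global invocation ID = workgroup ID * workgroup
 * size + local invocation ID, trimmed to num_components channels. */
static nir_def *
get_global_ids(nir_builder *b, unsigned num_components)
{
   unsigned mask = BITFIELD_MASK(num_components);

   nir_def *local_ids = nir_channels(b, nir_load_local_invocation_id(b), mask);
   nir_def *block_ids = nir_channels(b, nir_load_workgroup_id(b, 32), mask);
   nir_def *block_size = nir_channels(b,
                                      nir_imm_ivec4(b, b->shader->info.workgroup_size[0],
                                                    b->shader->info.workgroup_size[1],
                                                    b->shader->info.workgroup_size[2], 0),
                                      mask);

   return nir_iadd(b, nir_imul(b, block_ids, block_size), local_ids);
}
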
@@ -196,26 +196,25 @@ build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
 output_img->data.descriptor_set = 0;
 output_img->data.binding = 1;

-nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);
+nir_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);

-nir_ssa_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
-nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
+nir_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
+nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

-nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
-nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
+nir_def *pos_x = nir_channel(&b, global_id, 0);
+nir_def *pos_y = nir_channel(&b, global_id, 1);

-nir_ssa_def *buf_coord = nir_imul(&b, pos_y, stride);
+nir_def *buf_coord = nir_imul(&b, pos_y, stride);
 buf_coord = nir_iadd(&b, buf_coord, pos_x);

-nir_ssa_def *coord = nir_iadd(&b, global_id, offset);
-nir_ssa_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), buf_coord, NULL);
+nir_def *coord = nir_iadd(&b, global_id, offset);
+nir_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), buf_coord, NULL);

-nir_ssa_def *img_coord =
-nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1),
-is_3d ? nir_channel(&b, coord, 2) : nir_ssa_undef(&b, 1, 32), nir_ssa_undef(&b, 1, 32));
+nir_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1),
+is_3d ? nir_channel(&b, coord, 2) : nir_undef(&b, 1, 32), nir_undef(&b, 1, 32));

-nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32),
-outval, nir_imm_int(&b, 0), .image_dim = dim);
+nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_undef(&b, 1, 32), outval,
+nir_imm_int(&b, 0), .image_dim = dim);

 return b.shader;
 }
@@ -344,31 +343,31 @@ build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
 output_img->data.descriptor_set = 0;
 output_img->data.binding = 1;

-nir_ssa_def *global_id = get_global_ids(&b, 2);
+nir_def *global_id = get_global_ids(&b, 2);

-nir_ssa_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
-nir_ssa_def *pitch = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 12);
-nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
+nir_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
+nir_def *pitch = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 12);
+nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

-nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
-nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
+nir_def *pos_x = nir_channel(&b, global_id, 0);
+nir_def *pos_y = nir_channel(&b, global_id, 1);

-nir_ssa_def *buf_coord = nir_imul(&b, pos_y, stride);
+nir_def *buf_coord = nir_imul(&b, pos_y, stride);
 buf_coord = nir_iadd(&b, buf_coord, pos_x);

-nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
+nir_def *img_coord = nir_iadd(&b, global_id, offset);

-nir_ssa_def *global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, img_coord, 1), pitch),
+nir_def *global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, img_coord, 1), pitch),
 nir_imul_imm(&b, nir_channel(&b, img_coord, 0), 3));

-nir_ssa_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), buf_coord, NULL);
+nir_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), buf_coord, NULL);

 for (int chan = 0; chan < 3; chan++) {
-nir_ssa_def *local_pos = nir_iadd_imm(&b, global_pos, chan);
+nir_def *local_pos = nir_iadd_imm(&b, global_pos, chan);

-nir_ssa_def *coord = nir_replicate(&b, local_pos, 4);
+nir_def *coord = nir_replicate(&b, local_pos, 4);

-nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_ssa_undef(&b, 1, 32),
+nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_undef(&b, 1, 32),
 nir_channel(&b, outval, chan), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
 }

@@ -472,18 +471,17 @@ build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d, int samples)
 output_img->data.descriptor_set = 0;
 output_img->data.binding = 1;

-nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);
+nir_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);

-nir_ssa_def *src_offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
-nir_ssa_def *dst_offset =
-nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 12), .range = is_3d ? 24 : 20);
+nir_def *src_offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
+nir_def *dst_offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 12), .range = is_3d ? 24 : 20);

-nir_ssa_def *src_coord = nir_iadd(&b, global_id, src_offset);
+nir_def *src_coord = nir_iadd(&b, global_id, src_offset);
 nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img);

-nir_ssa_def *dst_coord = nir_iadd(&b, global_id, dst_offset);
+nir_def *dst_coord = nir_iadd(&b, global_id, dst_offset);

-nir_ssa_def *tex_vals[8];
+nir_def *tex_vals[8];
 if (is_multisampled) {
 for (uint32_t i = 0; i < samples; i++) {
 tex_vals[i] = nir_txf_ms_deref(&b, input_img_deref, nir_trim_vector(&b, src_coord, 2), nir_imm_int(&b, i));
@@ -492,9 +490,8 @@ build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d, int samples)
 tex_vals[0] = nir_txf_deref(&b, input_img_deref, nir_trim_vector(&b, src_coord, 2 + is_3d), nir_imm_int(&b, 0));
 }

-nir_ssa_def *img_coord =
-nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1),
-is_3d ? nir_channel(&b, dst_coord, 2) : nir_ssa_undef(&b, 1, 32), nir_ssa_undef(&b, 1, 32));
+nir_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1),
+is_3d ? nir_channel(&b, dst_coord, 2) : nir_undef(&b, 1, 32), nir_undef(&b, 1, 32));

 for (uint32_t i = 0; i < samples; i++) {
 nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_imm_int(&b, i),
@@ -641,34 +638,34 @@ build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
 output_img->data.descriptor_set = 0;
 output_img->data.binding = 1;

-nir_ssa_def *global_id = get_global_ids(&b, 2);
+nir_def *global_id = get_global_ids(&b, 2);

-nir_ssa_def *src_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 12);
-nir_ssa_def *dst_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 12), .range = 24);
+nir_def *src_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 12);
+nir_def *dst_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 12), .range = 24);

-nir_ssa_def *src_stride = nir_channel(&b, src_offset, 2);
-nir_ssa_def *dst_stride = nir_channel(&b, dst_offset, 2);
+nir_def *src_stride = nir_channel(&b, src_offset, 2);
+nir_def *dst_stride = nir_channel(&b, dst_offset, 2);

-nir_ssa_def *src_img_coord = nir_iadd(&b, global_id, src_offset);
-nir_ssa_def *dst_img_coord = nir_iadd(&b, global_id, dst_offset);
+nir_def *src_img_coord = nir_iadd(&b, global_id, src_offset);
+nir_def *dst_img_coord = nir_iadd(&b, global_id, dst_offset);

-nir_ssa_def *src_global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
+nir_def *src_global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
 nir_imul_imm(&b, nir_channel(&b, src_img_coord, 0), 3));

-nir_ssa_def *dst_global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
+nir_def *dst_global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
 nir_imul_imm(&b, nir_channel(&b, dst_img_coord, 0), 3));

 for (int chan = 0; chan < 3; chan++) {
 /* src */
-nir_ssa_def *src_local_pos = nir_iadd_imm(&b, src_global_pos, chan);
-nir_ssa_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), src_local_pos, NULL);
+nir_def *src_local_pos = nir_iadd_imm(&b, src_global_pos, chan);
+nir_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), src_local_pos, NULL);

 /* dst */
-nir_ssa_def *dst_local_pos = nir_iadd_imm(&b, dst_global_pos, chan);
+nir_def *dst_local_pos = nir_iadd_imm(&b, dst_global_pos, chan);

-nir_ssa_def *dst_coord = nir_replicate(&b, dst_local_pos, 4);
+nir_def *dst_coord = nir_replicate(&b, dst_local_pos, 4);

-nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord, nir_ssa_undef(&b, 1, 32),
+nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord, nir_undef(&b, 1, 32),
 nir_channel(&b, outval, 0), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
 }

@@ -769,16 +766,16 @@ build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d, int samples
 output_img->data.descriptor_set = 0;
 output_img->data.binding = 0;

-nir_ssa_def *global_id = get_global_ids(&b, 2);
+nir_def *global_id = get_global_ids(&b, 2);

-nir_ssa_def *clear_val = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
-nir_ssa_def *layer = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20);
+nir_def *clear_val = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
+nir_def *layer = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20);

-nir_ssa_def *comps[4];
+nir_def *comps[4];
 comps[0] = nir_channel(&b, global_id, 0);
 comps[1] = nir_channel(&b, global_id, 1);
 comps[2] = layer;
-comps[3] = nir_ssa_undef(&b, 1, 32);
+comps[3] = nir_undef(&b, 1, 32);
 global_id = nir_vec(&b, comps, 4);

 for (uint32_t i = 0; i < samples; i++) {
@@ -917,22 +914,22 @@ build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
 output_img->data.descriptor_set = 0;
 output_img->data.binding = 0;

-nir_ssa_def *global_id = get_global_ids(&b, 2);
+nir_def *global_id = get_global_ids(&b, 2);

-nir_ssa_def *clear_val = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 12);
-nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
+nir_def *clear_val = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 12);
+nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

-nir_ssa_def *global_x = nir_channel(&b, global_id, 0);
-nir_ssa_def *global_y = nir_channel(&b, global_id, 1);
+nir_def *global_x = nir_channel(&b, global_id, 0);
+nir_def *global_y = nir_channel(&b, global_id, 1);

-nir_ssa_def *global_pos = nir_iadd(&b, nir_imul(&b, global_y, stride), nir_imul_imm(&b, global_x, 3));
+nir_def *global_pos = nir_iadd(&b, nir_imul(&b, global_y, stride), nir_imul_imm(&b, global_x, 3));

 for (unsigned chan = 0; chan < 3; chan++) {
-nir_ssa_def *local_pos = nir_iadd_imm(&b, global_pos, chan);
+nir_def *local_pos = nir_iadd_imm(&b, global_pos, chan);

-nir_ssa_def *coord = nir_replicate(&b, local_pos, 4);
+nir_def *coord = nir_replicate(&b, local_pos, 4);

-nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_ssa_undef(&b, 1, 32),
+nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_undef(&b, 1, 32),
 nir_channel(&b, clear_val, chan), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
 }

@@ -45,24 +45,24 @@ build_color_shaders(struct radv_device *dev, struct nir_shader **out_vs, struct
 nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, "gl_Position");
 vs_out_pos->data.location = VARYING_SLOT_POS;

-nir_ssa_def *in_color_load = nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16);
+nir_def *in_color_load = nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16);

 nir_variable *fs_out_color = nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, "f_color");
 fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;

 nir_store_var(&fs_b, fs_out_color, in_color_load, 0xf);

-nir_ssa_def *outvec = nir_gen_rect_vertices(&vs_b, NULL, NULL);
+nir_def *outvec = nir_gen_rect_vertices(&vs_b, NULL, NULL);
 nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);

 const struct glsl_type *layer_type = glsl_int_type();
 nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
 vs_out_layer->data.location = VARYING_SLOT_LAYER;
 vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
-nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
-nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
+nir_def *inst_id = nir_load_instance_id(&vs_b);
+nir_def *base_instance = nir_load_base_instance(&vs_b);

-nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
+nir_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
 nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);

 *out_vs = vs_b.shader;
@@ -376,9 +376,9 @@ build_depthstencil_shader(struct radv_device *dev, struct nir_shader **out_vs, s
 nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type, "gl_Position");
 vs_out_pos->data.location = VARYING_SLOT_POS;

-nir_ssa_def *z;
+nir_def *z;
 if (unrestricted) {
-nir_ssa_def *in_color_load = nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4);
+nir_def *in_color_load = nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4);

 nir_variable *fs_out_depth = nir_variable_create(fs_b.shader, nir_var_shader_out, glsl_int_type(), "f_depth");
 fs_out_depth->data.location = FRAG_RESULT_DEPTH;
@@ -389,17 +389,17 @@ build_depthstencil_shader(struct radv_device *dev, struct nir_shader **out_vs, s
 z = nir_load_push_constant(&vs_b, 1, 32, nir_imm_int(&vs_b, 0), .range = 4);
 }

-nir_ssa_def *outvec = nir_gen_rect_vertices(&vs_b, z, NULL);
+nir_def *outvec = nir_gen_rect_vertices(&vs_b, z, NULL);
 nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);

 const struct glsl_type *layer_type = glsl_int_type();
 nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
 vs_out_layer->data.location = VARYING_SLOT_LAYER;
 vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
-nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
-nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
+nir_def *inst_id = nir_load_instance_id(&vs_b);
+nir_def *base_instance = nir_load_base_instance(&vs_b);

-nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
+nir_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
 nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);

 *out_vs = vs_b.shader;
@@ -808,19 +808,19 @@ build_clear_htile_mask_shader(struct radv_device *dev)
 nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_htile_mask");
 b.shader->info.workgroup_size[0] = 64;

-nir_ssa_def *global_id = get_global_ids(&b, 1);
+nir_def *global_id = get_global_ids(&b, 1);

-nir_ssa_def *offset = nir_imul_imm(&b, global_id, 16);
+nir_def *offset = nir_imul_imm(&b, global_id, 16);
 offset = nir_channel(&b, offset, 0);

-nir_ssa_def *buf = radv_meta_load_descriptor(&b, 0, 0);
+nir_def *buf = radv_meta_load_descriptor(&b, 0, 0);

-nir_ssa_def *constants = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
+nir_def *constants = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);

-nir_ssa_def *load = nir_load_ssbo(&b, 4, 32, buf, offset, .align_mul = 16);
+nir_def *load = nir_load_ssbo(&b, 4, 32, buf, offset, .align_mul = 16);

 /* data = (data & ~htile_mask) | (htile_value & htile_mask) */
-nir_ssa_def *data = nir_iand(&b, load, nir_channel(&b, constants, 1));
+nir_def *data = nir_iand(&b, load, nir_channel(&b, constants, 1));
 data = nir_ior(&b, data, nir_channel(&b, constants, 0));

 nir_store_ssbo(&b, data, buf, offset, .access = ACCESS_NON_READABLE, .align_mul = 16);

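The read-modify-write in the comment works out because both mask-dependent terms arrive precomputed in the push constants; a sketch of the host-side packing this layout implies (variable names are illustrative, not from the patch):

/* constants[0] = htile_value & htile_mask  (bits to set)
 * constants[1] = ~htile_mask               (bits to preserve)
 * so the shader can do: data = (load & constants[1]) | constants[0] */
uint32_t constants[2] = {
   htile_value & htile_mask,
   ~htile_mask,
};
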
@@ -906,29 +906,29 @@ build_clear_dcc_comp_to_single_shader(struct radv_device *dev, bool is_msaa)
 b.shader->info.workgroup_size[0] = 8;
 b.shader->info.workgroup_size[1] = 8;

-nir_ssa_def *global_id = get_global_ids(&b, 3);
+nir_def *global_id = get_global_ids(&b, 3);

 /* Load the dimensions in pixels of a block that gets compressed to one DCC byte. */
-nir_ssa_def *dcc_block_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
+nir_def *dcc_block_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);

 /* Compute the coordinates. */
-nir_ssa_def *coord = nir_trim_vector(&b, global_id, 2);
+nir_def *coord = nir_trim_vector(&b, global_id, 2);
 coord = nir_imul(&b, coord, dcc_block_size);
 coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), nir_channel(&b, global_id, 2),
-nir_ssa_undef(&b, 1, 32));
+nir_undef(&b, 1, 32));

 nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
 output_img->data.descriptor_set = 0;
 output_img->data.binding = 0;

 /* Load the clear color values. */
-nir_ssa_def *clear_values = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8);
+nir_def *clear_values = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8);

-nir_ssa_def *data = nir_vec4(&b, nir_channel(&b, clear_values, 0), nir_channel(&b, clear_values, 1),
+nir_def *data = nir_vec4(&b, nir_channel(&b, clear_values, 0), nir_channel(&b, clear_values, 1),
 nir_channel(&b, clear_values, 1), nir_channel(&b, clear_values, 1));

 /* Store the clear color values. */
-nir_ssa_def *sample_id = is_msaa ? nir_imm_int(&b, 0) : nir_ssa_undef(&b, 1, 32);
+nir_def *sample_id = is_msaa ? nir_imm_int(&b, 0) : nir_undef(&b, 1, 32);
 nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, sample_id, data, nir_imm_int(&b, 0),
 .image_dim = dim, .image_array = true);

@@ -47,22 +47,22 @@ build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf
 b.shader->info.workgroup_size[1] = 8;

 /* Get coordinates. */
-nir_ssa_def *global_id = get_global_ids(&b, 2);
+nir_def *global_id = get_global_ids(&b, 2);

-nir_ssa_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
+nir_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);

 /* Multiply the coordinates by the HTILE block size. */
-nir_ssa_def *coord = nir_iadd(&b, nir_imul_imm(&b, global_id, 8), offset);
+nir_def *coord = nir_iadd(&b, nir_imul_imm(&b, global_id, 8), offset);

 /* Load constants. */
-nir_ssa_def *constants = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 8), .range = 20);
-nir_ssa_def *htile_pitch = nir_channel(&b, constants, 0);
-nir_ssa_def *htile_slice_size = nir_channel(&b, constants, 1);
-nir_ssa_def *read_htile_value = nir_channel(&b, constants, 2);
+nir_def *constants = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 8), .range = 20);
+nir_def *htile_pitch = nir_channel(&b, constants, 0);
+nir_def *htile_slice_size = nir_channel(&b, constants, 1);
+nir_def *read_htile_value = nir_channel(&b, constants, 2);

 /* Get the HTILE addr from coordinates. */
-nir_ssa_def *zero = nir_imm_int(&b, 0);
-nir_ssa_def *htile_addr =
+nir_def *zero = nir_imm_int(&b, 0);
+nir_def *htile_addr =
 ac_nir_htile_addr_from_coord(&b, &device->physical_device->rad_info, &surf->u.gfx9.zs.htile_equation, htile_pitch,
 htile_slice_size, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero);

@@ -73,7 +73,7 @@ build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf
 input_vrs_img->data.binding = 0;

 /* Load the VRS rates from the 2D image. */
-nir_ssa_def *value = nir_txf_deref(&b, nir_build_deref_var(&b, input_vrs_img), global_id, NULL);
+nir_def *value = nir_txf_deref(&b, nir_build_deref_var(&b, input_vrs_img), global_id, NULL);

 /* Extract the X/Y rates and clamp them because the maximum supported VRS rate is 2x2 (1x1 in
  * hardware).
@@ -81,17 +81,17 @@ build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf
  * VRS rate X = min(value >> 2, 1)
  * VRS rate Y = min(value & 3, 1)
  */
-nir_ssa_def *x_rate = nir_ushr_imm(&b, nir_channel(&b, value, 0), 2);
+nir_def *x_rate = nir_ushr_imm(&b, nir_channel(&b, value, 0), 2);
 x_rate = nir_umin(&b, x_rate, nir_imm_int(&b, 1));

-nir_ssa_def *y_rate = nir_iand_imm(&b, nir_channel(&b, value, 0), 3);
+nir_def *y_rate = nir_iand_imm(&b, nir_channel(&b, value, 0), 3);
 y_rate = nir_umin(&b, y_rate, nir_imm_int(&b, 1));

 /* Compute the final VRS rate. */
-nir_ssa_def *vrs_rates = nir_ior(&b, nir_ishl_imm(&b, y_rate, 10), nir_ishl_imm(&b, x_rate, 6));
+nir_def *vrs_rates = nir_ior(&b, nir_ishl_imm(&b, y_rate, 10), nir_ishl_imm(&b, x_rate, 6));

 /* Load the HTILE buffer descriptor. */
-nir_ssa_def *htile_buf = radv_meta_load_descriptor(&b, 0, 1);
+nir_def *htile_buf = radv_meta_load_descriptor(&b, 0, 1);

 /* Load the HTILE value if requested, otherwise use the default value. */
 nir_variable *htile_value = nir_local_variable_create(b.impl, glsl_int_type(), "htile_value");

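For reference, the same rate extraction and packing in plain C (a sketch; MIN2 as in Mesa's util macros):

/* Clamp each axis to a 2x2 rate (1x1 in hardware terms), then place the X
 * rate in bits [7:6] and the Y rate in bits [11:10] of the HTILE word. */
static uint32_t
pack_vrs_rates(uint32_t value)
{
   uint32_t x_rate = MIN2(value >> 2, 1);
   uint32_t y_rate = MIN2(value & 3, 1);

   return (y_rate << 10) | (x_rate << 6);
}
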
@@ -99,7 +99,7 @@ build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf
 nir_push_if(&b, nir_ieq_imm(&b, read_htile_value, 1));
 {
 /* Load the existing HTILE 32-bit value for this 8x8 pixels area. */
-nir_ssa_def *input_value = nir_load_ssbo(&b, 1, 32, htile_buf, htile_addr);
+nir_def *input_value = nir_load_ssbo(&b, 1, 32, htile_buf, htile_addr);

 /* Clear the 4-bit VRS rates. */
 nir_store_var(&b, htile_value, nir_iand_imm(&b, input_value, 0xfffff33f), 0x1);
@@ -111,7 +111,7 @@ build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf
 nir_pop_if(&b, NULL);

 /* Set the VRS rates loaded from the image. */
-nir_ssa_def *output_value = nir_ior(&b, nir_load_var(&b, htile_value), vrs_rates);
+nir_def *output_value = nir_ior(&b, nir_load_var(&b, htile_value), vrs_rates);

 /* Store the updated HTILE 32-bit which contains the VRS rates. */
 nir_store_ssbo(&b, output_value, htile_buf, htile_addr, .access = ACCESS_NON_READABLE);

@@ -37,13 +37,13 @@ build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *sur
 b.shader->info.workgroup_size[0] = 8;
 b.shader->info.workgroup_size[1] = 8;

-nir_ssa_def *src_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
-nir_ssa_def *src_dcc_pitch = nir_channels(&b, src_dcc_size, 1);
-nir_ssa_def *src_dcc_height = nir_channels(&b, src_dcc_size, 2);
+nir_def *src_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
+nir_def *src_dcc_pitch = nir_channels(&b, src_dcc_size, 1);
+nir_def *src_dcc_height = nir_channels(&b, src_dcc_size, 2);

-nir_ssa_def *dst_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8);
-nir_ssa_def *dst_dcc_pitch = nir_channels(&b, dst_dcc_size, 1);
-nir_ssa_def *dst_dcc_height = nir_channels(&b, dst_dcc_size, 2);
+nir_def *dst_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8);
+nir_def *dst_dcc_pitch = nir_channels(&b, dst_dcc_size, 1);
+nir_def *dst_dcc_height = nir_channels(&b, dst_dcc_size, 2);
 nir_variable *input_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_in");
 input_dcc->data.descriptor_set = 0;
 input_dcc->data.binding = 0;
@@ -51,25 +51,25 @@ build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *sur
 output_dcc->data.descriptor_set = 0;
 output_dcc->data.binding = 1;

-nir_ssa_def *input_dcc_ref = &nir_build_deref_var(&b, input_dcc)->dest.ssa;
-nir_ssa_def *output_dcc_ref = &nir_build_deref_var(&b, output_dcc)->dest.ssa;
+nir_def *input_dcc_ref = &nir_build_deref_var(&b, input_dcc)->dest.ssa;
+nir_def *output_dcc_ref = &nir_build_deref_var(&b, output_dcc)->dest.ssa;

-nir_ssa_def *coord = get_global_ids(&b, 2);
-nir_ssa_def *zero = nir_imm_int(&b, 0);
+nir_def *coord = get_global_ids(&b, 2);
+nir_def *zero = nir_imm_int(&b, 0);
 coord =
 nir_imul(&b, coord, nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, surf->u.gfx9.color.dcc_block_height));

-nir_ssa_def *src = ac_nir_dcc_addr_from_coord(
-&b, &dev->physical_device->rad_info, surf->bpe, &surf->u.gfx9.color.dcc_equation, src_dcc_pitch, src_dcc_height,
-zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero, zero);
-nir_ssa_def *dst = ac_nir_dcc_addr_from_coord(
+nir_def *src = ac_nir_dcc_addr_from_coord(&b, &dev->physical_device->rad_info, surf->bpe,
+&surf->u.gfx9.color.dcc_equation, src_dcc_pitch, src_dcc_height, zero,
+nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero, zero);
+nir_def *dst = ac_nir_dcc_addr_from_coord(
 &b, &dev->physical_device->rad_info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation, dst_dcc_pitch,
 dst_dcc_height, zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero, zero);

-nir_ssa_def *dcc_val = nir_image_deref_load(&b, 1, 32, input_dcc_ref, nir_vec4(&b, src, src, src, src),
-nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = dim);
+nir_def *dcc_val = nir_image_deref_load(&b, 1, 32, input_dcc_ref, nir_vec4(&b, src, src, src, src),
+nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = dim);

-nir_image_deref_store(&b, output_dcc_ref, nir_vec4(&b, dst, dst, dst, dst), nir_ssa_undef(&b, 1, 32), dcc_val,
+nir_image_deref_store(&b, output_dcc_ref, nir_vec4(&b, dst, dst, dst, dst), nir_undef(&b, 1, 32), dcc_val,
 nir_imm_int(&b, 0), .image_dim = dim);

 return b.shader;

@@ -51,16 +51,15 @@ build_expand_depth_stencil_compute_shader(struct radv_device *dev)
 output_img->data.descriptor_set = 0;
 output_img->data.binding = 1;

-nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
-nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
+nir_def *invoc_id = nir_load_local_invocation_id(&b);
+nir_def *wg_id = nir_load_workgroup_id(&b, 32);
+nir_def *block_size = nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
 b.shader->info.workgroup_size[2], 0);

-nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+nir_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

-nir_ssa_def *data =
-nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, global_id,
-nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
+nir_def *data = nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, global_id,
+nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);

 /* We need a SCOPE_DEVICE memory_scope because ACO will avoid
  * creating a vmcnt(0) because it expects the L1 cache to keep memory
@@ -69,7 +68,7 @@ build_expand_depth_stencil_compute_shader(struct radv_device *dev)
 nir_barrier(&b, .execution_scope = SCOPE_WORKGROUP, .memory_scope = SCOPE_DEVICE,
 .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);

-nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id, nir_ssa_undef(&b, 1, 32), data,
+nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id, nir_undef(&b, 1, 32), data,
 nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
 return b.shader;
 }

@@ -41,13 +41,13 @@
  * - the EAC shader doesn't do SNORM correctly, so this has that fixed.
  */

-static nir_ssa_def *
-flip_endian(nir_builder *b, nir_ssa_def *src, unsigned cnt)
+static nir_def *
+flip_endian(nir_builder *b, nir_def *src, unsigned cnt)
 {
-nir_ssa_def *v[2];
+nir_def *v[2];
 for (unsigned i = 0; i < cnt; ++i) {
-nir_ssa_def *intermediate[4];
-nir_ssa_def *chan = cnt == 1 ? src : nir_channel(b, src, i);
+nir_def *intermediate[4];
+nir_def *chan = cnt == 1 ? src : nir_channel(b, src, i);
 for (unsigned j = 0; j < 4; ++j)
 intermediate[j] = nir_ubfe_imm(b, chan, 8 * j, 8);
 v[i] = nir_ior(b, nir_ior(b, nir_ishl_imm(b, intermediate[0], 24), nir_ishl_imm(b, intermediate[1], 16)),
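Per 32-bit channel, flip_endian() emits a standard byte swap; the scalar equivalent, as a sketch:

/* What flip_endian() builds for one channel: extract the four bytes with
 * ubfe and reassemble them in reverse order. */
static uint32_t
bswap32(uint32_t v)
{
   return ((v & 0x000000ff) << 24) | ((v & 0x0000ff00) << 8) |
          ((v & 0x00ff0000) >> 8) | ((v & 0xff000000) >> 24);
}
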
@@ -56,14 +56,14 @@ flip_endian(nir_builder *b, nir_ssa_def *src, unsigned cnt)
 return cnt == 1 ? v[0] : nir_vec(b, v, cnt);
 }

-static nir_ssa_def *
-etc1_color_modifier_lookup(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+static nir_def *
+etc1_color_modifier_lookup(nir_builder *b, nir_def *x, nir_def *y)
 {
 const unsigned table[8][2] = {{2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183}};
-nir_ssa_def *upper = nir_ieq_imm(b, y, 1);
-nir_ssa_def *result = NULL;
+nir_def *upper = nir_ieq_imm(b, y, 1);
+nir_def *result = NULL;
 for (unsigned i = 0; i < 8; ++i) {
-nir_ssa_def *tmp = nir_bcsel(b, upper, nir_imm_int(b, table[i][1]), nir_imm_int(b, table[i][0]));
+nir_def *tmp = nir_bcsel(b, upper, nir_imm_int(b, table[i][1]), nir_imm_int(b, table[i][0]));
 if (result)
 result = nir_bcsel(b, nir_ieq_imm(b, x, i), tmp, result);
 else
@@ -72,11 +72,11 @@ etc1_color_modifier_lookup(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
 return result;
 }

-static nir_ssa_def *
-etc2_distance_lookup(nir_builder *b, nir_ssa_def *x)
+static nir_def *
+etc2_distance_lookup(nir_builder *b, nir_def *x)
 {
 const unsigned table[8] = {3, 6, 11, 16, 23, 32, 41, 64};
-nir_ssa_def *result = NULL;
+nir_def *result = NULL;
 for (unsigned i = 0; i < 8; ++i) {
 if (result)
 result = nir_bcsel(b, nir_ieq_imm(b, x, i), nir_imm_int(b, table[i]), result);
@@ -86,14 +86,14 @@ etc2_distance_lookup(nir_builder *b, nir_ssa_def *x)
 return result;
 }

-static nir_ssa_def *
-etc1_alpha_modifier_lookup(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+static nir_def *
+etc1_alpha_modifier_lookup(nir_builder *b, nir_def *x, nir_def *y)
 {
 const unsigned table[16] = {0xe852, 0xc962, 0xc741, 0xc531, 0xb752, 0xa862, 0xa763, 0xa742,
 0x9751, 0x9741, 0x9731, 0x9641, 0x9632, 0x9210, 0x8753, 0x8642};
-nir_ssa_def *result = NULL;
+nir_def *result = NULL;
 for (unsigned i = 0; i < 16; ++i) {
-nir_ssa_def *tmp = nir_imm_int(b, table[i]);
+nir_def *tmp = nir_imm_int(b, table[i]);
 if (result)
 result = nir_bcsel(b, nir_ieq_imm(b, x, i), tmp, result);
 else
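Each 16-bit entry of that table packs four 4-bit modifier magnitudes, and the function's final nir_ubfe picks the nibble selected by y; in plain C, as a sketch:

/* Scalar equivalent of the bcsel chain plus nir_ubfe: pick entry x, then
 * extract its y-th nibble. */
static unsigned
etc1_alpha_modifier(unsigned x, unsigned y)
{
   static const uint16_t table[16] = {0xe852, 0xc962, 0xc741, 0xc531, 0xb752, 0xa862, 0xa763, 0xa742,
                                      0x9751, 0x9741, 0x9731, 0x9641, 0x9632, 0x9210, 0x8753, 0x8642};

   return (table[x] >> (y * 4)) & 0xf;
}
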
@@ -102,45 +102,44 @@ etc1_alpha_modifier_lookup(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
 return nir_ubfe(b, result, nir_imul_imm(b, y, 4), nir_imm_int(b, 4));
 }

-static nir_ssa_def *
-etc_extend(nir_builder *b, nir_ssa_def *v, int bits)
+static nir_def *
+etc_extend(nir_builder *b, nir_def *v, int bits)
 {
 if (bits == 4)
 return nir_imul_imm(b, v, 0x11);
 return nir_ior(b, nir_ishl_imm(b, v, 8 - bits), nir_ushr_imm(b, v, bits - (8 - bits)));
 }

-static nir_ssa_def *
-decode_etc2_alpha(struct nir_builder *b, nir_ssa_def *alpha_payload, nir_ssa_def *linear_pixel, bool eac,
-nir_ssa_def *is_signed)
+static nir_def *
+decode_etc2_alpha(struct nir_builder *b, nir_def *alpha_payload, nir_def *linear_pixel, bool eac, nir_def *is_signed)
 {
 alpha_payload = flip_endian(b, alpha_payload, 2);
-nir_ssa_def *alpha_x = nir_channel(b, alpha_payload, 1);
-nir_ssa_def *alpha_y = nir_channel(b, alpha_payload, 0);
-nir_ssa_def *bit_offset = nir_isub_imm(b, 45, nir_imul_imm(b, linear_pixel, 3));
-nir_ssa_def *base = nir_ubfe_imm(b, alpha_y, 24, 8);
-nir_ssa_def *multiplier = nir_ubfe_imm(b, alpha_y, 20, 4);
-nir_ssa_def *table = nir_ubfe_imm(b, alpha_y, 16, 4);
+nir_def *alpha_x = nir_channel(b, alpha_payload, 1);
+nir_def *alpha_y = nir_channel(b, alpha_payload, 0);
+nir_def *bit_offset = nir_isub_imm(b, 45, nir_imul_imm(b, linear_pixel, 3));
+nir_def *base = nir_ubfe_imm(b, alpha_y, 24, 8);
+nir_def *multiplier = nir_ubfe_imm(b, alpha_y, 20, 4);
+nir_def *table = nir_ubfe_imm(b, alpha_y, 16, 4);

 if (eac) {
-nir_ssa_def *signed_base = nir_ibfe_imm(b, alpha_y, 24, 8);
+nir_def *signed_base = nir_ibfe_imm(b, alpha_y, 24, 8);
 signed_base = nir_imul_imm(b, signed_base, 8);
 base = nir_iadd_imm(b, nir_imul_imm(b, base, 8), 4);
 base = nir_bcsel(b, is_signed, signed_base, base);
 multiplier = nir_imax(b, nir_imul_imm(b, multiplier, 8), nir_imm_int(b, 1));
 }

-nir_ssa_def *lsb_index = nir_ubfe(b, nir_bcsel(b, nir_uge_imm(b, bit_offset, 32), alpha_y, alpha_x),
+nir_def *lsb_index = nir_ubfe(b, nir_bcsel(b, nir_uge_imm(b, bit_offset, 32), alpha_y, alpha_x),
 nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 2));
 bit_offset = nir_iadd_imm(b, bit_offset, 2);
-nir_ssa_def *msb = nir_ubfe(b, nir_bcsel(b, nir_uge_imm(b, bit_offset, 32), alpha_y, alpha_x),
+nir_def *msb = nir_ubfe(b, nir_bcsel(b, nir_uge_imm(b, bit_offset, 32), alpha_y, alpha_x),
 nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 1));
-nir_ssa_def *mod = nir_ixor(b, etc1_alpha_modifier_lookup(b, table, lsb_index), nir_iadd_imm(b, msb, -1));
-nir_ssa_def *a = nir_iadd(b, base, nir_imul(b, mod, multiplier));
+nir_def *mod = nir_ixor(b, etc1_alpha_modifier_lookup(b, table, lsb_index), nir_iadd_imm(b, msb, -1));
+nir_def *a = nir_iadd(b, base, nir_imul(b, mod, multiplier));

-nir_ssa_def *low_bound = nir_imm_int(b, 0);
-nir_ssa_def *high_bound = nir_imm_int(b, 255);
-nir_ssa_def *final_mult = nir_imm_float(b, 1 / 255.0);
+nir_def *low_bound = nir_imm_int(b, 0);
+nir_def *high_bound = nir_imm_int(b, 255);
+nir_def *final_mult = nir_imm_float(b, 1 / 255.0);
 if (eac) {
 low_bound = nir_bcsel(b, is_signed, nir_imm_int(b, -1023), low_bound);
 high_bound = nir_bcsel(b, is_signed, nir_imm_int(b, 1023), nir_imm_int(b, 2047));

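etc_extend() above is the usual bit-replication trick for widening an n-bit UNORM value to 8 bits; the scalar form, as a sketch:

/* For 4 bits, v * 0x11 duplicates the nibble (0xA -> 0xAA). Otherwise,
 * shift the value into the top bits and refill the low bits with its own
 * most significant bits. */
static uint32_t
etc_extend_scalar(uint32_t v, int bits)
{
   if (bits == 4)
      return v * 0x11;

   return (v << (8 - bits)) | (v >> (bits - (8 - bits)));
}
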
@@ -177,55 +176,55 @@ build_shader(struct radv_device *dev)
 output_img_3d->data.descriptor_set = 0;
 output_img_3d->data.binding = 1;

-nir_ssa_def *global_id = get_global_ids(&b, 3);
+nir_def *global_id = get_global_ids(&b, 3);

-nir_ssa_def *consts = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
-nir_ssa_def *consts2 = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
-nir_ssa_def *offset = nir_channels(&b, consts, 7);
-nir_ssa_def *format = nir_channel(&b, consts, 3);
-nir_ssa_def *image_type = nir_channel(&b, consts2, 0);
-nir_ssa_def *is_3d = nir_ieq_imm(&b, image_type, VK_IMAGE_TYPE_3D);
-nir_ssa_def *coord = nir_iadd(&b, global_id, offset);
-nir_ssa_def *src_coord = nir_vec3(&b, nir_ushr_imm(&b, nir_channel(&b, coord, 0), 2),
+nir_def *consts = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
+nir_def *consts2 = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
+nir_def *offset = nir_channels(&b, consts, 7);
+nir_def *format = nir_channel(&b, consts, 3);
+nir_def *image_type = nir_channel(&b, consts2, 0);
+nir_def *is_3d = nir_ieq_imm(&b, image_type, VK_IMAGE_TYPE_3D);
+nir_def *coord = nir_iadd(&b, global_id, offset);
+nir_def *src_coord = nir_vec3(&b, nir_ushr_imm(&b, nir_channel(&b, coord, 0), 2),
 nir_ushr_imm(&b, nir_channel(&b, coord, 1), 2), nir_channel(&b, coord, 2));

 nir_variable *payload_var = nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "payload");
 nir_push_if(&b, is_3d);
 {
-nir_ssa_def *color = nir_txf_deref(&b, nir_build_deref_var(&b, input_img_3d), src_coord, nir_imm_int(&b, 0));
+nir_def *color = nir_txf_deref(&b, nir_build_deref_var(&b, input_img_3d), src_coord, nir_imm_int(&b, 0));
 nir_store_var(&b, payload_var, color, 0xf);
 }
 nir_push_else(&b, NULL);
 {
-nir_ssa_def *color = nir_txf_deref(&b, nir_build_deref_var(&b, input_img_2d), src_coord, nir_imm_int(&b, 0));
+nir_def *color = nir_txf_deref(&b, nir_build_deref_var(&b, input_img_2d), src_coord, nir_imm_int(&b, 0));
 nir_store_var(&b, payload_var, color, 0xf);
 }
 nir_pop_if(&b, NULL);

-nir_ssa_def *pixel_coord = nir_iand_imm(&b, nir_channels(&b, coord, 3), 3);
-nir_ssa_def *linear_pixel =
+nir_def *pixel_coord = nir_iand_imm(&b, nir_channels(&b, coord, 3), 3);
+nir_def *linear_pixel =
 nir_iadd(&b, nir_imul_imm(&b, nir_channel(&b, pixel_coord, 0), 4), nir_channel(&b, pixel_coord, 1));

-nir_ssa_def *payload = nir_load_var(&b, payload_var);
+nir_def *payload = nir_load_var(&b, payload_var);
 nir_variable *color = nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "color");
 nir_store_var(&b, color, nir_imm_vec4(&b, 1.0, 0.0, 0.0, 1.0), 0xf);
 nir_push_if(&b, nir_ilt_imm(&b, format, VK_FORMAT_EAC_R11_UNORM_BLOCK));
 {
-nir_ssa_def *alpha_bits_8 = nir_ige_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK);
-nir_ssa_def *alpha_bits_1 = nir_iand(&b, nir_ige_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK),
+nir_def *alpha_bits_8 = nir_ige_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK);
+nir_def *alpha_bits_1 = nir_iand(&b, nir_ige_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK),
 nir_ilt_imm(&b, format, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK));

-nir_ssa_def *color_payload =
+nir_def *color_payload =
 nir_bcsel(&b, alpha_bits_8, nir_channels(&b, payload, 0xC), nir_channels(&b, payload, 3));
 color_payload = flip_endian(&b, color_payload, 2);
-nir_ssa_def *color_y = nir_channel(&b, color_payload, 0);
-nir_ssa_def *color_x = nir_channel(&b, color_payload, 1);
-nir_ssa_def *flip = nir_test_mask(&b, color_y, 1);
-nir_ssa_def *subblock =
+nir_def *color_y = nir_channel(&b, color_payload, 0);
+nir_def *color_x = nir_channel(&b, color_payload, 1);
+nir_def *flip = nir_test_mask(&b, color_y, 1);
+nir_def *subblock =
 nir_ushr_imm(&b, nir_bcsel(&b, flip, nir_channel(&b, pixel_coord, 1), nir_channel(&b, pixel_coord, 0)), 1);

 nir_variable *punchthrough = nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "punchthrough");
-nir_ssa_def *punchthrough_init = nir_iand(&b, alpha_bits_1, nir_inot(&b, nir_test_mask(&b, color_y, 2)));
+nir_def *punchthrough_init = nir_iand(&b, alpha_bits_1, nir_inot(&b, nir_test_mask(&b, color_y, 2)));
 nir_store_var(&b, punchthrough, punchthrough_init, 0x1);

 nir_variable *etc1_compat = nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "etc1_compat");
@@ -249,13 +248,13 @@ build_shader(struct radv_device *dev)
 nir_variable *base_rgb = nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "base_rgb");
 nir_store_var(&b, rgb_result, nir_imm_ivec3(&b, 255, 0, 0), 0x7);

-nir_ssa_def *msb = nir_iand_imm(&b, nir_ushr(&b, color_x, nir_iadd_imm(&b, linear_pixel, 15)), 2);
-nir_ssa_def *lsb = nir_iand_imm(&b, nir_ushr(&b, color_x, linear_pixel), 1);
+nir_def *msb = nir_iand_imm(&b, nir_ushr(&b, color_x, nir_iadd_imm(&b, linear_pixel, 15)), 2);
+nir_def *lsb = nir_iand_imm(&b, nir_ushr(&b, color_x, linear_pixel), 1);

 nir_push_if(&b, nir_iand(&b, nir_inot(&b, alpha_bits_1), nir_inot(&b, nir_test_mask(&b, color_y, 2))));
 {
 nir_store_var(&b, etc1_compat, nir_imm_true(&b), 1);
-nir_ssa_def *tmp[3];
+nir_def *tmp[3];
 for (unsigned i = 0; i < 3; ++i)
 tmp[i] = etc_extend(
 &b,
@@ -266,29 +265,29 @@
 }
 nir_push_else(&b, NULL);
 {
-nir_ssa_def *rb = nir_ubfe_imm(&b, color_y, 27, 5);
-nir_ssa_def *rd = nir_ibfe_imm(&b, color_y, 24, 3);
-nir_ssa_def *gb = nir_ubfe_imm(&b, color_y, 19, 5);
-nir_ssa_def *gd = nir_ibfe_imm(&b, color_y, 16, 3);
-nir_ssa_def *bb = nir_ubfe_imm(&b, color_y, 11, 5);
-nir_ssa_def *bd = nir_ibfe_imm(&b, color_y, 8, 3);
-nir_ssa_def *r1 = nir_iadd(&b, rb, rd);
-nir_ssa_def *g1 = nir_iadd(&b, gb, gd);
-nir_ssa_def *b1 = nir_iadd(&b, bb, bd);
+nir_def *rb = nir_ubfe_imm(&b, color_y, 27, 5);
+nir_def *rd = nir_ibfe_imm(&b, color_y, 24, 3);
+nir_def *gb = nir_ubfe_imm(&b, color_y, 19, 5);
+nir_def *gd = nir_ibfe_imm(&b, color_y, 16, 3);
+nir_def *bb = nir_ubfe_imm(&b, color_y, 11, 5);
+nir_def *bd = nir_ibfe_imm(&b, color_y, 8, 3);
+nir_def *r1 = nir_iadd(&b, rb, rd);
+nir_def *g1 = nir_iadd(&b, gb, gd);
+nir_def *b1 = nir_iadd(&b, bb, bd);

 nir_push_if(&b, nir_ugt_imm(&b, r1, 31));
 {
-nir_ssa_def *r0 =
+nir_def *r0 =
 nir_ior(&b, nir_ubfe_imm(&b, color_y, 24, 2), nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 27, 2), 2));
-nir_ssa_def *g0 = nir_ubfe_imm(&b, color_y, 20, 4);
-nir_ssa_def *b0 = nir_ubfe_imm(&b, color_y, 16, 4);
-nir_ssa_def *r2 = nir_ubfe_imm(&b, color_y, 12, 4);
-nir_ssa_def *g2 = nir_ubfe_imm(&b, color_y, 8, 4);
-nir_ssa_def *b2 = nir_ubfe_imm(&b, color_y, 4, 4);
-nir_ssa_def *da =
+nir_def *g0 = nir_ubfe_imm(&b, color_y, 20, 4);
+nir_def *b0 = nir_ubfe_imm(&b, color_y, 16, 4);
+nir_def *r2 = nir_ubfe_imm(&b, color_y, 12, 4);
+nir_def *g2 = nir_ubfe_imm(&b, color_y, 8, 4);
+nir_def *b2 = nir_ubfe_imm(&b, color_y, 4, 4);
+nir_def *da =
 nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 2), 1), nir_iand_imm(&b, color_y, 1));
-nir_ssa_def *dist = etc2_distance_lookup(&b, da);
-nir_ssa_def *index = nir_ior(&b, lsb, msb);
+nir_def *dist = etc2_distance_lookup(&b, da);
+nir_def *index = nir_ior(&b, lsb, msb);

 nir_store_var(&b, punchthrough,
 nir_iand(&b, nir_load_var(&b, punchthrough), nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)),
@@ -300,7 +299,7 @@
 nir_push_else(&b, NULL);
 {

-nir_ssa_def *tmp = nir_iadd(&b, etc_extend(&b, nir_vec3(&b, r2, g2, b2), 4),
+nir_def *tmp = nir_iadd(&b, etc_extend(&b, nir_vec3(&b, r2, g2, b2), 4),
 nir_imul(&b, dist, nir_isub_imm(&b, 2, index)));
 nir_store_var(&b, rgb_result, tmp, 0x7);
 }
@@ -309,23 +308,22 @@
 nir_push_else(&b, NULL);
 nir_push_if(&b, nir_ugt_imm(&b, g1, 31));
 {
-nir_ssa_def *r0 = nir_ubfe_imm(&b, color_y, 27, 4);
-nir_ssa_def *g0 = nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 24, 3), 1),
+nir_def *r0 = nir_ubfe_imm(&b, color_y, 27, 4);
+nir_def *g0 = nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 24, 3), 1),
 nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 20), 1));
-nir_ssa_def *b0 =
+nir_def *b0 =
 nir_ior(&b, nir_ubfe_imm(&b, color_y, 15, 3), nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 16), 8));
-nir_ssa_def *r2 = nir_ubfe_imm(&b, color_y, 11, 4);
-nir_ssa_def *g2 = nir_ubfe_imm(&b, color_y, 7, 4);
-nir_ssa_def *b2 = nir_ubfe_imm(&b, color_y, 3, 4);
-nir_ssa_def *da = nir_iand_imm(&b, color_y, 4);
-nir_ssa_def *db = nir_iand_imm(&b, color_y, 1);
-nir_ssa_def *d = nir_iadd(&b, da, nir_imul_imm(&b, db, 2));
-nir_ssa_def *d0 = nir_iadd(&b, nir_ishl_imm(&b, r0, 16), nir_iadd(&b, nir_ishl_imm(&b, g0, 8), b0));
-nir_ssa_def *d2 = nir_iadd(&b, nir_ishl_imm(&b, r2, 16), nir_iadd(&b, nir_ishl_imm(&b, g2, 8), b2));
+nir_def *r2 = nir_ubfe_imm(&b, color_y, 11, 4);
+nir_def *g2 = nir_ubfe_imm(&b, color_y, 7, 4);
+nir_def *b2 = nir_ubfe_imm(&b, color_y, 3, 4);
+nir_def *da = nir_iand_imm(&b, color_y, 4);
+nir_def *db = nir_iand_imm(&b, color_y, 1);
+nir_def *d = nir_iadd(&b, da, nir_imul_imm(&b, db, 2));
+nir_def *d0 = nir_iadd(&b, nir_ishl_imm(&b, r0, 16), nir_iadd(&b, nir_ishl_imm(&b, g0, 8), b0));
+nir_def *d2 = nir_iadd(&b, nir_ishl_imm(&b, r2, 16), nir_iadd(&b, nir_ishl_imm(&b, g2, 8), b2));
 d = nir_bcsel(&b, nir_uge(&b, d0, d2), nir_iadd_imm(&b, d, 1), d);
-nir_ssa_def *dist = etc2_distance_lookup(&b, d);
-nir_ssa_def *base =
-nir_bcsel(&b, nir_ine_imm(&b, msb, 0), nir_vec3(&b, r2, g2, b2), nir_vec3(&b, r0, g0, b0));
+nir_def *dist = etc2_distance_lookup(&b, d);
+nir_def *base = nir_bcsel(&b, nir_ine_imm(&b, msb, 0), nir_vec3(&b, r2, g2, b2), nir_vec3(&b, r0, g0, b0));
 base = etc_extend(&b, base, 4);
 base = nir_iadd(&b, base, nir_imul(&b, dist, nir_isub_imm(&b, 1, nir_imul_imm(&b, lsb, 2))));
 nir_store_var(&b, rgb_result, base, 0x7);
@@ -336,19 +334,19 @@
 nir_push_else(&b, NULL);
 nir_push_if(&b, nir_ugt_imm(&b, b1, 31));
 {
-nir_ssa_def *r0 = nir_ubfe_imm(&b, color_y, 25, 6);
-nir_ssa_def *g0 =
+nir_def *r0 = nir_ubfe_imm(&b, color_y, 25, 6);
+nir_def *g0 =
 nir_ior(&b, nir_ubfe_imm(&b, color_y, 17, 6), nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 18), 0x40));
-nir_ssa_def *b0 = nir_ior(
+nir_def *b0 = nir_ior(
 &b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 11, 2), 3),
 nir_ior(&b, nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 11), 0x20), nir_ubfe_imm(&b, color_y, 7, 3)));
-nir_ssa_def *rh =
+nir_def *rh =
 nir_ior(&b, nir_iand_imm(&b, color_y, 1), nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 5), 1));
-nir_ssa_def *rv = nir_ubfe_imm(&b, color_x, 13, 6);
-nir_ssa_def *gh = nir_ubfe_imm(&b, color_x, 25, 7);
-nir_ssa_def *gv = nir_ubfe_imm(&b, color_x, 6, 7);
-nir_ssa_def *bh = nir_ubfe_imm(&b, color_x, 19, 6);
-nir_ssa_def *bv = nir_ubfe_imm(&b, color_x, 0, 6);
+nir_def *rv = nir_ubfe_imm(&b, color_x, 13, 6);
+nir_def *gh = nir_ubfe_imm(&b, color_x, 25, 7);
+nir_def *gv = nir_ubfe_imm(&b, color_x, 6, 7);
+nir_def *bh = nir_ubfe_imm(&b, color_x, 19, 6);
+nir_def *bv = nir_ubfe_imm(&b, color_x, 0, 6);

 r0 = etc_extend(&b, r0, 6);
 g0 = etc_extend(&b, g0, 7);
@@ -360,11 +358,9 @@
 bh = etc_extend(&b, bh, 6);
 bv = etc_extend(&b, bv, 6);

-nir_ssa_def *rgb = nir_vec3(&b, r0, g0, b0);
-nir_ssa_def *dx =
-nir_imul(&b, nir_isub(&b, nir_vec3(&b, rh, gh, bh), rgb), nir_channel(&b, pixel_coord, 0));
-nir_ssa_def *dy =
-nir_imul(&b, nir_isub(&b, nir_vec3(&b, rv, gv, bv), rgb), nir_channel(&b, pixel_coord, 1));
+nir_def *rgb = nir_vec3(&b, r0, g0, b0);
+nir_def *dx = nir_imul(&b, nir_isub(&b, nir_vec3(&b, rh, gh, bh), rgb), nir_channel(&b, pixel_coord, 0));
+nir_def *dy = nir_imul(&b, nir_isub(&b, nir_vec3(&b, rv, gv, bv), rgb), nir_channel(&b, pixel_coord, 1));
 rgb = nir_iadd(&b, rgb, nir_ishr_imm(&b, nir_iadd_imm(&b, nir_iadd(&b, dx, dy), 2), 2));
 nir_store_var(&b, rgb_result, rgb, 0x7);
 nir_store_var(&b, punchthrough, nir_imm_false(&b), 0x1);
@@ -372,8 +368,8 @@
 nir_push_else(&b, NULL);
 {
 nir_store_var(&b, etc1_compat, nir_imm_true(&b), 1);
-nir_ssa_def *subblock_b = nir_ine_imm(&b, subblock, 0);
-nir_ssa_def *tmp[] = {
+nir_def *subblock_b = nir_ine_imm(&b, subblock, 0);
+nir_def *tmp[] = {
 nir_bcsel(&b, subblock_b, r1, rb),
 nir_bcsel(&b, subblock_b, g1, gb),
 nir_bcsel(&b, subblock_b, b1, bb),
@@ -387,14 +383,14 @@
 nir_pop_if(&b, NULL);
 nir_push_if(&b, nir_load_var(&b, etc1_compat));
 {
-nir_ssa_def *etc1_table_index =
+nir_def *etc1_table_index =
 nir_ubfe(&b, color_y, nir_isub_imm(&b, 5, nir_imul_imm(&b, subblock, 3)), nir_imm_int(&b, 3));
-nir_ssa_def *sgn = nir_isub_imm(&b, 1, msb);
+nir_def *sgn = nir_isub_imm(&b, 1, msb);
 sgn = nir_bcsel(&b, nir_load_var(&b, punchthrough), nir_imul(&b, sgn, lsb), sgn);
 nir_store_var(&b, punchthrough,
 nir_iand(&b, nir_load_var(&b, punchthrough), nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)), 0x1);
-nir_ssa_def *off = nir_imul(&b, etc1_color_modifier_lookup(&b, etc1_table_index, lsb), sgn);
-nir_ssa_def *result = nir_iadd(&b, nir_load_var(&b, base_rgb), off);
+nir_def *off = nir_imul(&b, etc1_color_modifier_lookup(&b, etc1_table_index, lsb), sgn);
+nir_def *result = nir_iadd(&b, nir_load_var(&b, base_rgb), off);
 nir_store_var(&b, rgb_result, result, 0x7);
 }
 nir_pop_if(&b, NULL);
@@ -404,7 +400,7 @@
 nir_store_var(&b, rgb_result, nir_imm_ivec3(&b, 0, 0, 0), 0x7);
 }
 nir_pop_if(&b, NULL);
-nir_ssa_def *col[4];
+nir_def *col[4];
 for (unsigned i = 0; i < 3; ++i)
 col[i] = nir_fdiv_imm(&b, nir_i2f32(&b, nir_channel(&b, nir_load_var(&b, rgb_result), i)), 255.0);
 col[3] = nir_load_var(&b, alpha_result);
@@ -412,9 +408,9 @@
 }
 nir_push_else(&b, NULL);
 { /* EAC */
-nir_ssa_def *is_signed = nir_ior(&b, nir_ieq_imm(&b, format, VK_FORMAT_EAC_R11_SNORM_BLOCK),
+nir_def *is_signed = nir_ior(&b, nir_ieq_imm(&b, format, VK_FORMAT_EAC_R11_SNORM_BLOCK),
 nir_ieq_imm(&b, format, VK_FORMAT_EAC_R11G11_SNORM_BLOCK));
-nir_ssa_def *val[4];
+nir_def *val[4];
 for (int i = 0; i < 2; ++i) {
 val[i] = decode_etc2_alpha(&b, nir_channels(&b, payload, 3 << (2 * i)), linear_pixel, true, is_signed);
 }
@ -424,18 +420,18 @@ build_shader(struct radv_device *dev)
|
|||
}
|
||||
nir_pop_if(&b, NULL);
|
||||
|
||||
nir_ssa_def *outval = nir_load_var(&b, color);
|
||||
nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1),
|
||||
nir_channel(&b, coord, 2), nir_ssa_undef(&b, 1, 32));
|
||||
nir_def *outval = nir_load_var(&b, color);
|
||||
nir_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), nir_channel(&b, coord, 2),
|
||||
nir_undef(&b, 1, 32));
|
||||
|
||||
nir_push_if(&b, is_3d);
|
||||
{
|
||||
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_3d)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32),
|
||||
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_3d)->dest.ssa, img_coord, nir_undef(&b, 1, 32),
|
||||
outval, nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_3D);
|
||||
}
|
||||
nir_push_else(&b, NULL);
|
||||
{
|
||||
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_2d)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32),
|
||||
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_2d)->dest.ssa, img_coord, nir_undef(&b, 1, 32),
|
||||
outval, nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
|
||||
}
|
||||
nir_pop_if(&b, NULL);
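
Aside from the rename, the planar branch in this hunk hinges on one arithmetic trick: "(dx + dy + 2) >> 2" is a round-to-nearest divide by four. A standalone plain-C sketch of that arithmetic (the values are hypothetical, not from the shader):

/* Rounded divide-by-4 used by the ETC2 planar interpolation above:
 * adding 2 before the shift rounds (dx + dy) / 4 to nearest. */
#include <stdio.h>

int main(void)
{
   int rgb = 100, dx = 9, dy = 4; /* hypothetical channel values */
   int interpolated = rgb + ((dx + dy + 2) >> 2);
   printf("%d\n", interpolated); /* 100 + round(13 / 4) = 103 */
   return 0;
}
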
@@ -52,13 +52,12 @@ build_dcc_decompress_compute_shader(struct radv_device *dev)
    output_img->data.descriptor_set = 0;
    output_img->data.binding = 1;

-   nir_ssa_def *global_id = get_global_ids(&b, 2);
-   nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, global_id, 0), nir_channel(&b, global_id, 1),
-                                     nir_ssa_undef(&b, 1, 32), nir_ssa_undef(&b, 1, 32));
+   nir_def *global_id = get_global_ids(&b, 2);
+   nir_def *img_coord = nir_vec4(&b, nir_channel(&b, global_id, 0), nir_channel(&b, global_id, 1), nir_undef(&b, 1, 32),
+                                 nir_undef(&b, 1, 32));

-   nir_ssa_def *data =
-      nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, img_coord,
-                           nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
+   nir_def *data = nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, img_coord,
+                                        nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);

    /* We need a SCOPE_DEVICE memory_scope because ACO will avoid
     * creating a vmcnt(0) because it expects the L1 cache to keep memory
@@ -67,7 +66,7 @@ build_dcc_decompress_compute_shader(struct radv_device *dev)
    nir_barrier(&b, .execution_scope = SCOPE_WORKGROUP, .memory_scope = SCOPE_DEVICE,
                .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);

-   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32), data,
+   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_undef(&b, 1, 32), data,
                          nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
    return b.shader;
 }
@@ -42,28 +42,28 @@ build_fmask_copy_compute_shader(struct radv_device *dev, int samples)
    output_img->data.descriptor_set = 0;
    output_img->data.binding = 1;

-   nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
-   nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
-   nir_ssa_def *block_size = nir_imm_ivec3(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
+   nir_def *invoc_id = nir_load_local_invocation_id(&b);
+   nir_def *wg_id = nir_load_workgroup_id(&b, 32);
+   nir_def *block_size = nir_imm_ivec3(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
                                            b.shader->info.workgroup_size[2]);

-   nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+   nir_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

    /* Get coordinates. */
-   nir_ssa_def *src_coord = nir_trim_vector(&b, global_id, 2);
-   nir_ssa_def *dst_coord = nir_vec4(&b, nir_channel(&b, src_coord, 0), nir_channel(&b, src_coord, 1),
-                                     nir_ssa_undef(&b, 1, 32), nir_ssa_undef(&b, 1, 32));
+   nir_def *src_coord = nir_trim_vector(&b, global_id, 2);
+   nir_def *dst_coord = nir_vec4(&b, nir_channel(&b, src_coord, 0), nir_channel(&b, src_coord, 1), nir_undef(&b, 1, 32),
+                                 nir_undef(&b, 1, 32));

    nir_tex_src frag_mask_srcs[] = {{
       .src_type = nir_tex_src_coord,
       .src = nir_src_for_ssa(src_coord),
    }};
-   nir_ssa_def *frag_mask =
+   nir_def *frag_mask =
       nir_build_tex_deref_instr(&b, nir_texop_fragment_mask_fetch_amd, nir_build_deref_var(&b, input_img), NULL,
                                 ARRAY_SIZE(frag_mask_srcs), frag_mask_srcs);

    /* Get the maximum sample used in this fragment. */
-   nir_ssa_def *max_sample_index = nir_imm_int(&b, 0);
+   nir_def *max_sample_index = nir_imm_int(&b, 0);
    for (uint32_t s = 0; s < samples; s++) {
       /* max_sample_index = MAX2(max_sample_index, (frag_mask >> (s * 4)) & 0xf) */
       max_sample_index = nir_umax(&b, max_sample_index,
@@ -75,7 +75,7 @@ build_fmask_copy_compute_shader(struct radv_device *dev, int samples)

    nir_loop *loop = nir_push_loop(&b);
    {
-      nir_ssa_def *sample_id = nir_load_var(&b, counter);
+      nir_def *sample_id = nir_load_var(&b, counter);

       nir_tex_src frag_fetch_srcs[] = {{
          .src_type = nir_tex_src_coord,
@@ -85,9 +85,8 @@ build_fmask_copy_compute_shader(struct radv_device *dev, int samples)
          .src_type = nir_tex_src_ms_index,
          .src = nir_src_for_ssa(sample_id),
       }};
-      nir_ssa_def *outval =
-         nir_build_tex_deref_instr(&b, nir_texop_fragment_fetch_amd, nir_build_deref_var(&b, input_img), NULL,
-                                   ARRAY_SIZE(frag_fetch_srcs), frag_fetch_srcs);
+      nir_def *outval = nir_build_tex_deref_instr(&b, nir_texop_fragment_fetch_amd, nir_build_deref_var(&b, input_img),
+                                                  NULL, ARRAY_SIZE(frag_fetch_srcs), frag_fetch_srcs);

       nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord, sample_id, outval,
                             nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_MS);
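
The FMASK walk in the loop above packs one 4-bit fragment index per sample; a standalone plain-C sketch of the same scan, with a hypothetical mask value:

/* Each sample owns a 4-bit slot in frag_mask; the shader keeps the
 * largest index seen, exactly like the nir_umax loop above. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   uint32_t frag_mask = 0x00002110; /* hypothetical 8-sample fragment mask */
   int samples = 8;
   uint32_t max_sample_index = 0;
   for (int s = 0; s < samples; s++) {
      uint32_t frag = (frag_mask >> (s * 4)) & 0xf;
      if (frag > max_sample_index)
         max_sample_index = frag;
   }
   printf("%u\n", max_sample_index); /* prints 2 */
   return 0;
}
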
@@ -48,17 +48,17 @@ build_fmask_expand_compute_shader(struct radv_device *device, int samples)
    output_img->data.access = ACCESS_NON_READABLE;

    nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img);
-   nir_ssa_def *output_img_deref = &nir_build_deref_var(&b, output_img)->dest.ssa;
+   nir_def *output_img_deref = &nir_build_deref_var(&b, output_img)->dest.ssa;

-   nir_ssa_def *tex_coord = get_global_ids(&b, 3);
+   nir_def *tex_coord = get_global_ids(&b, 3);

-   nir_ssa_def *tex_vals[8];
+   nir_def *tex_vals[8];
    for (uint32_t i = 0; i < samples; i++) {
       tex_vals[i] = nir_txf_ms_deref(&b, input_img_deref, tex_coord, nir_imm_int(&b, i));
    }

-   nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, tex_coord, 0), nir_channel(&b, tex_coord, 1),
-                                     nir_channel(&b, tex_coord, 2), nir_ssa_undef(&b, 1, 32));
+   nir_def *img_coord = nir_vec4(&b, nir_channel(&b, tex_coord, 0), nir_channel(&b, tex_coord, 1),
+                                 nir_channel(&b, tex_coord, 2), nir_undef(&b, 1, 32));

    for (uint32_t i = 0; i < samples; i++) {
       nir_image_deref_store(&b, output_img_deref, img_coord, nir_imm_int(&b, i), tex_vals[i], nir_imm_int(&b, 0),
@@ -32,11 +32,11 @@
 #include "sid.h"
 #include "vk_format.h"

-static nir_ssa_def *
-radv_meta_build_resolve_srgb_conversion(nir_builder *b, nir_ssa_def *input)
+static nir_def *
+radv_meta_build_resolve_srgb_conversion(nir_builder *b, nir_def *input)
 {
    unsigned i;
-   nir_ssa_def *comp[4];
+   nir_def *comp[4];
    for (i = 0; i < 3; i++)
       comp[i] = nir_format_linear_to_srgb(b, nir_channel(b, input, i));
    comp[3] = nir_channels(b, input, 1 << 3);
@@ -62,27 +62,27 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s
    output_img->data.descriptor_set = 0;
    output_img->data.binding = 1;

-   nir_ssa_def *global_id = get_global_ids(&b, 2);
+   nir_def *global_id = get_global_ids(&b, 2);

-   nir_ssa_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
-   nir_ssa_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16);
+   nir_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
+   nir_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16);

-   nir_ssa_def *src_coord = nir_iadd(&b, global_id, src_offset);
-   nir_ssa_def *dst_coord = nir_iadd(&b, global_id, dst_offset);
+   nir_def *src_coord = nir_iadd(&b, global_id, src_offset);
+   nir_def *dst_coord = nir_iadd(&b, global_id, dst_offset);

    nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

    radv_meta_build_resolve_shader_core(dev, &b, is_integer, samples, input_img, color, src_coord);

-   nir_ssa_def *outval = nir_load_var(&b, color);
+   nir_def *outval = nir_load_var(&b, color);
    if (is_srgb)
       outval = radv_meta_build_resolve_srgb_conversion(&b, outval);

-   nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1),
-                                     nir_ssa_undef(&b, 1, 32), nir_ssa_undef(&b, 1, 32));
+   nir_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1), nir_undef(&b, 1, 32),
+                                 nir_undef(&b, 1, 32));

-   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32),
-                         outval, nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
+   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord, nir_undef(&b, 1, 32), outval,
+                         nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
    return b.shader;
 }

@@ -130,21 +130,21 @@ build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples,
    output_img->data.descriptor_set = 0;
    output_img->data.binding = 1;

-   nir_ssa_def *global_id = get_global_ids(&b, 3);
+   nir_def *global_id = get_global_ids(&b, 3);

-   nir_ssa_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
+   nir_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);

-   nir_ssa_def *resolve_coord = nir_iadd(&b, nir_trim_vector(&b, global_id, 2), offset);
+   nir_def *resolve_coord = nir_iadd(&b, nir_trim_vector(&b, global_id, 2), offset);

-   nir_ssa_def *img_coord =
+   nir_def *img_coord =
       nir_vec3(&b, nir_channel(&b, resolve_coord, 0), nir_channel(&b, resolve_coord, 1), nir_channel(&b, global_id, 2));

    nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img);
-   nir_ssa_def *outval = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, 0));
+   nir_def *outval = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, 0));

    if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT) {
       for (int i = 1; i < samples; i++) {
-         nir_ssa_def *si = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, i));
+         nir_def *si = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, i));

          switch (resolve_mode) {
          case VK_RESOLVE_MODE_AVERAGE_BIT:
@@ -172,9 +172,9 @@ build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples,
       outval = nir_fdiv_imm(&b, outval, samples);
    }

-   nir_ssa_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1),
-                                 nir_channel(&b, img_coord, 2), nir_ssa_undef(&b, 1, 32));
-   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_ssa_undef(&b, 1, 32), outval,
+   nir_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1),
+                             nir_channel(&b, img_coord, 2), nir_undef(&b, 1, 32));
+   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord, nir_undef(&b, 1, 32), outval,
                          nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
    return b.shader;
 }
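
The VK_RESOLVE_MODE_AVERAGE_BIT path above ends with a single divide by the sample count; a plain-C sketch of that resolve, assuming a simple accumulate-then-divide loop (the sample values are hypothetical):

/* Average resolve: sum every sample once, divide once at the end,
 * mirroring the nir_fdiv_imm(outval, samples) above. */
#include <stdio.h>

int main(void)
{
   float sample_vals[4] = {0.0f, 0.5f, 0.5f, 1.0f}; /* hypothetical samples */
   int samples = 4;
   float outval = sample_vals[0];
   for (int i = 1; i < samples; i++)
      outval += sample_vals[i];
   outval /= samples;
   printf("%f\n", outval); /* 0.500000 */
   return 0;
}
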
@@ -47,17 +47,17 @@ build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, int samp
    nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
    color_out->data.location = FRAG_RESULT_DATA0;

-   nir_ssa_def *pos_in = nir_trim_vector(&b, nir_load_frag_coord(&b), 2);
-   nir_ssa_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
+   nir_def *pos_in = nir_trim_vector(&b, nir_load_frag_coord(&b), 2);
+   nir_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);

-   nir_ssa_def *pos_int = nir_f2i32(&b, pos_in);
+   nir_def *pos_int = nir_f2i32(&b, pos_in);

-   nir_ssa_def *img_coord = nir_trim_vector(&b, nir_iadd(&b, pos_int, src_offset), 2);
+   nir_def *img_coord = nir_trim_vector(&b, nir_iadd(&b, pos_int, src_offset), 2);
    nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

    radv_meta_build_resolve_shader_core(dev, &b, is_integer, samples, input_img, color, img_coord);

-   nir_ssa_def *outval = nir_load_var(&b, color);
+   nir_def *outval = nir_load_var(&b, color);
    nir_store_var(&b, color_out, outval, 0xf);
    return b.shader;
 }
@@ -260,18 +260,18 @@ build_depth_stencil_resolve_fragment_shader(struct radv_device *dev, int samples
    nir_variable *fs_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_out");
    fs_out->data.location = index == DEPTH_RESOLVE ? FRAG_RESULT_DEPTH : FRAG_RESULT_STENCIL;

-   nir_ssa_def *pos_in = nir_trim_vector(&b, nir_load_frag_coord(&b), 2);
+   nir_def *pos_in = nir_trim_vector(&b, nir_load_frag_coord(&b), 2);

-   nir_ssa_def *pos_int = nir_f2i32(&b, pos_in);
+   nir_def *pos_int = nir_f2i32(&b, pos_in);

-   nir_ssa_def *img_coord = nir_trim_vector(&b, pos_int, 2);
+   nir_def *img_coord = nir_trim_vector(&b, pos_int, 2);

    nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img);
-   nir_ssa_def *outval = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, 0));
+   nir_def *outval = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, 0));

    if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT) {
       for (int i = 1; i < samples; i++) {
-         nir_ssa_def *si = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, i));
+         nir_def *si = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, i));

          switch (resolve_mode) {
          case VK_RESOLVE_MODE_AVERAGE_BIT:
@@ -41,25 +41,25 @@ typedef struct {
    const struct radv_shader_layout *layout;
 } apply_layout_state;

-static nir_ssa_def *
+static nir_def *
 get_scalar_arg(nir_builder *b, unsigned size, struct ac_arg arg)
 {
    assert(arg.used);
    return nir_load_scalar_arg_amd(b, size, .base = arg.arg_index);
 }

-static nir_ssa_def *
-convert_pointer_to_64_bit(nir_builder *b, apply_layout_state *state, nir_ssa_def *ptr)
+static nir_def *
+convert_pointer_to_64_bit(nir_builder *b, apply_layout_state *state, nir_def *ptr)
 {
    return nir_pack_64_2x32_split(b, ptr, nir_imm_int(b, state->address32_hi));
 }

-static nir_ssa_def *
+static nir_def *
 load_desc_ptr(nir_builder *b, apply_layout_state *state, unsigned set)
 {
    const struct radv_userdata_locations *user_sgprs_locs = &state->info->user_sgprs_locs;
    if (user_sgprs_locs->shader_data[AC_UD_INDIRECT_DESCRIPTOR_SETS].sgpr_idx != -1) {
-      nir_ssa_def *addr = get_scalar_arg(b, 1, state->args->descriptor_sets[0]);
+      nir_def *addr = get_scalar_arg(b, 1, state->args->descriptor_sets[0]);
       addr = convert_pointer_to_64_bit(b, state, addr);
       return nir_load_smem_amd(b, 1, addr, nir_imm_int(b, set * 4));
    }
@@ -77,7 +77,7 @@ visit_vulkan_resource_index(nir_builder *b, apply_layout_state *state, nir_intri
    unsigned offset = layout->binding[binding].offset;
    unsigned stride;

-   nir_ssa_def *set_ptr;
+   nir_def *set_ptr;
    if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
        layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
       unsigned idx = state->layout->set[desc_set].dynamic_offset_start + layout->binding[binding].dynamic_offset_offset;
@@ -89,7 +89,7 @@ visit_vulkan_resource_index(nir_builder *b, apply_layout_state *state, nir_intri
       stride = layout->binding[binding].size;
    }

-   nir_ssa_def *binding_ptr = nir_imul_imm(b, intrin->src[0].ssa, stride);
+   nir_def *binding_ptr = nir_imul_imm(b, intrin->src[0].ssa, stride);
    nir_instr_as_alu(binding_ptr->parent_instr)->no_unsigned_wrap = true;

    binding_ptr = nir_iadd_imm(b, binding_ptr, offset);
@@ -97,9 +97,9 @@ visit_vulkan_resource_index(nir_builder *b, apply_layout_state *state, nir_intri

    if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
       assert(stride == 16);
-      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_pack_64_2x32_split(b, set_ptr, binding_ptr));
+      nir_def_rewrite_uses(&intrin->dest.ssa, nir_pack_64_2x32_split(b, set_ptr, binding_ptr));
    } else {
-      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_vec3(b, set_ptr, binding_ptr, nir_imm_int(b, stride)));
+      nir_def_rewrite_uses(&intrin->dest.ssa, nir_vec3(b, set_ptr, binding_ptr, nir_imm_int(b, stride)));
    }
    nir_instr_remove(&intrin->instr);
 }
@@ -109,27 +109,27 @@ visit_vulkan_resource_reindex(nir_builder *b, apply_layout_state *state, nir_int
 {
    VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
    if (desc_type == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
-      nir_ssa_def *set_ptr = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
-      nir_ssa_def *binding_ptr = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);
+      nir_def *set_ptr = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);
+      nir_def *binding_ptr = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);

-      nir_ssa_def *index = nir_imul_imm(b, intrin->src[1].ssa, 16);
+      nir_def *index = nir_imul_imm(b, intrin->src[1].ssa, 16);
       nir_instr_as_alu(index->parent_instr)->no_unsigned_wrap = true;

       binding_ptr = nir_iadd_nuw(b, binding_ptr, index);

-      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_pack_64_2x32_split(b, set_ptr, binding_ptr));
+      nir_def_rewrite_uses(&intrin->dest.ssa, nir_pack_64_2x32_split(b, set_ptr, binding_ptr));
    } else {
       assert(desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || desc_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

-      nir_ssa_def *binding_ptr = nir_channel(b, intrin->src[0].ssa, 1);
-      nir_ssa_def *stride = nir_channel(b, intrin->src[0].ssa, 2);
+      nir_def *binding_ptr = nir_channel(b, intrin->src[0].ssa, 1);
+      nir_def *stride = nir_channel(b, intrin->src[0].ssa, 2);

-      nir_ssa_def *index = nir_imul(b, intrin->src[1].ssa, stride);
+      nir_def *index = nir_imul(b, intrin->src[1].ssa, stride);
       nir_instr_as_alu(index->parent_instr)->no_unsigned_wrap = true;

       binding_ptr = nir_iadd_nuw(b, binding_ptr, index);

-      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_vector_insert_imm(b, intrin->src[0].ssa, binding_ptr, 1));
+      nir_def_rewrite_uses(&intrin->dest.ssa, nir_vector_insert_imm(b, intrin->src[0].ssa, binding_ptr, 1));
    }
    nir_instr_remove(&intrin->instr);
 }
@@ -138,20 +138,20 @@ static void
 visit_load_vulkan_descriptor(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
 {
    if (nir_intrinsic_desc_type(intrin) == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) {
-      nir_ssa_def *addr = convert_pointer_to_64_bit(b, state,
+      nir_def *addr = convert_pointer_to_64_bit(b, state,
                                                     nir_iadd(b, nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa),
                                                              nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa)));
-      nir_ssa_def *desc = nir_build_load_global(b, 1, 64, addr, .access = ACCESS_NON_WRITEABLE);
+      nir_def *desc = nir_build_load_global(b, 1, 64, addr, .access = ACCESS_NON_WRITEABLE);

-      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
+      nir_def_rewrite_uses(&intrin->dest.ssa, desc);
    } else {
-      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_vector_insert_imm(b, intrin->src[0].ssa, nir_imm_int(b, 0), 2));
+      nir_def_rewrite_uses(&intrin->dest.ssa, nir_vector_insert_imm(b, intrin->src[0].ssa, nir_imm_int(b, 0), 2));
    }
    nir_instr_remove(&intrin->instr);
 }

-static nir_ssa_def *
-load_inline_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *rsrc)
+static nir_def *
+load_inline_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_def *rsrc)
 {
    uint32_t desc_type = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                         S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
@@ -169,8 +169,8 @@ load_inline_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa
                   nir_imm_int(b, desc_type));
 }

-static nir_ssa_def *
-load_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *rsrc, unsigned access)
+static nir_def *
+load_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_def *rsrc, unsigned access)
 {
    nir_binding binding = nir_chase_binding(nir_src_for_ssa(rsrc));

@@ -188,34 +188,34 @@ load_buffer_descriptor(nir_builder *b, apply_layout_state *state, nir_ssa_def *r
    if (access & ACCESS_NON_UNIFORM)
       return nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1));

-   nir_ssa_def *desc_set = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0));
+   nir_def *desc_set = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0));
    return nir_load_smem_amd(b, 4, desc_set, nir_channel(b, rsrc, 1), .align_mul = 16);
 }

 static void
 visit_get_ssbo_size(nir_builder *b, apply_layout_state *state, nir_intrinsic_instr *intrin)
 {
-   nir_ssa_def *rsrc = intrin->src[0].ssa;
+   nir_def *rsrc = intrin->src[0].ssa;

-   nir_ssa_def *size;
+   nir_def *size;
    if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM) {
-      nir_ssa_def *ptr = nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1));
+      nir_def *ptr = nir_iadd(b, nir_channel(b, rsrc, 0), nir_channel(b, rsrc, 1));
       ptr = nir_iadd_imm(b, ptr, 8);
       ptr = convert_pointer_to_64_bit(b, state, ptr);
       size = nir_build_load_global(b, 4, 32, ptr, .access = ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER, .align_mul = 16,
                                    .align_offset = 4);
    } else {
       /* load the entire descriptor so it can be CSE'd */
-      nir_ssa_def *ptr = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0));
-      nir_ssa_def *desc = nir_load_smem_amd(b, 4, ptr, nir_channel(b, rsrc, 1), .align_mul = 16);
+      nir_def *ptr = convert_pointer_to_64_bit(b, state, nir_channel(b, rsrc, 0));
+      nir_def *desc = nir_load_smem_amd(b, 4, ptr, nir_channel(b, rsrc, 1), .align_mul = 16);
       size = nir_channel(b, desc, 2);
    }

-   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, size);
+   nir_def_rewrite_uses(&intrin->dest.ssa, size);
    nir_instr_remove(&intrin->instr);
 }

-static nir_ssa_def *
+static nir_def *
 get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *deref, enum ac_descriptor_type desc_type,
                  bool non_uniform, nir_tex_instr *tex, bool write)
 {
@@ -276,13 +276,13 @@ get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *der
       break;
    }

-   nir_ssa_def *index = NULL;
+   nir_def *index = NULL;
    while (deref->deref_type != nir_deref_type_var) {
       assert(deref->deref_type == nir_deref_type_array);
       unsigned array_size = MAX2(glsl_get_aoa_size(deref->type), 1);
       array_size *= binding->size;

-      nir_ssa_def *tmp = nir_imul_imm(b, deref->arr.index.ssa, array_size);
+      nir_def *tmp = nir_imul_imm(b, deref->arr.index.ssa, array_size);
       if (tmp != deref->arr.index.ssa)
          nir_instr_as_alu(tmp->parent_instr)->no_unsigned_wrap = true;

@@ -296,23 +296,23 @@ get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *der
       deref = nir_deref_instr_parent(deref);
    }

-   nir_ssa_def *index_offset = index ? nir_iadd_imm(b, index, offset) : nir_imm_int(b, offset);
+   nir_def *index_offset = index ? nir_iadd_imm(b, index, offset) : nir_imm_int(b, offset);
    if (index && index_offset != index)
       nir_instr_as_alu(index_offset->parent_instr)->no_unsigned_wrap = true;

    if (non_uniform)
       return nir_iadd(b, load_desc_ptr(b, state, desc_set), index_offset);

-   nir_ssa_def *addr = convert_pointer_to_64_bit(b, state, load_desc_ptr(b, state, desc_set));
-   nir_ssa_def *desc = nir_load_smem_amd(b, size, addr, index_offset, .align_mul = size * 4u);
+   nir_def *addr = convert_pointer_to_64_bit(b, state, load_desc_ptr(b, state, desc_set));
+   nir_def *desc = nir_load_smem_amd(b, size, addr, index_offset, .align_mul = size * 4u);

    /* 3 plane formats always have same size and format for plane 1 & 2, so
     * use the tail from plane 1 so that we can store only the first 16 bytes
     * of the last plane. */
    if (desc_type == AC_DESC_PLANE_2) {
-      nir_ssa_def *desc2 = get_sampler_desc(b, state, deref, AC_DESC_PLANE_1, non_uniform, tex, write);
+      nir_def *desc2 = get_sampler_desc(b, state, deref, AC_DESC_PLANE_1, non_uniform, tex, write);

-      nir_ssa_def *comp[8];
+      nir_def *comp[8];
       for (unsigned i = 0; i < 4; i++)
          comp[i] = nir_channel(b, desc, i);
       for (unsigned i = 4; i < 8; i++)
@@ -320,7 +320,7 @@ get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *der

       return nir_vec(b, comp, 8);
    } else if (desc_type == AC_DESC_IMAGE && state->has_image_load_dcc_bug && !tex && !write) {
-      nir_ssa_def *comp[8];
+      nir_def *comp[8];
       for (unsigned i = 0; i < 8; i++)
          comp[i] = nir_channel(b, desc, i);

@@ -331,7 +331,7 @@ get_sampler_desc(nir_builder *b, apply_layout_state *state, nir_deref_instr *der

       return nir_vec(b, comp, 8);
    } else if (desc_type == AC_DESC_SAMPLER && tex->op == nir_texop_tg4 && !state->conformant_trunc_coord) {
-      nir_ssa_def *comp[4];
+      nir_def *comp[4];
       for (unsigned i = 0; i < 4; i++)
          comp[i] = nir_channel(b, desc, i);

@@ -354,11 +354,11 @@ update_image_intrinsic(nir_builder *b, apply_layout_state *state, nir_intrinsic_
    bool is_load =
       intrin->intrinsic == nir_intrinsic_image_deref_load || intrin->intrinsic == nir_intrinsic_image_deref_sparse_load;

-   nir_ssa_def *desc = get_sampler_desc(b, state, deref, dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE,
+   nir_def *desc = get_sampler_desc(b, state, deref, dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE,
                                         nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM, NULL, !is_load);

    if (intrin->intrinsic == nir_intrinsic_image_deref_descriptor_amd) {
-      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
+      nir_def_rewrite_uses(&intrin->dest.ssa, desc);
       nir_instr_remove(&intrin->instr);
    } else {
       nir_rewrite_image_intrinsic(intrin, desc, true);
@@ -370,7 +370,7 @@ apply_layout_to_intrin(nir_builder *b, apply_layout_state *state, nir_intrinsic_
 {
    b->cursor = nir_before_instr(&intrin->instr);

-   nir_ssa_def *rsrc;
+   nir_def *rsrc;
    switch (intrin->intrinsic) {
    case nir_intrinsic_vulkan_resource_index:
       visit_vulkan_resource_index(b, state, intrin);
@@ -435,8 +435,8 @@ apply_layout_to_tex(nir_builder *b, apply_layout_state *state, nir_tex_instr *te
       }
    }

-   nir_ssa_def *image = NULL;
-   nir_ssa_def *sampler = NULL;
+   nir_def *image = NULL;
+   nir_def *sampler = NULL;
    if (plane >= 0) {
       assert(tex->op != nir_texop_txf_ms && tex->op != nir_texop_samples_identical);
       assert(tex->sampler_dim != GLSL_SAMPLER_DIM_BUF);
@@ -467,7 +467,7 @@ apply_layout_to_tex(nir_builder *b, apply_layout_state *state, nir_tex_instr *te
        */
       /* TODO: This is unnecessary for combined image+sampler.
        * We can do this when updating the desc set. */
-      nir_ssa_def *comp[4];
+      nir_def *comp[4];
       for (unsigned i = 0; i < 4; i++)
          comp[i] = nir_channel(b, sampler, i);
       comp[0] = nir_iand(b, comp[0], nir_channel(b, image, 7));
@@ -477,7 +477,7 @@ apply_layout_to_tex(nir_builder *b, apply_layout_state *state, nir_tex_instr *te
    }

    if (tex->op == nir_texop_descriptor_amd) {
-      nir_ssa_def_rewrite_uses(&tex->dest.ssa, image);
+      nir_def_rewrite_uses(&tex->dest.ssa, image);
       nir_instr_remove(&tex->instr);
       return;
    }
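
convert_pointer_to_64_bit() above glues the constant high half of the GPU virtual address on top of a 32-bit SGPR value. The same operation written as plain C (the address32_hi value in any real shader comes from the device, so the one below is hypothetical):

/* What nir_pack_64_2x32_split expresses in the pass above. */
#include <stdint.h>

static uint64_t convert_pointer_to_64_bit(uint32_t ptr, uint32_t address32_hi)
{
   return ((uint64_t)address32_hi << 32) | ptr;
}
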
@@ -39,31 +39,31 @@ typedef struct {
    const struct radv_shader_info *info;
    const struct radv_pipeline_key *pl_key;
    uint32_t address32_hi;
-   nir_ssa_def *gsvs_ring[4];
+   nir_def *gsvs_ring[4];
 } lower_abi_state;

-static nir_ssa_def *
+static nir_def *
 load_ring(nir_builder *b, unsigned ring, lower_abi_state *s)
 {
    struct ac_arg arg =
       b->shader->info.stage == MESA_SHADER_TASK ? s->args->task_ring_offsets : s->args->ac.ring_offsets;

-   nir_ssa_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, arg);
+   nir_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, arg);
    ring_offsets = nir_pack_64_2x32_split(b, nir_channel(b, ring_offsets, 0), nir_channel(b, ring_offsets, 1));
    return nir_load_smem_amd(b, 4, ring_offsets, nir_imm_int(b, ring * 16u), .align_mul = 4u);
 }

-static nir_ssa_def *
+static nir_def *
 nggc_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
 {
-   nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
+   nir_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
    return nir_test_mask(b, settings, mask);
 }

-static nir_ssa_def *
+static nir_def *
 shader_query_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
 {
-   nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->shader_query_state);
+   nir_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->shader_query_state);
    return nir_test_mask(b, settings, mask);
 }

@@ -80,7 +80,7 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)

    b->cursor = nir_before_instr(instr);

-   nir_ssa_def *replacement = NULL;
+   nir_def *replacement = NULL;
    bool progress = true;

    switch (intrin->intrinsic) {
@@ -129,13 +129,13 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
       /* Note, the HW always assumes there is at least 1 per-vertex param. */
       const unsigned total_num_params = MAX2(1, s->info->outinfo.param_exports) + s->info->outinfo.prim_param_exports;

-      nir_ssa_def *dword1 = nir_channel(b, replacement, 1);
+      nir_def *dword1 = nir_channel(b, replacement, 1);
       dword1 = nir_ior_imm(b, dword1, S_008F04_STRIDE(16 * total_num_params));
       replacement = nir_vector_insert_imm(b, replacement, dword1, 1);
       break;

    case nir_intrinsic_load_ring_attr_offset_amd: {
-      nir_ssa_def *ring_attr_offset = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_attr_offset);
+      nir_def *ring_attr_offset = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_attr_offset);
       replacement = nir_ishl_imm(b, nir_ubfe_imm(b, ring_attr_offset, 0, 15), 9); /* 512b increments. */
       break;
    }
@@ -148,7 +148,7 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
        * to optimize some multiplications (in address calculations) so that
        * constant additions can be added to the const offset in memory load instructions.
        */
-      nir_ssa_def *arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tes_rel_patch_id);
+      nir_def *arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tes_rel_patch_id);

       if (s->info->tes.tcs_vertices_out) {
          nir_intrinsic_instr *load_arg = nir_instr_as_intrinsic(arg->parent_instr);
@@ -203,7 +203,7 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
       replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.merged_wave_info);
       break;
    case nir_intrinsic_load_cull_any_enabled_amd: {
-      nir_ssa_def *gs_tg_info = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info);
+      nir_def *gs_tg_info = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info);

       /* Consider a workgroup small if it contains less than 16 triangles.
        *
@@ -211,12 +211,12 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
        * so the below is equivalent to: "ult(ubfe(gs_tg_info, 22, 9), 16)", but
        * ACO can optimize out the comparison to zero (see try_optimize_scc_nocompare).
        */
-      nir_ssa_def *small_workgroup = nir_ieq_imm(b, nir_iand_imm(b, gs_tg_info, BITFIELD_RANGE(22 + 4, 9 - 4)), 0);
+      nir_def *small_workgroup = nir_ieq_imm(b, nir_iand_imm(b, gs_tg_info, BITFIELD_RANGE(22 + 4, 9 - 4)), 0);

-      nir_ssa_def *mask =
+      nir_def *mask =
         nir_bcsel(b, small_workgroup, nir_imm_int(b, radv_nggc_none),
                   nir_imm_int(b, radv_nggc_front_face | radv_nggc_back_face | radv_nggc_small_primitives));
-      nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
+      nir_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
       replacement = nir_ine_imm(b, nir_iand(b, settings, mask), 0);
       break;
    }
@@ -238,14 +238,14 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
        * exponent = nggc_settings >> 24
        * precision = 1.0 * 2 ^ exponent
        */
-      nir_ssa_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
-      nir_ssa_def *exponent = nir_ishr_imm(b, settings, 24u);
+      nir_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
+      nir_def *exponent = nir_ishr_imm(b, settings, 24u);
       replacement = nir_ldexp(b, nir_imm_float(b, 1.0f), exponent);
       break;
    }

    case nir_intrinsic_load_viewport_xy_scale_and_offset: {
-      nir_ssa_def *comps[] = {
+      nir_def *comps[] = {
         ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[0]),
         ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[1]),
         ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_translate[0]),
@@ -280,7 +280,7 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
       if (s->info->inputs_linked) {
          replacement = nir_imm_int(b, get_tcs_input_vertex_stride(s->info->tcs.num_linked_inputs));
       } else {
-         nir_ssa_def *lshs_vertex_stride =
+         nir_def *lshs_vertex_stride =
            GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_LSHS_VERTEX_STRIDE);
          replacement = nir_ishl_imm(b, lshs_vertex_stride, 2);
       }
@@ -296,7 +296,7 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
       break;
    }
    case nir_intrinsic_load_hs_out_patch_data_offset_amd: {
-      nir_ssa_def *out_vertices_per_patch;
+      nir_def *out_vertices_per_patch;
       unsigned num_tcs_outputs =
          stage == MESA_SHADER_TESS_CTRL ? s->info->tcs.num_linked_outputs : s->info->tes.num_linked_inputs;

@@ -310,13 +310,13 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
         }
      }

-      nir_ssa_def *per_vertex_output_patch_size = nir_imul_imm(b, out_vertices_per_patch, num_tcs_outputs * 16u);
+      nir_def *per_vertex_output_patch_size = nir_imul_imm(b, out_vertices_per_patch, num_tcs_outputs * 16u);

      if (s->info->num_tess_patches) {
         unsigned num_patches = s->info->num_tess_patches;
         replacement = nir_imul_imm(b, per_vertex_output_patch_size, num_patches);
      } else {
-         nir_ssa_def *num_patches;
+         nir_def *num_patches;

         if (stage == MESA_SHADER_TESS_CTRL) {
            num_patches = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_PATCHES);
@@ -330,10 +330,10 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
    case nir_intrinsic_load_sample_positions_amd: {
       uint32_t sample_pos_offset = (RING_PS_SAMPLE_POSITIONS * 16) - 8;

-      nir_ssa_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, s->args->ac.ring_offsets);
-      nir_ssa_def *addr = nir_pack_64_2x32(b, ring_offsets);
-      nir_ssa_def *sample_id = nir_umin(b, intrin->src[0].ssa, nir_imm_int(b, 7));
-      nir_ssa_def *offset = nir_ishl_imm(b, sample_id, 3); /* 2 floats containing samplepos.xy */
+      nir_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, s->args->ac.ring_offsets);
+      nir_def *addr = nir_pack_64_2x32(b, ring_offsets);
+      nir_def *sample_id = nir_umin(b, intrin->src[0].ssa, nir_imm_int(b, 7));
+      nir_def *offset = nir_ishl_imm(b, sample_id, 3); /* 2 floats containing samplepos.xy */

       nir_const_value *const_num_samples = nir_src_as_const_value(intrin->src[1]);
       if (const_num_samples) {
@@ -400,7 +400,7 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
       replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.streamout_write_index);
       break;
    case nir_intrinsic_load_streamout_buffer_amd: {
-      nir_ssa_def *ptr = nir_pack_64_2x32_split(b, ac_nir_load_arg(b, &s->args->ac, s->args->streamout_buffers),
+      nir_def *ptr = nir_pack_64_2x32_split(b, ac_nir_load_arg(b, &s->args->ac, s->args->streamout_buffers),
                                                 nir_imm_int(b, s->address32_hi));
       replacement = nir_load_smem_amd(b, 4, ptr, nir_imm_int(b, nir_intrinsic_base(intrin) * 16));
       break;
@@ -461,19 +461,19 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
       replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.force_vrs_rates);
       break;
    case nir_intrinsic_load_fully_covered: {
-      nir_ssa_def *sample_coverage = ac_nir_load_arg(b, &s->args->ac, s->args->ac.sample_coverage);
+      nir_def *sample_coverage = ac_nir_load_arg(b, &s->args->ac, s->args->ac.sample_coverage);
       replacement = nir_ine_imm(b, sample_coverage, 0);
       break;
    }
    case nir_intrinsic_load_barycentric_optimize_amd: {
-      nir_ssa_def *prim_mask = ac_nir_load_arg(b, &s->args->ac, s->args->ac.prim_mask);
+      nir_def *prim_mask = ac_nir_load_arg(b, &s->args->ac, s->args->ac.prim_mask);
       /* enabled when bit 31 is set */
       replacement = nir_ilt_imm(b, prim_mask, 0);
       break;
    }
    case nir_intrinsic_load_poly_line_smooth_enabled:
       if (s->pl_key->dynamic_line_rast_mode) {
-         nir_ssa_def *line_rast_mode = GET_SGPR_FIELD_NIR(s->args->ps_state, PS_STATE_LINE_RAST_MODE);
+         nir_def *line_rast_mode = GET_SGPR_FIELD_NIR(s->args->ps_state, PS_STATE_LINE_RAST_MODE);
          replacement = nir_ieq_imm(b, line_rast_mode, VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT);
       } else {
          replacement = nir_imm_bool(b, s->pl_key->ps.line_smooth_enabled);
@@ -499,7 +499,7 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
       return false;

    if (replacement)
-      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, replacement);
+      nir_def_rewrite_uses(&intrin->dest.ssa, replacement);

    nir_instr_remove(instr);
    nir_instr_free(instr);
@@ -507,10 +507,10 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
    return true;
 }

-static nir_ssa_def *
+static nir_def *
 load_gsvs_ring(nir_builder *b, lower_abi_state *s, unsigned stream_id)
 {
-   nir_ssa_def *ring = load_ring(b, RING_GSVS_GS, s);
+   nir_def *ring = load_ring(b, RING_GSVS_GS, s);
    unsigned stream_offset = 0;
    unsigned stride = 0;
    for (unsigned i = 0; i <= stream_id; i++) {
@@ -523,7 +523,7 @@ load_gsvs_ring(nir_builder *b, lower_abi_state *s, unsigned stream_id)
    assert(stride < (1 << 14));

    if (stream_offset) {
-      nir_ssa_def *addr = nir_pack_64_2x32_split(b, nir_channel(b, ring, 0), nir_channel(b, ring, 1));
+      nir_def *addr = nir_pack_64_2x32_split(b, nir_channel(b, ring, 0), nir_channel(b, ring, 1));
       addr = nir_iadd_imm(b, addr, stream_offset);
       ring = nir_vector_insert_imm(b, ring, nir_unpack_64_2x32_split_x(b, addr), 0);
       ring = nir_vector_insert_imm(b, ring, nir_unpack_64_2x32_split_y(b, addr), 1);
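
The culling-precision hunk above follows the comment "exponent = nggc_settings >> 24; precision = 1.0 * 2 ^ exponent". A plain-C sketch of the same computation (the packed settings value below is hypothetical):

/* ldexpf(1.0f, e) builds 1.0 * 2^e, mirroring the nir_ldexp above;
 * the arithmetic right shift matches nir_ishr_imm. */
#include <math.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   int32_t settings = (int32_t)0xfc000000; /* hypothetical: exponent -4 in the top byte */
   int32_t exponent = settings >> 24;      /* arithmetic shift, like nir_ishr_imm */
   float precision = ldexpf(1.0f, exponent);
   printf("%f\n", precision); /* 0.062500 */
   return 0;
}
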
@@ -32,34 +32,34 @@ typedef struct {
    unsigned rast_prim;
 } lower_fs_barycentric_state;

-static nir_ssa_def *
-lower_interp_center_smooth(nir_builder *b, nir_ssa_def *offset)
+static nir_def *
+lower_interp_center_smooth(nir_builder *b, nir_def *offset)
 {
-   nir_ssa_def *pull_model = nir_load_barycentric_model(b, 32);
+   nir_def *pull_model = nir_load_barycentric_model(b, 32);

-   nir_ssa_def *deriv_x =
+   nir_def *deriv_x =
       nir_vec3(b, nir_fddx_fine(b, nir_channel(b, pull_model, 0)), nir_fddx_fine(b, nir_channel(b, pull_model, 1)),
                nir_fddx_fine(b, nir_channel(b, pull_model, 2)));
-   nir_ssa_def *deriv_y =
+   nir_def *deriv_y =
       nir_vec3(b, nir_fddy_fine(b, nir_channel(b, pull_model, 0)), nir_fddy_fine(b, nir_channel(b, pull_model, 1)),
                nir_fddy_fine(b, nir_channel(b, pull_model, 2)));

-   nir_ssa_def *offset_x = nir_channel(b, offset, 0);
-   nir_ssa_def *offset_y = nir_channel(b, offset, 1);
+   nir_def *offset_x = nir_channel(b, offset, 0);
+   nir_def *offset_y = nir_channel(b, offset, 1);

-   nir_ssa_def *adjusted_x = nir_fadd(b, pull_model, nir_fmul(b, deriv_x, offset_x));
-   nir_ssa_def *adjusted = nir_fadd(b, adjusted_x, nir_fmul(b, deriv_y, offset_y));
+   nir_def *adjusted_x = nir_fadd(b, pull_model, nir_fmul(b, deriv_x, offset_x));
+   nir_def *adjusted = nir_fadd(b, adjusted_x, nir_fmul(b, deriv_y, offset_y));

-   nir_ssa_def *ij = nir_vec2(b, nir_channel(b, adjusted, 0), nir_channel(b, adjusted, 1));
+   nir_def *ij = nir_vec2(b, nir_channel(b, adjusted, 0), nir_channel(b, adjusted, 1));

    /* Get W by using the reciprocal of 1/W. */
-   nir_ssa_def *w = nir_frcp(b, nir_channel(b, adjusted, 2));
+   nir_def *w = nir_frcp(b, nir_channel(b, adjusted, 2));

    return nir_fmul(b, ij, w);
 }

-static nir_ssa_def *
-lower_barycentric_coord_at_offset(nir_builder *b, nir_ssa_def *src, enum glsl_interp_mode mode)
+static nir_def *
+lower_barycentric_coord_at_offset(nir_builder *b, nir_def *src, enum glsl_interp_mode mode)
 {
    if (mode == INTERP_MODE_SMOOTH)
       return lower_interp_center_smooth(b, src);
@@ -67,15 +67,15 @@ lower_barycentric_coord_at_offset(nir_builder *b, nir_ssa_def *src, enum glsl_in
    return nir_load_barycentric_at_offset(b, 32, src, .interp_mode = mode);
 }

-static nir_ssa_def *
+static nir_def *
 lower_barycentric_coord_at_sample(nir_builder *b, lower_fs_barycentric_state *state, nir_intrinsic_instr *intrin)
 {
    const enum glsl_interp_mode mode = (enum glsl_interp_mode)nir_intrinsic_interp_mode(intrin);
-   nir_ssa_def *num_samples = nir_load_rasterization_samples_amd(b);
-   nir_ssa_def *new_dest;
+   nir_def *num_samples = nir_load_rasterization_samples_amd(b);
+   nir_def *new_dest;

    if (state->dynamic_rasterization_samples) {
-      nir_ssa_def *res1, *res2;
+      nir_def *res1, *res2;

       nir_push_if(b, nir_ieq_imm(b, num_samples, 1));
       {
@@ -83,7 +83,7 @@ lower_barycentric_coord_at_sample(nir_builder *b, lower_fs_barycentric_state *st
       }
       nir_push_else(b, NULL);
       {
-         nir_ssa_def *sample_pos = nir_load_sample_positions_amd(b, 32, intrin->src[0].ssa, num_samples);
+         nir_def *sample_pos = nir_load_sample_positions_amd(b, 32, intrin->src[0].ssa, num_samples);

          /* sample_pos -= 0.5 */
          sample_pos = nir_fadd_imm(b, sample_pos, -0.5f);
@@ -97,7 +97,7 @@ lower_barycentric_coord_at_sample(nir_builder *b, lower_fs_barycentric_state *st
       if (!state->num_rasterization_samples) {
          new_dest = nir_load_barycentric_pixel(b, 32, .interp_mode = nir_intrinsic_interp_mode(intrin));
       } else {
-         nir_ssa_def *sample_pos = nir_load_sample_positions_amd(b, 32, intrin->src[0].ssa, num_samples);
+         nir_def *sample_pos = nir_load_sample_positions_amd(b, 32, intrin->src[0].ssa, num_samples);

          /* sample_pos -= 0.5 */
          sample_pos = nir_fadd_imm(b, sample_pos, -0.5f);
@@ -109,7 +109,7 @@ lower_barycentric_coord_at_sample(nir_builder *b, lower_fs_barycentric_state *st
    return new_dest;
 }

-static nir_ssa_def *
+static nir_def *
 get_interp_param(nir_builder *b, lower_fs_barycentric_state *state, nir_intrinsic_instr *intrin)
 {
    const enum glsl_interp_mode mode = (enum glsl_interp_mode)nir_intrinsic_interp_mode(intrin);
@@ -130,10 +130,10 @@ get_interp_param(nir_builder *b, lower_fs_barycentric_state *state, nir_intrinsi
    return NULL;
 }

-static nir_ssa_def *
+static nir_def *
 lower_point(nir_builder *b)
 {
-   nir_ssa_def *coords[3];
+   nir_def *coords[3];

    coords[0] = nir_imm_float(b, 1.0f);
    coords[1] = nir_imm_float(b, 0.0f);
@@ -142,10 +142,10 @@ lower_point(nir_builder *b)
    return nir_vec(b, coords, 3);
 }

-static nir_ssa_def *
-lower_line(nir_builder *b, nir_ssa_def *p1, nir_ssa_def *p2)
+static nir_def *
+lower_line(nir_builder *b, nir_def *p1, nir_def *p2)
 {
-   nir_ssa_def *coords[3];
+   nir_def *coords[3];

    coords[1] = nir_fadd(b, p1, p2);
    coords[0] = nir_fsub_imm(b, 1.0f, coords[1]);
@@ -154,20 +154,20 @@ lower_line(nir_builder *b, nir_ssa_def *p1, nir_ssa_def *p2)
    return nir_vec(b, coords, 3);
 }

-static nir_ssa_def *
-lower_triangle(nir_builder *b, nir_ssa_def *p1, nir_ssa_def *p2)
+static nir_def *
+lower_triangle(nir_builder *b, nir_def *p1, nir_def *p2)
 {
-   nir_ssa_def *v0_bary[3], *v1_bary[3], *v2_bary[3];
-   nir_ssa_def *coords[3];
+   nir_def *v0_bary[3], *v1_bary[3], *v2_bary[3];
+   nir_def *coords[3];

    /* Compute the provoking vertex ID:
     *
     * quad_id = thread_id >> 2
     * provoking_vtx_id = (provoking_vtx >> (quad_id << 1)) & 3
     */
-   nir_ssa_def *quad_id = nir_ushr_imm(b, nir_load_subgroup_invocation(b), 2);
-   nir_ssa_def *provoking_vtx = nir_load_provoking_vtx_amd(b);
-   nir_ssa_def *provoking_vtx_id = nir_ubfe(b, provoking_vtx, nir_ishl_imm(b, quad_id, 1), nir_imm_int(b, 2));
+   nir_def *quad_id = nir_ushr_imm(b, nir_load_subgroup_invocation(b), 2);
+   nir_def *provoking_vtx = nir_load_provoking_vtx_amd(b);
+   nir_def *provoking_vtx_id = nir_ubfe(b, provoking_vtx, nir_ishl_imm(b, quad_id, 1), nir_imm_int(b, 2));

    /* Compute barycentrics. */
    v0_bary[0] = nir_fsub(b, nir_fsub_imm(b, 1.0f, p2), p1);
@@ -194,30 +194,30 @@ lower_triangle(nir_builder *b, nir_ssa_def *p1, nir_ssa_def *p2)
 static bool
 lower_load_barycentric_coord(nir_builder *b, lower_fs_barycentric_state *state, nir_intrinsic_instr *intrin)
 {
-   nir_ssa_def *interp, *p1, *p2;
-   nir_ssa_def *new_dest;
+   nir_def *interp, *p1, *p2;
+   nir_def *new_dest;

    b->cursor = nir_after_instr(&intrin->instr);

    /* When the rasterization primitive isn't known at compile time (GPL), load it. */
    if (state->rast_prim == -1) {
-      nir_ssa_def *rast_prim = nir_load_rasterization_primitive_amd(b);
-      nir_ssa_def *res1, *res2;
+      nir_def *rast_prim = nir_load_rasterization_primitive_amd(b);
+      nir_def *res1, *res2;

-      nir_ssa_def *is_point = nir_ieq_imm(b, rast_prim, V_028A6C_POINTLIST);
+      nir_def *is_point = nir_ieq_imm(b, rast_prim, V_028A6C_POINTLIST);
       nir_if *if_point = nir_push_if(b, is_point);
       {
          res1 = lower_point(b);
       }
       nir_push_else(b, if_point);
       {
-         nir_ssa_def *res_line, *res_triangle;
+         nir_def *res_line, *res_triangle;

          interp = get_interp_param(b, state, intrin);
          p1 = nir_channel(b, interp, 0);
          p2 = nir_channel(b, interp, 1);

-         nir_ssa_def *is_line = nir_ieq_imm(b, rast_prim, V_028A6C_LINESTRIP);
+         nir_def *is_line = nir_ieq_imm(b, rast_prim, V_028A6C_LINESTRIP);
          nir_if *if_line = nir_push_if(b, is_line);
          {
             res_line = lower_line(b, p1, p2);
@@ -250,7 +250,7 @@ lower_load_barycentric_coord(nir_builder *b, lower_fs_barycentric_state *state,
       }
    }

-   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest);
+   nir_def_rewrite_uses(&intrin->dest.ssa, new_dest);
    nir_instr_remove(&intrin->instr);

    return true;
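
lower_triangle() above documents its provoking-vertex lookup as "provoking_vtx_id = (provoking_vtx >> (quad_id << 1)) & 3". The same bit extraction as standalone plain C, with hypothetical packed values:

/* Each quad gets a 2-bit slot in provoking_vtx, indexed by quad_id. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   uint32_t thread_id = 9;              /* hypothetical lane; 9 >> 2 = quad 2 */
   uint32_t provoking_vtx = 0x000000e4; /* hypothetical slots: 0, 1, 2, 3 */
   uint32_t quad_id = thread_id >> 2;
   uint32_t provoking_vtx_id = (provoking_vtx >> (quad_id << 1)) & 3;
   printf("%u\n", provoking_vtx_id); /* prints 2 */
   return 0;
}
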
@@ -49,21 +49,21 @@ radv_nir_lower_fs_intrinsics(nir_shader *nir, const struct radv_shader_stage *fs

          switch (intrin->intrinsic) {
          case nir_intrinsic_load_sample_mask_in: {
-            nir_ssa_def *sample_coverage = nir_load_vector_arg_amd(&b, 1, .base = args->ac.sample_coverage.arg_index);
+            nir_def *sample_coverage = nir_load_vector_arg_amd(&b, 1, .base = args->ac.sample_coverage.arg_index);

-            nir_ssa_def *def = NULL;
+            nir_def *def = NULL;
             if (info->ps.uses_sample_shading || key->ps.sample_shading_enable) {
                /* gl_SampleMaskIn[0] = (SampleCoverage & (PsIterMask << gl_SampleID)). */
-               nir_ssa_def *ps_state = nir_load_scalar_arg_amd(&b, 1, .base = args->ps_state.arg_index);
-               nir_ssa_def *ps_iter_mask =
+               nir_def *ps_state = nir_load_scalar_arg_amd(&b, 1, .base = args->ps_state.arg_index);
+               nir_def *ps_iter_mask =
                   nir_ubfe_imm(&b, ps_state, PS_STATE_PS_ITER_MASK__SHIFT, util_bitcount(PS_STATE_PS_ITER_MASK__MASK));
-               nir_ssa_def *sample_id = nir_load_sample_id(&b);
+               nir_def *sample_id = nir_load_sample_id(&b);
                def = nir_iand(&b, sample_coverage, nir_ishl(&b, ps_iter_mask, sample_id));
             } else {
                def = sample_coverage;
             }

-            nir_ssa_def_rewrite_uses(&intrin->dest.ssa, def);
+            nir_def_rewrite_uses(&intrin->dest.ssa, def);

             nir_instr_remove(instr);
             progress = true;
@@ -73,35 +73,35 @@ radv_nir_lower_fs_intrinsics(nir_shader *nir, const struct radv_shader_stage *fs
            if (!key->adjust_frag_coord_z)
               continue;

-            if (!(nir_ssa_def_components_read(&intrin->dest.ssa) & (1 << 2)))
+            if (!(nir_def_components_read(&intrin->dest.ssa) & (1 << 2)))
               continue;

-            nir_ssa_def *frag_z = nir_channel(&b, &intrin->dest.ssa, 2);
+            nir_def *frag_z = nir_channel(&b, &intrin->dest.ssa, 2);

            /* adjusted_frag_z = fddx_fine(frag_z) * 0.0625 + frag_z */
-            nir_ssa_def *adjusted_frag_z = nir_fddx_fine(&b, frag_z);
+            nir_def *adjusted_frag_z = nir_fddx_fine(&b, frag_z);
            adjusted_frag_z = nir_ffma_imm1(&b, adjusted_frag_z, 0.0625f, frag_z);

            /* VRS Rate X = Ancillary[2:3] */
-            nir_ssa_def *ancillary = nir_load_vector_arg_amd(&b, 1, .base = args->ac.ancillary.arg_index);
-            nir_ssa_def *x_rate = nir_ubfe_imm(&b, ancillary, 2, 2);
+            nir_def *ancillary = nir_load_vector_arg_amd(&b, 1, .base = args->ac.ancillary.arg_index);
+            nir_def *x_rate = nir_ubfe_imm(&b, ancillary, 2, 2);

            /* xRate = xRate == 0x1 ? adjusted_frag_z : frag_z. */
-            nir_ssa_def *cond = nir_ieq_imm(&b, x_rate, 1);
+            nir_def *cond = nir_ieq_imm(&b, x_rate, 1);
            frag_z = nir_bcsel(&b, cond, adjusted_frag_z, frag_z);

-            nir_ssa_def *new_dest = nir_vector_insert_imm(&b, &intrin->dest.ssa, frag_z, 2);
-            nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, new_dest, new_dest->parent_instr);
+            nir_def *new_dest = nir_vector_insert_imm(&b, &intrin->dest.ssa, frag_z, 2);
+            nir_def_rewrite_uses_after(&intrin->dest.ssa, new_dest, new_dest->parent_instr);

            progress = true;
            break;
         }
         case nir_intrinsic_load_barycentric_at_sample: {
-            nir_ssa_def *num_samples = nir_load_rasterization_samples_amd(&b);
-            nir_ssa_def *new_dest;
+            nir_def *num_samples = nir_load_rasterization_samples_amd(&b);
+            nir_def *new_dest;

            if (key->dynamic_rasterization_samples) {
-               nir_ssa_def *res1, *res2;
+               nir_def *res1, *res2;

               nir_push_if(&b, nir_ieq_imm(&b, num_samples, 1));
               {
@@ -109,7 +109,7 @@ radv_nir_lower_fs_intrinsics(nir_shader *nir, const struct radv_shader_stage *fs
               }
               nir_push_else(&b, NULL);
               {
-                  nir_ssa_def *sample_pos = nir_load_sample_positions_amd(&b, 32, intrin->src[0].ssa, num_samples);
+                  nir_def *sample_pos = nir_load_sample_positions_amd(&b, 32, intrin->src[0].ssa, num_samples);

                  /* sample_pos -= 0.5 */
                  sample_pos = nir_fadd_imm(&b, sample_pos, -0.5f);
@@ -124,7 +124,7 @@ radv_nir_lower_fs_intrinsics(nir_shader *nir, const struct radv_shader_stage *fs
               if (!key->ps.num_samples) {
                  new_dest = nir_load_barycentric_pixel(&b, 32, .interp_mode = nir_intrinsic_interp_mode(intrin));
               } else {
-                  nir_ssa_def *sample_pos = nir_load_sample_positions_amd(&b, 32, intrin->src[0].ssa, num_samples);
+                  nir_def *sample_pos = nir_load_sample_positions_amd(&b, 32, intrin->src[0].ssa, num_samples);

                  /* sample_pos -= 0.5 */
                  sample_pos = nir_fadd_imm(&b, sample_pos, -0.5f);
@@ -134,7 +134,7 @@ radv_nir_lower_fs_intrinsics(nir_shader *nir, const struct radv_shader_stage *fs
               }
            }

-            nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest);
+            nir_def_rewrite_uses(&intrin->dest.ssa, new_dest);
            nir_instr_remove(instr);

            progress = true;
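
The gl_SampleMaskIn lowering above follows the comment "SampleCoverage & (PsIterMask << gl_SampleID)". As standalone plain C, with hypothetical inputs, the masking looks like this:

/* With sample shading, each invocation keeps only the coverage bit
 * of the sample it shades. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   uint32_t sample_coverage = 0xb; /* hypothetical: samples 0, 1 and 3 covered */
   uint32_t ps_iter_mask = 0x1;    /* one sample per invocation */
   uint32_t sample_id = 3;
   uint32_t mask_in = sample_coverage & (ps_iter_mask << sample_id);
   printf("0x%x\n", mask_in); /* 0x8 */
   return 0;
}
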
@@ -43,7 +43,7 @@ radv_nir_lower_intrinsics_early(nir_shader *nir, const struct radv_pipeline_key
          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
          b.cursor = nir_before_instr(&intrin->instr);

-         nir_ssa_def *def = NULL;
+         nir_def *def = NULL;
          switch (intrin->intrinsic) {
          case nir_intrinsic_is_sparse_texels_resident:
             def = nir_ieq_imm(&b, intrin->src[0].ssa, 0);
@@ -60,7 +60,7 @@ radv_nir_lower_intrinsics_early(nir_shader *nir, const struct radv_pipeline_key
             continue;
          }

-         nir_ssa_def_rewrite_uses(&intrin->dest.ssa, def);
+         nir_def_rewrite_uses(&intrin->dest.ssa, def);

          nir_instr_remove(instr);
          progress = true;
|
||||
|
|
|
|||
|
|
@@ -51,17 +51,17 @@ radv_nir_lower_primitive_shading_rate(nir_shader *nir, enum amd_gfx_level gfx_le

 b.cursor = nir_before_instr(instr);

-nir_ssa_def *val = nir_ssa_for_src(&b, intr->src[1], 1);
+nir_def *val = nir_ssa_for_src(&b, intr->src[1], 1);

 /* x_rate = (shadingRate & (Horizontal2Pixels | Horizontal4Pixels)) ? 0x1 : 0x0; */
-nir_ssa_def *x_rate = nir_iand_imm(&b, val, 12);
+nir_def *x_rate = nir_iand_imm(&b, val, 12);
 x_rate = nir_b2i32(&b, nir_ine_imm(&b, x_rate, 0));

 /* y_rate = (shadingRate & (Vertical2Pixels | Vertical4Pixels)) ? 0x1 : 0x0; */
-nir_ssa_def *y_rate = nir_iand_imm(&b, val, 3);
+nir_def *y_rate = nir_iand_imm(&b, val, 3);
 y_rate = nir_b2i32(&b, nir_ine_imm(&b, y_rate, 0));

-nir_ssa_def *out = NULL;
+nir_def *out = NULL;

 /* MS:
 * Primitive shading rate is a per-primitive output, it is
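For reference, the two bit tests this pass builds as NIR can be written as plain scalar C. The masks 12 and 3 come straight from the diff above; the enum names in the comments follow the pass's own comments, and the packed return value is illustrative only, not the pass's actual output encoding:

#include <stdint.h>

/* Scalar rendering of the x_rate/y_rate computation. */
static uint32_t
shading_rate_bits(uint32_t shading_rate)
{
   /* x_rate: any horizontal coarsening bit set (Horizontal2Pixels | Horizontal4Pixels). */
   uint32_t x_rate = (shading_rate & 12) ? 1 : 0;
   /* y_rate: any vertical coarsening bit set (Vertical2Pixels | Vertical4Pixels). */
   uint32_t y_rate = (shading_rate & 3) ? 1 : 0;
   return (x_rate << 1) | y_rate;
}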
@@ -58,20 +58,20 @@ rq_variable_create(void *ctx, nir_shader *shader, unsigned array_length, const s
 return result;
 }

-static nir_ssa_def *
-nir_load_array(nir_builder *b, nir_variable *array, nir_ssa_def *index)
+static nir_def *
+nir_load_array(nir_builder *b, nir_variable *array, nir_def *index)
 {
 return nir_load_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, array), index));
 }

 static void
-nir_store_array(nir_builder *b, nir_variable *array, nir_ssa_def *index, nir_ssa_def *value, unsigned writemask)
+nir_store_array(nir_builder *b, nir_variable *array, nir_def *index, nir_def *value, unsigned writemask)
 {
 nir_store_deref(b, nir_build_deref_array(b, nir_build_deref_var(b, array), index), value, writemask);
 }

 static nir_deref_instr *
-rq_deref_var(nir_builder *b, nir_ssa_def *index, rq_variable *var)
+rq_deref_var(nir_builder *b, nir_def *index, rq_variable *var)
 {
 if (var->array_length == 1)
 return nir_build_deref_var(b, var->variable);
@@ -79,8 +79,8 @@ rq_deref_var(nir_builder *b, nir_ssa_def *index, rq_variable *var)
 return nir_build_deref_array(b, nir_build_deref_var(b, var->variable), index);
 }

-static nir_ssa_def *
-rq_load_var(nir_builder *b, nir_ssa_def *index, rq_variable *var)
+static nir_def *
+rq_load_var(nir_builder *b, nir_def *index, rq_variable *var)
 {
 if (var->array_length == 1)
 return nir_load_var(b, var->variable);
@@ -89,7 +89,7 @@ rq_load_var(nir_builder *b, nir_ssa_def *index, rq_variable *var)
 }

 static void
-rq_store_var(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def *value, unsigned writemask)
+rq_store_var(nir_builder *b, nir_def *index, rq_variable *var, nir_def *value, unsigned writemask)
 {
 if (var->array_length == 1) {
 nir_store_var(b, var->variable, value, writemask);
@@ -99,13 +99,13 @@ rq_store_var(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def *
 }

 static void
-rq_copy_var(nir_builder *b, nir_ssa_def *index, rq_variable *dst, rq_variable *src, unsigned mask)
+rq_copy_var(nir_builder *b, nir_def *index, rq_variable *dst, rq_variable *src, unsigned mask)
 {
 rq_store_var(b, index, dst, rq_load_var(b, index, src), mask);
 }

-static nir_ssa_def *
-rq_load_array(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def *array_index)
+static nir_def *
+rq_load_array(nir_builder *b, nir_def *index, rq_variable *var, nir_def *array_index)
 {
 if (var->array_length == 1)
 return nir_load_array(b, var->variable, array_index);
@@ -115,7 +115,7 @@ rq_load_array(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def
 }

 static void
-rq_store_array(nir_builder *b, nir_ssa_def *index, rq_variable *var, nir_ssa_def *array_index, nir_ssa_def *value,
+rq_store_array(nir_builder *b, nir_def *index, rq_variable *var, nir_def *array_index, nir_def *value,
 unsigned writemask)
 {
 if (var->array_length == 1) {
@@ -282,7 +282,7 @@ lower_ray_query(nir_shader *shader, nir_variable *ray_query, struct hash_table *
 }

 static void
-copy_candidate_to_closest(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars)
+copy_candidate_to_closest(nir_builder *b, nir_def *index, struct ray_query_vars *vars)
 {
 rq_copy_var(b, index, vars->closest.barycentrics, vars->candidate.barycentrics, 0x3);
 rq_copy_var(b, index, vars->closest.geometry_id_and_flags, vars->candidate.geometry_id_and_flags, 0x1);
@@ -296,10 +296,10 @@ copy_candidate_to_closest(nir_builder *b, nir_ssa_def *index, struct ray_query_v
 }

 static void
-insert_terminate_on_first_hit(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars,
+insert_terminate_on_first_hit(nir_builder *b, nir_def *index, struct ray_query_vars *vars,
 const struct radv_ray_flags *ray_flags, bool break_on_terminate)
 {
-nir_ssa_def *terminate_on_first_hit;
+nir_def *terminate_on_first_hit;
 if (ray_flags)
 terminate_on_first_hit = ray_flags->terminate_on_first_hit;
 else
@@ -315,16 +315,14 @@ insert_terminate_on_first_hit(nir_builder *b, nir_ssa_def *index, struct ray_que
 }

 static void
-lower_rq_confirm_intersection(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr,
-struct ray_query_vars *vars)
+lower_rq_confirm_intersection(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars)
 {
 copy_candidate_to_closest(b, index, vars);
 insert_terminate_on_first_hit(b, index, vars, NULL, false);
 }

 static void
-lower_rq_generate_intersection(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr,
-struct ray_query_vars *vars)
+lower_rq_generate_intersection(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars)
 {
 nir_push_if(b, nir_iand(b, nir_fge(b, rq_load_var(b, index, vars->closest.t), instr->src[1].ssa),
 nir_fge(b, instr->src[1].ssa, rq_load_var(b, index, vars->tmin))));
@@ -339,7 +337,7 @@ lower_rq_generate_intersection(nir_builder *b, nir_ssa_def *index, nir_intrinsic
 enum rq_intersection_type { intersection_type_none, intersection_type_triangle, intersection_type_aabb };

 static void
-lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars,
+lower_rq_initialize(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars,
 struct radv_instance *instance)
 {
 rq_store_var(b, index, vars->flags, instr->src[2].ssa, 0x1);
@@ -356,12 +354,12 @@ lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *ins
 rq_store_var(b, index, vars->closest.t, instr->src[7].ssa, 0x1);
 rq_store_var(b, index, vars->closest.intersection_type, nir_imm_int(b, intersection_type_none), 0x1);

-nir_ssa_def *accel_struct = instr->src[1].ssa;
+nir_def *accel_struct = instr->src[1].ssa;

-nir_ssa_def *bvh_offset = nir_build_load_global(
+nir_def *bvh_offset = nir_build_load_global(
 b, 1, 32, nir_iadd_imm(b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)),
 .access = ACCESS_NON_WRITEABLE);
-nir_ssa_def *bvh_base = nir_iadd(b, accel_struct, nir_u2u64(b, bvh_offset));
+nir_def *bvh_base = nir_iadd(b, accel_struct, nir_u2u64(b, bvh_offset));
 bvh_base = build_addr_to_node(b, bvh_base);

 rq_store_var(b, index, vars->root_bvh_base, bvh_base, 0x1);
@@ -371,7 +369,7 @@ lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *ins
 rq_store_var(b, index, vars->trav.stack, nir_imm_int(b, 0), 0x1);
 rq_store_var(b, index, vars->trav.stack_low_watermark, nir_imm_int(b, 0), 0x1);
 } else {
-nir_ssa_def *base_offset = nir_imul_imm(b, nir_load_local_invocation_index(b), sizeof(uint32_t));
+nir_def *base_offset = nir_imul_imm(b, nir_load_local_invocation_index(b), sizeof(uint32_t));
 base_offset = nir_iadd_imm(b, base_offset, vars->shared_base);
 rq_store_var(b, index, vars->trav.stack, base_offset, 0x1);
 rq_store_var(b, index, vars->trav.stack_low_watermark, base_offset, 0x1);
@@ -387,8 +385,8 @@ lower_rq_initialize(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *ins
 rq_store_var(b, index, vars->incomplete, nir_imm_bool(b, !(instance->debug_flags & RADV_DEBUG_NO_RT)), 0x1);
 }

-static nir_ssa_def *
-lower_rq_load(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars)
+static nir_def *
+lower_rq_load(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars)
 {
 bool committed = nir_intrinsic_committed(instr);
 struct ray_query_intersection_vars *intersection = committed ? &vars->closest : &vars->candidate;
@@ -409,7 +407,7 @@ lower_rq_load(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, st
 case nir_ray_query_value_intersection_geometry_index:
 return nir_iand_imm(b, rq_load_var(b, index, intersection->geometry_id_and_flags), 0xFFFFFF);
 case nir_ray_query_value_intersection_instance_custom_index: {
-nir_ssa_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
+nir_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
 return nir_iand_imm(
 b,
 nir_build_load_global(
@@ -418,27 +416,27 @@ lower_rq_load(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, st
 0xFFFFFF);
 }
 case nir_ray_query_value_intersection_instance_id: {
-nir_ssa_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
+nir_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
 return nir_build_load_global(
 b, 1, 32, nir_iadd_imm(b, instance_node_addr, offsetof(struct radv_bvh_instance_node, instance_id)));
 }
 case nir_ray_query_value_intersection_instance_sbt_index:
 return nir_iand_imm(b, rq_load_var(b, index, intersection->sbt_offset_and_flags), 0xFFFFFF);
 case nir_ray_query_value_intersection_object_ray_direction: {
-nir_ssa_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
-nir_ssa_def *wto_matrix[3];
+nir_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
+nir_def *wto_matrix[3];
 nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
 return nir_build_vec3_mat_mult(b, rq_load_var(b, index, vars->direction), wto_matrix, false);
 }
 case nir_ray_query_value_intersection_object_ray_origin: {
-nir_ssa_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
-nir_ssa_def *wto_matrix[3];
+nir_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
+nir_def *wto_matrix[3];
 nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);
 return nir_build_vec3_mat_mult(b, rq_load_var(b, index, vars->origin), wto_matrix, true);
 }
 case nir_ray_query_value_intersection_object_to_world: {
-nir_ssa_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
-nir_ssa_def *rows[3];
+nir_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
+nir_def *rows[3];
 for (unsigned r = 0; r < 3; ++r)
 rows[r] = nir_build_load_global(
 b, 4, 32,
@@ -452,19 +450,19 @@ lower_rq_load(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, st
 case nir_ray_query_value_intersection_t:
 return rq_load_var(b, index, intersection->t);
 case nir_ray_query_value_intersection_type: {
-nir_ssa_def *intersection_type = rq_load_var(b, index, intersection->intersection_type);
+nir_def *intersection_type = rq_load_var(b, index, intersection->intersection_type);
 if (!committed)
 intersection_type = nir_iadd_imm(b, intersection_type, -1);

 return intersection_type;
 }
 case nir_ray_query_value_intersection_world_to_object: {
-nir_ssa_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);
+nir_def *instance_node_addr = rq_load_var(b, index, intersection->instance_addr);

-nir_ssa_def *wto_matrix[3];
+nir_def *wto_matrix[3];
 nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);

-nir_ssa_def *vals[3];
+nir_def *vals[3];
 for (unsigned i = 0; i < 3; ++i)
 vals[i] = nir_channel(b, wto_matrix[i], column);

@@ -485,7 +483,7 @@ lower_rq_load(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, st

 struct traversal_data {
 struct ray_query_vars *vars;
-nir_ssa_def *index;
+nir_def *index;
 };

 static void
@@ -494,7 +492,7 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
 {
 struct traversal_data *data = args->data;
 struct ray_query_vars *vars = data->vars;
-nir_ssa_def *index = data->index;
+nir_def *index = data->index;

 rq_store_var(b, index, vars->candidate.primitive_id, intersection->primitive_id, 1);
 rq_store_var(b, index, vars->candidate.geometry_id_and_flags, intersection->geometry_id_and_flags, 1);
@@ -510,7 +508,7 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int
 {
 struct traversal_data *data = args->data;
 struct ray_query_vars *vars = data->vars;
-nir_ssa_def *index = data->index;
+nir_def *index = data->index;

 rq_store_var(b, index, vars->candidate.barycentrics, intersection->barycentrics, 3);
 rq_store_var(b, index, vars->candidate.primitive_id, intersection->base.primitive_id, 1);
@@ -533,7 +531,7 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int
 }

 static void
-store_stack_entry(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value, const struct radv_ray_traversal_args *args)
+store_stack_entry(nir_builder *b, nir_def *index, nir_def *value, const struct radv_ray_traversal_args *args)
 {
 struct traversal_data *data = args->data;
 if (data->vars->stack)
@@ -542,8 +540,8 @@ store_stack_entry(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value, const
 nir_store_shared(b, value, index, .base = 0, .align_mul = 4);
 }

-static nir_ssa_def *
-load_stack_entry(nir_builder *b, nir_ssa_def *index, const struct radv_ray_traversal_args *args)
+static nir_def *
+load_stack_entry(nir_builder *b, nir_def *index, const struct radv_ray_traversal_args *args)
 {
 struct traversal_data *data = args->data;
 if (data->vars->stack)
@@ -552,8 +550,8 @@ load_stack_entry(nir_builder *b, nir_ssa_def *index, const struct radv_ray_trave
 return nir_load_shared(b, 1, 32, index, .base = 0, .align_mul = 4);
 }

-static nir_ssa_def *
-lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars, struct radv_device *device)
+static nir_def *
+lower_rq_proceed(nir_builder *b, nir_def *index, struct ray_query_vars *vars, struct radv_device *device)
 {
 nir_variable *inv_dir = nir_local_variable_create(b->impl, glsl_vector_type(GLSL_TYPE_FLOAT, 3), "inv_dir");
 nir_store_var(b, inv_dir, nir_frcp(b, rq_load_var(b, index, vars->trav.direction)), 0x7);
@@ -608,7 +606,7 @@ lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars

 nir_push_if(b, rq_load_var(b, index, vars->incomplete));
 {
-nir_ssa_def *incomplete = radv_build_ray_traversal(device, b, &args);
+nir_def *incomplete = radv_build_ray_traversal(device, b, &args);
 rq_store_var(b, index, vars->incomplete, nir_iand(b, rq_load_var(b, index, vars->incomplete), incomplete), 1);
 }
 nir_pop_if(b, NULL);
@@ -617,7 +615,7 @@ lower_rq_proceed(nir_builder *b, nir_ssa_def *index, struct ray_query_vars *vars
 }

 static void
-lower_rq_terminate(nir_builder *b, nir_ssa_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars)
+lower_rq_terminate(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr, struct ray_query_vars *vars)
 {
 rq_store_var(b, index, vars->incomplete, nir_imm_false(b), 0x1);
 }
@@ -663,7 +661,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
 continue;

 nir_deref_instr *ray_query_deref = nir_instr_as_deref(intrinsic->src[0].ssa->parent_instr);
-nir_ssa_def *index = NULL;
+nir_def *index = NULL;

 if (ray_query_deref->deref_type == nir_deref_type_array) {
 index = ray_query_deref->arr.index.ssa;
@@ -677,7 +675,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device

 builder.cursor = nir_before_instr(instr);

-nir_ssa_def *new_dest = NULL;
+nir_def *new_dest = NULL;

 switch (intrinsic->intrinsic) {
 case nir_intrinsic_rq_confirm_intersection:
@@ -703,7 +701,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
 }

 if (new_dest)
-nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa, new_dest);
+nir_def_rewrite_uses(&intrinsic->dest.ssa, new_dest);

 nir_instr_remove(instr);
 nir_instr_free(instr);
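Most of the ray-query lowering above leans on nir_builder's structured control-flow helpers, where nir_push_if/nir_push_else/nir_pop_if bracket blocks the way braces do in C. A minimal sketch of the idiom with hypothetical inputs (not code from this commit):

#include "nir_builder.h"

/* Hypothetical helper: store 'value' when the integer def 'flag' is non-zero,
 * and zero otherwise, written with the builder's structured control flow. */
static void
store_gated_value(nir_builder *b, nir_variable *out, nir_def *flag, nir_def *value)
{
   nir_push_if(b, nir_ine_imm(b, flag, 0));
   {
      nir_store_var(b, out, value, 0x1);
   }
   nir_push_else(b, NULL);
   {
      nir_store_var(b, out, nir_imm_zero(b, value->num_components, value->bit_size), 0x1);
   }
   nir_pop_if(b, NULL);
}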
@@ -71,8 +71,8 @@ radv_nir_lower_view_index(nir_shader *nir, bool per_primitive)

 layer->data.per_primitive = per_primitive;
 b.cursor = nir_before_instr(instr);
-nir_ssa_def *def = nir_load_var(&b, layer);
-nir_ssa_def_rewrite_uses(&load->dest.ssa, def);
+nir_def *def = nir_load_var(&b, layer);
+nir_def_rewrite_uses(&load->dest.ssa, def);

 /* Update inputs_read to reflect that the pass added a new input. */
 nir->info.inputs_read |= VARYING_BIT_LAYER;
@@ -51,7 +51,7 @@ radv_nir_lower_viewport_to_zero(nir_shader *nir)

 b.cursor = nir_before_instr(instr);

-nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_imm_zero(&b, 1, 32));
+nir_def_rewrite_uses(&intr->dest.ssa, nir_imm_zero(&b, 1, 32));
 progress = true;
 break;
 }
@@ -37,7 +37,7 @@ typedef struct {
 const struct radeon_info *rad_info;
 } lower_vs_inputs_state;

-static nir_ssa_def *
+static nir_def *
 lower_load_vs_input_from_prolog(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs_state *s)
 {
 nir_src *offset_src = nir_get_io_offset_src(intrin);
@@ -56,7 +56,7 @@ lower_load_vs_input_from_prolog(nir_builder *b, nir_intrinsic_instr *intrin, low
 const unsigned arg_bit_size = MAX2(bit_size, 32);

 unsigned num_input_args = 1;
-nir_ssa_def *input_args[2] = {ac_nir_load_arg(b, &s->args->ac, s->args->vs_inputs[driver_location]), NULL};
+nir_def *input_args[2] = {ac_nir_load_arg(b, &s->args->ac, s->args->vs_inputs[driver_location]), NULL};
 if (component * 32 + arg_bit_size * num_components > 128) {
 assert(bit_size == 64);
@@ -64,8 +64,7 @@ lower_load_vs_input_from_prolog(nir_builder *b, nir_intrinsic_instr *intrin, low
 input_args[1] = ac_nir_load_arg(b, &s->args->ac, s->args->vs_inputs[driver_location + 1]);
 }

-nir_ssa_def *extracted =
-nir_extract_bits(b, input_args, num_input_args, component * 32, num_components, arg_bit_size);
+nir_def *extracted = nir_extract_bits(b, input_args, num_input_args, component * 32, num_components, arg_bit_size);

 if (bit_size < arg_bit_size) {
 assert(bit_size == 16);
@@ -79,20 +78,20 @@ lower_load_vs_input_from_prolog(nir_builder *b, nir_intrinsic_instr *intrin, low
 return extracted;
 }

-static nir_ssa_def *
+static nir_def *
 calc_vs_input_index_instance_rate(nir_builder *b, unsigned location, lower_vs_inputs_state *s)
 {
 const uint32_t divisor = s->pl_key->vs.instance_rate_divisors[location];
-nir_ssa_def *start_instance = nir_load_base_instance(b);
+nir_def *start_instance = nir_load_base_instance(b);

 if (divisor == 0)
 return start_instance;

-nir_ssa_def *instance_id = nir_udiv_imm(b, nir_load_instance_id(b), divisor);
+nir_def *instance_id = nir_udiv_imm(b, nir_load_instance_id(b), divisor);
 return nir_iadd(b, start_instance, instance_id);
 }

-static nir_ssa_def *
+static nir_def *
 calc_vs_input_index(nir_builder *b, unsigned location, lower_vs_inputs_state *s)
 {
 if (s->pl_key->vs.instance_rate_inputs & BITFIELD_BIT(location))
@@ -112,7 +111,7 @@ can_use_untyped_load(const struct util_format_description *f, const unsigned bit
 return c->size == bit_size && bit_size >= 32;
 }

-static nir_ssa_def *
+static nir_def *
 oob_input_load_value(nir_builder *b, const unsigned channel_idx, const unsigned bit_size, const bool is_float)
 {
 /* 22.1.1. Attribute Location and Component Assignment of Vulkan 1.3 specification:
@@ -120,7 +119,7 @@ oob_input_load_value(nir_builder *b, const unsigned channel_idx, const unsigned
 * must not use more components than provided by the attribute.
 */
 if (bit_size == 64)
-return nir_ssa_undef(b, 1, bit_size);
+return nir_undef(b, 1, bit_size);

 if (channel_idx == 3) {
 if (is_float)
@@ -175,8 +174,8 @@ first_used_swizzled_channel(const struct util_format_description *f, const unsig
 return first_used;
 }

-static nir_ssa_def *
-adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alpha_adjust, nir_ssa_def *alpha)
+static nir_def *
+adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alpha_adjust, nir_def *alpha)
 {
 if (alpha_adjust == AC_ALPHA_ADJUST_SSCALED)
 alpha = nir_f2u32(b, alpha);
@@ -201,7 +200,7 @@ adjust_vertex_fetch_alpha(nir_builder *b, enum ac_vs_input_alpha_adjust alpha_ad
 return alpha;
 }

-static nir_ssa_def *
+static nir_def *
 lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs_state *s)
 {
 nir_src *offset_src = nir_get_io_offset_src(intrin);
@@ -226,13 +225,13 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs
 /* Bitmask of components in bit_size units
 * of the current input load that are actually used.
 */
-const unsigned dest_use_mask = nir_ssa_def_components_read(&intrin->dest.ssa) << component;
+const unsigned dest_use_mask = nir_def_components_read(&intrin->dest.ssa) << component;

 /* If the input is entirely unused, just replace it with undef.
 * This is just in case we debug this pass without running DCE first.
 */
 if (!dest_use_mask)
-return nir_ssa_undef(b, dest_num_components, bit_size);
+return nir_undef(b, dest_num_components, bit_size);

 const uint32_t attrib_binding = s->pl_key->vs.vertex_attribute_bindings[location];
 const uint32_t attrib_offset = s->pl_key->vs.vertex_attribute_offsets[location];
@@ -244,12 +243,11 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs
 const unsigned binding_index = s->info->vs.use_per_attribute_vb_descs ? location : attrib_binding;
 const unsigned desc_index = util_bitcount(s->info->vs.vb_desc_usage_mask & u_bit_consecutive(0, binding_index));

-nir_ssa_def *vertex_buffers_arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.vertex_buffers);
-nir_ssa_def *vertex_buffers =
-nir_pack_64_2x32_split(b, vertex_buffers_arg, nir_imm_int(b, s->rad_info->address32_hi));
-nir_ssa_def *descriptor = nir_load_smem_amd(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16));
-nir_ssa_def *base_index = calc_vs_input_index(b, location, s);
-nir_ssa_def *zero = nir_imm_int(b, 0);
+nir_def *vertex_buffers_arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.vertex_buffers);
+nir_def *vertex_buffers = nir_pack_64_2x32_split(b, vertex_buffers_arg, nir_imm_int(b, s->rad_info->address32_hi));
+nir_def *descriptor = nir_load_smem_amd(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16));
+nir_def *base_index = calc_vs_input_index(b, location, s);
+nir_def *zero = nir_imm_int(b, 0);

 /* We currently implement swizzling for all formats in shaders.
 * Note, it is possible to specify swizzling in the DST_SEL fields of descriptors,
@@ -290,13 +288,13 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs
 * This is necessary because the backend can't further roll the const offset
 * into the index source of MUBUF / MTBUF instructions.
 */
-nir_ssa_def *loads[NIR_MAX_VEC_COMPONENTS] = {0};
+nir_def *loads[NIR_MAX_VEC_COMPONENTS] = {0};
 unsigned num_loads = 0;
 for (unsigned x = 0, channels; x < fetch_num_channels; x += channels) {
 channels = fetch_num_channels - x;
 const unsigned start = skipped_start + x;
 enum pipe_format fetch_format = attrib_format;
-nir_ssa_def *index = base_index;
+nir_def *index = base_index;

 /* Add excess constant offset to the index. */
 unsigned const_off = attrib_offset + count_format_bytes(f, 0, start);
@@ -339,7 +337,7 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs
 }
 }

-nir_ssa_def *load = loads[0];
+nir_def *load = loads[0];

 /* Extract the channels we actually need when we couldn't skip starting
 * components or had to emit more than one load intrinsic.
@@ -357,7 +355,7 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs
 * Apply swizzle and alpha adjust according to the format.
 */
 const nir_alu_type dst_type = nir_alu_type_get_base_type(nir_intrinsic_dest_type(intrin));
-nir_ssa_def *channels[NIR_MAX_VEC_COMPONENTS] = {0};
+nir_def *channels[NIR_MAX_VEC_COMPONENTS] = {0};
 for (unsigned i = 0; i < dest_num_components; ++i) {
 const unsigned c = i + component;

@@ -400,7 +398,7 @@ lower_vs_input_instr(nir_builder *b, nir_instr *instr, void *state)

 b->cursor = nir_before_instr(instr);

-nir_ssa_def *replacement = NULL;
+nir_def *replacement = NULL;

 if (s->info->vs.dynamic_inputs) {
 replacement = lower_load_vs_input_from_prolog(b, intrin, s);
@@ -408,7 +406,7 @@ lower_vs_input_instr(nir_builder *b, nir_instr *instr, void *state)
 replacement = lower_load_vs_input(b, intrin, s);
 }

-nir_ssa_def_rewrite_uses(&intrin->dest.ssa, replacement);
+nir_def_rewrite_uses(&intrin->dest.ssa, replacement);
 nir_instr_remove(instr);
 nir_instr_free(instr);
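calc_vs_input_index_instance_rate above emits, as NIR, the standard instance-rate fetch index. The same arithmetic in plain C for reference (the function and parameter names are illustrative, not part of the pass):

#include <stdint.h>

/* A divisor of 0 means every instance reads the element at base_instance;
 * otherwise the instance ID is divided by the divisor first. */
static uint32_t
instance_rate_index(uint32_t base_instance, uint32_t instance_id, uint32_t divisor)
{
   if (divisor == 0)
      return base_instance;
   return base_instance + instance_id / divisor;
}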
@@ -156,15 +156,15 @@ enum {
 };

 struct dgc_cmdbuf {
-nir_ssa_def *descriptor;
+nir_def *descriptor;
 nir_variable *offset;
 };

 static void
-dgc_emit(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *value)
+dgc_emit(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *value)
 {
 assert(value->bit_size >= 32);
-nir_ssa_def *offset = nir_load_var(b, cs->offset);
+nir_def *offset = nir_load_var(b, cs->offset);
 nir_store_ssbo(b, value, cs->descriptor, offset, .access = ACCESS_NON_READABLE);
 nir_store_var(b, cs->offset, nir_iadd_imm(b, offset, value->num_components * value->bit_size / 8), 0x1);
 }
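The nir_pkt3 helper in the next hunk builds a PM4 PKT3 header at shader run time: the dword count is masked to 14 bits and shifted to bit 16, and the type and opcode fields are OR'd in via PKT_TYPE_S and PKT3_IT_OPCODE_S. A scalar sketch of the same packing follows; the exact field positions are assumptions based on the usual PM4 layout, not definitions from this commit:

#include <stdint.h>

/* Illustrative host-side equivalent of nir_pkt3. */
static uint32_t
pkt3_header(uint32_t opcode, uint32_t count)
{
   return (3u << 30)               /* packet type 3 (assumed position) */
        | ((count & 0x3fff) << 16) /* dword count, masked as in nir_pkt3 */
        | ((opcode & 0xff) << 8);  /* IT opcode (assumed position) */
}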
@ -188,14 +188,14 @@ dgc_emit(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *value)
|
|||
nir_pack_64_2x32((b), nir_load_push_constant((b), 2, 32, nir_imm_int((b), 0), \
|
||||
.base = offsetof(struct radv_dgc_params, field), .range = 8))
|
||||
|
||||
static nir_ssa_def *
|
||||
nir_pkt3(nir_builder *b, unsigned op, nir_ssa_def *len)
|
||||
static nir_def *
|
||||
nir_pkt3(nir_builder *b, unsigned op, nir_def *len)
|
||||
{
|
||||
len = nir_iand_imm(b, len, 0x3fff);
|
||||
return nir_ior_imm(b, nir_ishl_imm(b, len, 16), PKT_TYPE_S(3) | PKT3_IT_OPCODE_S(op));
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
static nir_def *
|
||||
dgc_get_nop_packet(nir_builder *b, const struct radv_device *device)
|
||||
{
|
||||
if (device->physical_device->rad_info.gfx_ib_pad_with_type2) {
|
||||
|
|
@ -206,18 +206,18 @@ dgc_get_nop_packet(nir_builder *b, const struct radv_device *device)
|
|||
}
|
||||
|
||||
static void
|
||||
dgc_emit_userdata_vertex(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *vtx_base_sgpr, nir_ssa_def *first_vertex,
|
||||
nir_ssa_def *first_instance, nir_ssa_def *drawid, const struct radv_device *device)
|
||||
dgc_emit_userdata_vertex(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *vtx_base_sgpr, nir_def *first_vertex,
|
||||
nir_def *first_instance, nir_def *drawid, const struct radv_device *device)
|
||||
{
|
||||
vtx_base_sgpr = nir_u2u32(b, vtx_base_sgpr);
|
||||
nir_ssa_def *has_drawid = nir_test_mask(b, vtx_base_sgpr, DGC_USES_DRAWID);
|
||||
nir_ssa_def *has_baseinstance = nir_test_mask(b, vtx_base_sgpr, DGC_USES_BASEINSTANCE);
|
||||
nir_def *has_drawid = nir_test_mask(b, vtx_base_sgpr, DGC_USES_DRAWID);
|
||||
nir_def *has_baseinstance = nir_test_mask(b, vtx_base_sgpr, DGC_USES_BASEINSTANCE);
|
||||
|
||||
nir_ssa_def *pkt_cnt = nir_imm_int(b, 1);
|
||||
nir_def *pkt_cnt = nir_imm_int(b, 1);
|
||||
pkt_cnt = nir_bcsel(b, has_drawid, nir_iadd_imm(b, pkt_cnt, 1), pkt_cnt);
|
||||
pkt_cnt = nir_bcsel(b, has_baseinstance, nir_iadd_imm(b, pkt_cnt, 1), pkt_cnt);
|
||||
|
||||
nir_ssa_def *values[5] = {
|
||||
nir_def *values[5] = {
|
||||
nir_pkt3(b, PKT3_SET_SH_REG, pkt_cnt), nir_iand_imm(b, vtx_base_sgpr, 0x3FFF), first_vertex,
|
||||
dgc_get_nop_packet(b, device), dgc_get_nop_packet(b, device),
|
||||
};
|
||||
|
|
@ -230,51 +230,51 @@ dgc_emit_userdata_vertex(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *vtx
|
|||
}
|
||||
|
||||
static void
|
||||
dgc_emit_instance_count(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *instance_count)
|
||||
dgc_emit_instance_count(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *instance_count)
|
||||
{
|
||||
nir_ssa_def *values[2] = {nir_imm_int(b, PKT3(PKT3_NUM_INSTANCES, 0, false)), instance_count};
|
||||
nir_def *values[2] = {nir_imm_int(b, PKT3(PKT3_NUM_INSTANCES, 0, false)), instance_count};
|
||||
|
||||
dgc_emit(b, cs, nir_vec(b, values, 2));
|
||||
}
|
||||
|
||||
static void
|
||||
dgc_emit_draw_index_offset_2(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *index_offset, nir_ssa_def *index_count,
|
||||
nir_ssa_def *max_index_count)
|
||||
dgc_emit_draw_index_offset_2(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *index_offset, nir_def *index_count,
|
||||
nir_def *max_index_count)
|
||||
{
|
||||
nir_ssa_def *values[5] = {nir_imm_int(b, PKT3(PKT3_DRAW_INDEX_OFFSET_2, 3, false)), max_index_count, index_offset,
|
||||
nir_def *values[5] = {nir_imm_int(b, PKT3(PKT3_DRAW_INDEX_OFFSET_2, 3, false)), max_index_count, index_offset,
|
||||
index_count, nir_imm_int(b, V_0287F0_DI_SRC_SEL_DMA)};
|
||||
|
||||
dgc_emit(b, cs, nir_vec(b, values, 5));
|
||||
}
|
||||
|
||||
static void
|
||||
dgc_emit_draw_index_auto(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *vertex_count)
|
||||
dgc_emit_draw_index_auto(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *vertex_count)
|
||||
{
|
||||
nir_ssa_def *values[3] = {nir_imm_int(b, PKT3(PKT3_DRAW_INDEX_AUTO, 1, false)), vertex_count,
|
||||
nir_def *values[3] = {nir_imm_int(b, PKT3(PKT3_DRAW_INDEX_AUTO, 1, false)), vertex_count,
|
||||
nir_imm_int(b, V_0287F0_DI_SRC_SEL_AUTO_INDEX)};
|
||||
|
||||
dgc_emit(b, cs, nir_vec(b, values, 3));
|
||||
}
|
||||
|
||||
static void
|
||||
build_dgc_buffer_tail(nir_builder *b, nir_ssa_def *sequence_count, const struct radv_device *device)
|
||||
build_dgc_buffer_tail(nir_builder *b, nir_def *sequence_count, const struct radv_device *device)
|
||||
{
|
||||
nir_ssa_def *global_id = get_global_ids(b, 1);
|
||||
nir_def *global_id = get_global_ids(b, 1);
|
||||
|
||||
nir_ssa_def *cmd_buf_stride = load_param32(b, cmd_buf_stride);
|
||||
nir_ssa_def *cmd_buf_size = load_param32(b, cmd_buf_size);
|
||||
nir_def *cmd_buf_stride = load_param32(b, cmd_buf_stride);
|
||||
nir_def *cmd_buf_size = load_param32(b, cmd_buf_size);
|
||||
|
||||
nir_push_if(b, nir_ieq_imm(b, global_id, 0));
|
||||
{
|
||||
nir_ssa_def *cmd_buf_tail_start = nir_imul(b, cmd_buf_stride, sequence_count);
|
||||
nir_def *cmd_buf_tail_start = nir_imul(b, cmd_buf_stride, sequence_count);
|
||||
|
||||
nir_variable *offset = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "offset");
|
||||
nir_store_var(b, offset, cmd_buf_tail_start, 0x1);
|
||||
|
||||
nir_ssa_def *dst_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PREPARE);
|
||||
nir_def *dst_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PREPARE);
|
||||
nir_push_loop(b);
|
||||
{
|
||||
nir_ssa_def *curr_offset = nir_load_var(b, offset);
|
||||
nir_def *curr_offset = nir_load_var(b, offset);
|
||||
const unsigned MAX_PACKET_WORDS = 0x3FFC;
|
||||
|
||||
nir_push_if(b, nir_ieq(b, curr_offset, cmd_buf_size));
|
||||
|
|
@ -283,7 +283,7 @@ build_dgc_buffer_tail(nir_builder *b, nir_ssa_def *sequence_count, const struct
|
|||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
nir_ssa_def *packet, *packet_size;
|
||||
nir_def *packet, *packet_size;
|
||||
|
||||
if (device->physical_device->rad_info.gfx_ib_pad_with_type2) {
|
||||
packet_size = nir_imm_int(b, 4);
|
||||
|
|
@ -292,7 +292,7 @@ build_dgc_buffer_tail(nir_builder *b, nir_ssa_def *sequence_count, const struct
|
|||
packet_size = nir_isub(b, cmd_buf_size, curr_offset);
|
||||
packet_size = nir_umin(b, packet_size, nir_imm_int(b, MAX_PACKET_WORDS * 4));
|
||||
|
||||
nir_ssa_def *len = nir_ushr_imm(b, packet_size, 2);
|
||||
nir_def *len = nir_ushr_imm(b, packet_size, 2);
|
||||
len = nir_iadd_imm(b, len, -2);
|
||||
packet = nir_pkt3(b, PKT3_NOP, len);
|
||||
}
|
||||
|
|
@ -309,17 +309,17 @@ build_dgc_buffer_tail(nir_builder *b, nir_ssa_def *sequence_count, const struct
|
|||
* Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV.
|
||||
*/
|
||||
static void
|
||||
dgc_emit_draw(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base,
|
||||
nir_ssa_def *draw_params_offset, nir_ssa_def *sequence_id, const struct radv_device *device)
|
||||
dgc_emit_draw(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
|
||||
nir_def *draw_params_offset, nir_def *sequence_id, const struct radv_device *device)
|
||||
{
|
||||
nir_ssa_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
|
||||
nir_ssa_def *stream_offset = nir_iadd(b, draw_params_offset, stream_base);
|
||||
nir_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
|
||||
nir_def *stream_offset = nir_iadd(b, draw_params_offset, stream_base);
|
||||
|
||||
nir_ssa_def *draw_data0 = nir_load_ssbo(b, 4, 32, stream_buf, stream_offset);
|
||||
nir_ssa_def *vertex_count = nir_channel(b, draw_data0, 0);
|
||||
nir_ssa_def *instance_count = nir_channel(b, draw_data0, 1);
|
||||
nir_ssa_def *vertex_offset = nir_channel(b, draw_data0, 2);
|
||||
nir_ssa_def *first_instance = nir_channel(b, draw_data0, 3);
|
||||
nir_def *draw_data0 = nir_load_ssbo(b, 4, 32, stream_buf, stream_offset);
|
||||
nir_def *vertex_count = nir_channel(b, draw_data0, 0);
|
||||
nir_def *instance_count = nir_channel(b, draw_data0, 1);
|
||||
nir_def *vertex_offset = nir_channel(b, draw_data0, 2);
|
||||
nir_def *first_instance = nir_channel(b, draw_data0, 3);
|
||||
|
||||
nir_push_if(b, nir_iand(b, nir_ine_imm(b, vertex_count, 0), nir_ine_imm(b, instance_count, 0)));
|
||||
{
|
||||
|
|
@ -334,20 +334,20 @@ dgc_emit_draw(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, ni
|
|||
* Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV.
|
||||
*/
|
||||
static void
|
||||
dgc_emit_draw_indexed(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base,
|
||||
nir_ssa_def *draw_params_offset, nir_ssa_def *sequence_id, nir_ssa_def *max_index_count,
|
||||
dgc_emit_draw_indexed(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
|
||||
nir_def *draw_params_offset, nir_def *sequence_id, nir_def *max_index_count,
|
||||
const struct radv_device *device)
|
||||
{
|
||||
nir_ssa_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
|
||||
nir_ssa_def *stream_offset = nir_iadd(b, draw_params_offset, stream_base);
|
||||
nir_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
|
||||
nir_def *stream_offset = nir_iadd(b, draw_params_offset, stream_base);
|
||||
|
||||
nir_ssa_def *draw_data0 = nir_load_ssbo(b, 4, 32, stream_buf, stream_offset);
|
||||
nir_ssa_def *draw_data1 = nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd_imm(b, stream_offset, 16));
|
||||
nir_ssa_def *index_count = nir_channel(b, draw_data0, 0);
|
||||
nir_ssa_def *instance_count = nir_channel(b, draw_data0, 1);
|
||||
nir_ssa_def *first_index = nir_channel(b, draw_data0, 2);
|
||||
nir_ssa_def *vertex_offset = nir_channel(b, draw_data0, 3);
|
||||
nir_ssa_def *first_instance = nir_channel(b, draw_data1, 0);
|
||||
nir_def *draw_data0 = nir_load_ssbo(b, 4, 32, stream_buf, stream_offset);
|
||||
nir_def *draw_data1 = nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd_imm(b, stream_offset, 16));
|
||||
nir_def *index_count = nir_channel(b, draw_data0, 0);
|
||||
nir_def *instance_count = nir_channel(b, draw_data0, 1);
|
||||
nir_def *first_index = nir_channel(b, draw_data0, 2);
|
||||
nir_def *vertex_offset = nir_channel(b, draw_data0, 3);
|
||||
nir_def *first_instance = nir_channel(b, draw_data1, 0);
|
||||
|
||||
nir_push_if(b, nir_iand(b, nir_ine_imm(b, index_count, 0), nir_ine_imm(b, instance_count, 0)));
|
||||
{
|
||||
|
|
@ -362,25 +362,25 @@ dgc_emit_draw_indexed(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream
|
|||
* Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV.
|
||||
*/
|
||||
static void
|
||||
dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base,
|
||||
nir_ssa_def *index_buffer_offset, nir_ssa_def *ibo_type_32, nir_ssa_def *ibo_type_8,
|
||||
dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
|
||||
nir_def *index_buffer_offset, nir_def *ibo_type_32, nir_def *ibo_type_8,
|
||||
nir_variable *index_size_var, nir_variable *max_index_count_var, const struct radv_device *device)
|
||||
{
|
||||
nir_ssa_def *index_stream_offset = nir_iadd(b, index_buffer_offset, stream_base);
|
||||
nir_ssa_def *data = nir_load_ssbo(b, 4, 32, stream_buf, index_stream_offset);
|
||||
nir_def *index_stream_offset = nir_iadd(b, index_buffer_offset, stream_base);
|
||||
nir_def *data = nir_load_ssbo(b, 4, 32, stream_buf, index_stream_offset);
|
||||
|
||||
nir_ssa_def *vk_index_type = nir_channel(b, data, 3);
|
||||
nir_ssa_def *index_type = nir_bcsel(b, nir_ieq(b, vk_index_type, ibo_type_32), nir_imm_int(b, V_028A7C_VGT_INDEX_32),
|
||||
nir_def *vk_index_type = nir_channel(b, data, 3);
|
||||
nir_def *index_type = nir_bcsel(b, nir_ieq(b, vk_index_type, ibo_type_32), nir_imm_int(b, V_028A7C_VGT_INDEX_32),
|
||||
nir_imm_int(b, V_028A7C_VGT_INDEX_16));
|
||||
index_type = nir_bcsel(b, nir_ieq(b, vk_index_type, ibo_type_8), nir_imm_int(b, V_028A7C_VGT_INDEX_8), index_type);
|
||||
|
||||
nir_ssa_def *index_size = nir_iand_imm(b, nir_ushr(b, nir_imm_int(b, 0x142), nir_imul_imm(b, index_type, 4)), 0xf);
|
||||
nir_def *index_size = nir_iand_imm(b, nir_ushr(b, nir_imm_int(b, 0x142), nir_imul_imm(b, index_type, 4)), 0xf);
|
||||
nir_store_var(b, index_size_var, index_size, 0x1);
|
||||
|
||||
nir_ssa_def *max_index_count = nir_udiv(b, nir_channel(b, data, 2), index_size);
|
||||
nir_def *max_index_count = nir_udiv(b, nir_channel(b, data, 2), index_size);
|
||||
nir_store_var(b, max_index_count_var, max_index_count, 0x1);
|
||||
|
||||
nir_ssa_def *cmd_values[3 + 2 + 3];
|
||||
nir_def *cmd_values[3 + 2 + 3];
|
||||
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX9) {
|
||||
unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
|
||||
|
|
@ -396,7 +396,7 @@ dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream
|
|||
cmd_values[2] = dgc_get_nop_packet(b, device);
|
||||
}
|
||||
|
||||
nir_ssa_def *addr_upper = nir_channel(b, data, 1);
|
||||
nir_def *addr_upper = nir_channel(b, data, 1);
|
||||
addr_upper = nir_ishr_imm(b, nir_ishl_imm(b, addr_upper, 16), 16);
|
||||
|
||||
cmd_values[3] = nir_imm_int(b, PKT3(PKT3_INDEX_BASE, 1, 0));
|
||||
|
|
@ -412,26 +412,26 @@ dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream
|
|||
* Emit VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV.
|
||||
*/
|
||||
static void
|
||||
dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base,
|
||||
nir_ssa_def *push_const_mask, nir_variable *upload_offset)
|
||||
dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
|
||||
nir_def *push_const_mask, nir_variable *upload_offset)
|
||||
{
|
||||
nir_ssa_def *vbo_cnt = load_param8(b, vbo_cnt);
|
||||
nir_ssa_def *const_copy = nir_ine_imm(b, load_param8(b, const_copy), 0);
|
||||
nir_ssa_def *const_copy_size = load_param16(b, const_copy_size);
|
||||
nir_ssa_def *const_copy_words = nir_ushr_imm(b, const_copy_size, 2);
|
||||
nir_def *vbo_cnt = load_param8(b, vbo_cnt);
|
||||
nir_def *const_copy = nir_ine_imm(b, load_param8(b, const_copy), 0);
|
||||
nir_def *const_copy_size = load_param16(b, const_copy_size);
|
||||
nir_def *const_copy_words = nir_ushr_imm(b, const_copy_size, 2);
|
||||
const_copy_words = nir_bcsel(b, const_copy, const_copy_words, nir_imm_int(b, 0));
|
||||
|
||||
nir_variable *idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "const_copy_idx");
|
||||
nir_store_var(b, idx, nir_imm_int(b, 0), 0x1);
|
||||
|
||||
nir_ssa_def *param_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PARAMS);
|
||||
nir_ssa_def *param_offset = nir_imul_imm(b, vbo_cnt, 24);
|
||||
nir_ssa_def *param_offset_offset = nir_iadd_imm(b, param_offset, MESA_VULKAN_SHADER_STAGES * 12);
|
||||
nir_ssa_def *param_const_offset =
|
||||
nir_def *param_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PARAMS);
|
||||
nir_def *param_offset = nir_imul_imm(b, vbo_cnt, 24);
|
||||
nir_def *param_offset_offset = nir_iadd_imm(b, param_offset, MESA_VULKAN_SHADER_STAGES * 12);
|
||||
nir_def *param_const_offset =
|
||||
nir_iadd_imm(b, param_offset, MAX_PUSH_CONSTANTS_SIZE + MESA_VULKAN_SHADER_STAGES * 12);
|
||||
nir_push_loop(b);
|
||||
{
|
||||
nir_ssa_def *cur_idx = nir_load_var(b, idx);
|
||||
nir_def *cur_idx = nir_load_var(b, idx);
|
||||
nir_push_if(b, nir_uge(b, cur_idx, const_copy_words));
|
||||
{
|
||||
nir_jump(b, nir_jump_break);
|
||||
|
|
@ -440,14 +440,14 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
|
|||
|
||||
nir_variable *data = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "copy_data");
|
||||
|
||||
nir_ssa_def *update = nir_iand(b, push_const_mask, nir_ishl(b, nir_imm_int64(b, 1), cur_idx));
|
||||
nir_def *update = nir_iand(b, push_const_mask, nir_ishl(b, nir_imm_int64(b, 1), cur_idx));
|
||||
update = nir_bcsel(b, nir_ult_imm(b, cur_idx, 64 /* bits in push_const_mask */), update, nir_imm_int64(b, 0));
|
||||
|
||||
nir_push_if(b, nir_ine_imm(b, update, 0));
|
||||
{
|
||||
nir_ssa_def *stream_offset =
|
||||
nir_def *stream_offset =
|
||||
nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_offset_offset, nir_ishl_imm(b, cur_idx, 2)));
|
||||
nir_ssa_def *new_data = nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd(b, stream_base, stream_offset));
|
||||
nir_def *new_data = nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd(b, stream_base, stream_offset));
|
||||
nir_store_var(b, data, new_data, 0x1);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
|
|
@ -468,26 +468,26 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
|
|||
|
||||
nir_variable *shader_idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "shader_idx");
|
||||
nir_store_var(b, shader_idx, nir_imm_int(b, 0), 0x1);
|
||||
nir_ssa_def *shader_cnt = load_param16(b, push_constant_shader_cnt);
|
||||
nir_def *shader_cnt = load_param16(b, push_constant_shader_cnt);
|
||||
|
||||
nir_push_loop(b);
|
||||
{
|
||||
nir_ssa_def *cur_shader_idx = nir_load_var(b, shader_idx);
|
||||
nir_def *cur_shader_idx = nir_load_var(b, shader_idx);
|
||||
nir_push_if(b, nir_uge(b, cur_shader_idx, shader_cnt));
|
||||
{
|
||||
nir_jump(b, nir_jump_break);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
nir_ssa_def *reg_info =
|
||||
nir_def *reg_info =
|
||||
nir_load_ssbo(b, 3, 32, param_buf, nir_iadd(b, param_offset, nir_imul_imm(b, cur_shader_idx, 12)));
|
||||
nir_ssa_def *upload_sgpr = nir_ubfe_imm(b, nir_channel(b, reg_info, 0), 0, 16);
|
||||
nir_ssa_def *inline_sgpr = nir_ubfe_imm(b, nir_channel(b, reg_info, 0), 16, 16);
|
||||
nir_ssa_def *inline_mask = nir_pack_64_2x32(b, nir_channels(b, reg_info, 0x6));
|
||||
nir_def *upload_sgpr = nir_ubfe_imm(b, nir_channel(b, reg_info, 0), 0, 16);
|
||||
nir_def *inline_sgpr = nir_ubfe_imm(b, nir_channel(b, reg_info, 0), 16, 16);
|
||||
nir_def *inline_mask = nir_pack_64_2x32(b, nir_channels(b, reg_info, 0x6));
|
||||
|
||||
nir_push_if(b, nir_ine_imm(b, upload_sgpr, 0));
|
||||
{
|
||||
nir_ssa_def *pkt[3] = {nir_imm_int(b, PKT3(PKT3_SET_SH_REG, 1, 0)), upload_sgpr,
|
||||
nir_def *pkt[3] = {nir_imm_int(b, PKT3(PKT3_SET_SH_REG, 1, 0)), upload_sgpr,
|
||||
nir_iadd(b, load_param32(b, upload_addr), nir_load_var(b, upload_offset))};
|
||||
|
||||
dgc_emit(b, cs, nir_vec(b, pkt, 3));
|
||||
|
|
@ -496,23 +496,23 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
|
|||
|
||||
nir_push_if(b, nir_ine_imm(b, inline_sgpr, 0));
|
||||
{
|
||||
nir_ssa_def *inline_len = nir_bit_count(b, inline_mask);
|
||||
nir_def *inline_len = nir_bit_count(b, inline_mask);
|
||||
nir_store_var(b, idx, nir_imm_int(b, 0), 0x1);
|
||||
|
||||
nir_ssa_def *pkt[2] = {nir_pkt3(b, PKT3_SET_SH_REG, inline_len), inline_sgpr};
|
||||
nir_def *pkt[2] = {nir_pkt3(b, PKT3_SET_SH_REG, inline_len), inline_sgpr};
|
||||
|
||||
dgc_emit(b, cs, nir_vec(b, pkt, 2));
|
||||
|
||||
nir_push_loop(b);
|
||||
{
|
||||
nir_ssa_def *cur_idx = nir_load_var(b, idx);
|
||||
nir_def *cur_idx = nir_load_var(b, idx);
|
||||
nir_push_if(b, nir_uge_imm(b, cur_idx, 64 /* bits in inline_mask */));
|
||||
{
|
||||
nir_jump(b, nir_jump_break);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
nir_ssa_def *l = nir_ishl(b, nir_imm_int64(b, 1), cur_idx);
|
||||
nir_def *l = nir_ishl(b, nir_imm_int64(b, 1), cur_idx);
|
||||
nir_push_if(b, nir_ieq_imm(b, nir_iand(b, l, inline_mask), 0));
|
||||
{
|
||||
nir_store_var(b, idx, nir_iadd_imm(b, cur_idx, 1), 0x1);
|
||||
|
|
@ -522,15 +522,15 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
|
|||
|
||||
nir_variable *data = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "copy_data");
|
||||
|
||||
nir_ssa_def *update = nir_iand(b, push_const_mask, nir_ishl(b, nir_imm_int64(b, 1), cur_idx));
|
||||
nir_def *update = nir_iand(b, push_const_mask, nir_ishl(b, nir_imm_int64(b, 1), cur_idx));
|
||||
update =
|
||||
nir_bcsel(b, nir_ult_imm(b, cur_idx, 64 /* bits in push_const_mask */), update, nir_imm_int64(b, 0));
|
||||
|
||||
nir_push_if(b, nir_ine_imm(b, update, 0));
|
||||
{
|
||||
nir_ssa_def *stream_offset =
|
||||
nir_def *stream_offset =
|
||||
nir_load_ssbo(b, 1, 32, param_buf, nir_iadd(b, param_offset_offset, nir_ishl_imm(b, cur_idx, 2)));
|
||||
nir_ssa_def *new_data = nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd(b, stream_base, stream_offset));
|
||||
nir_def *new_data = nir_load_ssbo(b, 1, 32, stream_buf, nir_iadd(b, stream_base, stream_offset));
|
||||
nir_store_var(b, data, new_data, 0x1);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
|
|
@ -558,10 +558,10 @@ dgc_emit_push_constant(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
|
|||
* For emitting VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV.
|
||||
*/
|
||||
static void
|
||||
dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *stream_buf, nir_ssa_def *stream_base,
|
||||
nir_ssa_def *vbo_bind_mask, nir_variable *upload_offset, const struct radv_device *device)
|
||||
dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
|
||||
nir_def *vbo_bind_mask, nir_variable *upload_offset, const struct radv_device *device)
|
||||
{
|
||||
nir_ssa_def *vbo_cnt = load_param8(b, vbo_cnt);
|
||||
nir_def *vbo_cnt = load_param8(b, vbo_cnt);
|
||||
nir_variable *vbo_idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "vbo_idx");
|
||||
nir_store_var(b, vbo_idx, nir_imm_int(b, 0), 0x1);
|
||||
|
||||
|
|
@ -573,40 +573,39 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
|
|||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
nir_ssa_def *vbo_offset = nir_imul_imm(b, nir_load_var(b, vbo_idx), 16);
|
||||
nir_def *vbo_offset = nir_imul_imm(b, nir_load_var(b, vbo_idx), 16);
|
||||
nir_variable *vbo_data = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uvec4_type(), "vbo_data");
|
||||
|
||||
nir_ssa_def *param_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PARAMS);
|
||||
nir_def *param_buf = radv_meta_load_descriptor(b, 0, DGC_DESC_PARAMS);
|
||||
nir_store_var(b, vbo_data, nir_load_ssbo(b, 4, 32, param_buf, vbo_offset), 0xf);
|
||||
|
||||
nir_ssa_def *vbo_override =
|
||||
nir_def *vbo_override =
|
||||
nir_ine_imm(b, nir_iand(b, vbo_bind_mask, nir_ishl(b, nir_imm_int(b, 1), nir_load_var(b, vbo_idx))), 0);
|
||||
nir_push_if(b, vbo_override);
|
||||
{
|
||||
nir_ssa_def *vbo_offset_offset =
|
||||
nir_def *vbo_offset_offset =
|
||||
nir_iadd(b, nir_imul_imm(b, vbo_cnt, 16), nir_imul_imm(b, nir_load_var(b, vbo_idx), 8));
|
||||
nir_ssa_def *vbo_over_data = nir_load_ssbo(b, 2, 32, param_buf, vbo_offset_offset);
|
||||
nir_ssa_def *stream_offset =
|
||||
nir_iadd(b, stream_base, nir_iand_imm(b, nir_channel(b, vbo_over_data, 0), 0x7FFF));
|
||||
nir_ssa_def *stream_data = nir_load_ssbo(b, 4, 32, stream_buf, stream_offset);
|
||||
nir_def *vbo_over_data = nir_load_ssbo(b, 2, 32, param_buf, vbo_offset_offset);
|
||||
nir_def *stream_offset = nir_iadd(b, stream_base, nir_iand_imm(b, nir_channel(b, vbo_over_data, 0), 0x7FFF));
|
||||
nir_def *stream_data = nir_load_ssbo(b, 4, 32, stream_buf, stream_offset);
|
||||
|
||||
nir_ssa_def *va = nir_pack_64_2x32(b, nir_trim_vector(b, stream_data, 2));
|
||||
nir_ssa_def *size = nir_channel(b, stream_data, 2);
|
||||
nir_ssa_def *stride = nir_channel(b, stream_data, 3);
|
||||
nir_def *va = nir_pack_64_2x32(b, nir_trim_vector(b, stream_data, 2));
|
||||
nir_def *size = nir_channel(b, stream_data, 2);
|
||||
nir_def *stride = nir_channel(b, stream_data, 3);
|
||||
|
||||
nir_ssa_def *dyn_stride = nir_test_mask(b, nir_channel(b, vbo_over_data, 0), DGC_DYNAMIC_STRIDE);
|
||||
nir_ssa_def *old_stride = nir_ubfe_imm(b, nir_channel(b, nir_load_var(b, vbo_data), 1), 16, 14);
|
||||
nir_def *dyn_stride = nir_test_mask(b, nir_channel(b, vbo_over_data, 0), DGC_DYNAMIC_STRIDE);
|
||||
nir_def *old_stride = nir_ubfe_imm(b, nir_channel(b, nir_load_var(b, vbo_data), 1), 16, 14);
|
||||
stride = nir_bcsel(b, dyn_stride, stride, old_stride);
|
||||
|
||||
nir_ssa_def *use_per_attribute_vb_descs = nir_test_mask(b, nir_channel(b, vbo_over_data, 0), 1u << 31);
|
||||
nir_def *use_per_attribute_vb_descs = nir_test_mask(b, nir_channel(b, vbo_over_data, 0), 1u << 31);
|
||||
nir_variable *num_records =
|
||||
nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "num_records");
|
||||
nir_store_var(b, num_records, size, 0x1);
|
||||
|
||||
nir_push_if(b, use_per_attribute_vb_descs);
|
||||
{
|
||||
nir_ssa_def *attrib_end = nir_ubfe_imm(b, nir_channel(b, vbo_over_data, 1), 16, 16);
|
||||
nir_ssa_def *attrib_index_offset = nir_ubfe_imm(b, nir_channel(b, vbo_over_data, 1), 0, 16);
|
||||
nir_def *attrib_end = nir_ubfe_imm(b, nir_channel(b, vbo_over_data, 1), 16, 16);
|
||||
nir_def *attrib_index_offset = nir_ubfe_imm(b, nir_channel(b, vbo_over_data, 1), 0, 16);
|
||||
|
||||
nir_push_if(b, nir_ult(b, nir_load_var(b, num_records), attrib_end));
|
||||
{
|
||||
|
|
@ -619,7 +618,7 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
|
|||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
nir_ssa_def *r = nir_iadd(
|
||||
nir_def *r = nir_iadd(
|
||||
b, nir_iadd_imm(b, nir_udiv(b, nir_isub(b, nir_load_var(b, num_records), attrib_end), stride), 1),
|
||||
attrib_index_offset);
|
||||
nir_store_var(b, num_records, r, 0x1);
|
||||
|
|
@ -627,13 +626,13 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
|
|||
nir_pop_if(b, NULL);
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
nir_ssa_def *convert_cond = nir_ine_imm(b, nir_load_var(b, num_records), 0);
|
||||
nir_def *convert_cond = nir_ine_imm(b, nir_load_var(b, num_records), 0);
|
||||
if (device->physical_device->rad_info.gfx_level == GFX9)
|
||||
convert_cond = nir_imm_false(b);
|
||||
else if (device->physical_device->rad_info.gfx_level != GFX8)
|
||||
convert_cond = nir_iand(b, convert_cond, nir_ieq_imm(b, stride, 0));
|
||||
|
||||
nir_ssa_def *new_records =
|
||||
nir_def *new_records =
|
||||
nir_iadd(b, nir_imul(b, nir_iadd_imm(b, nir_load_var(b, num_records), -1), stride), attrib_end);
|
||||
new_records = nir_bcsel(b, convert_cond, new_records, nir_load_var(b, num_records));
|
||||
nir_store_var(b, num_records, new_records, 0x1);
|
||||
|
|
@ -643,7 +642,7 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
|
|||
if (device->physical_device->rad_info.gfx_level != GFX8) {
|
||||
nir_push_if(b, nir_ine_imm(b, stride, 0));
|
||||
{
|
||||
nir_ssa_def *r = nir_iadd(b, nir_load_var(b, num_records), nir_iadd_imm(b, stride, -1));
|
||||
nir_def *r = nir_iadd(b, nir_load_var(b, num_records), nir_iadd_imm(b, stride, -1));
|
||||
nir_store_var(b, num_records, nir_udiv(b, r, stride), 0x1);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
|
@@ -651,19 +650,18 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
 }
 nir_pop_if(b, NULL);

-nir_ssa_def *rsrc_word3 = nir_channel(b, nir_load_var(b, vbo_data), 3);
+nir_def *rsrc_word3 = nir_channel(b, nir_load_var(b, vbo_data), 3);
 if (device->physical_device->rad_info.gfx_level >= GFX10) {
-nir_ssa_def *oob_select = nir_bcsel(b, nir_ieq_imm(b, stride, 0), nir_imm_int(b, V_008F0C_OOB_SELECT_RAW),
+nir_def *oob_select = nir_bcsel(b, nir_ieq_imm(b, stride, 0), nir_imm_int(b, V_008F0C_OOB_SELECT_RAW),
 nir_imm_int(b, V_008F0C_OOB_SELECT_STRUCTURED));
 rsrc_word3 = nir_iand_imm(b, rsrc_word3, C_008F0C_OOB_SELECT);
 rsrc_word3 = nir_ior(b, rsrc_word3, nir_ishl_imm(b, oob_select, 28));
 }

-nir_ssa_def *va_hi = nir_iand_imm(b, nir_unpack_64_2x32_split_y(b, va), 0xFFFF);
+nir_def *va_hi = nir_iand_imm(b, nir_unpack_64_2x32_split_y(b, va), 0xFFFF);
 stride = nir_iand_imm(b, stride, 0x3FFF);
-nir_ssa_def *new_vbo_data[4] = {nir_unpack_64_2x32_split_x(b, va),
-nir_ior(b, nir_ishl_imm(b, stride, 16), va_hi), nir_load_var(b, num_records),
-rsrc_word3};
+nir_def *new_vbo_data[4] = {nir_unpack_64_2x32_split_x(b, va), nir_ior(b, nir_ishl_imm(b, stride, 16), va_hi),
+nir_load_var(b, num_records), rsrc_word3};
 nir_store_var(b, vbo_data, nir_vec(b, new_vbo_data, 4), 0xf);
 }
 nir_pop_if(b, NULL);

@@ -672,22 +670,22 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_ssa_def *strea
 * num_records and stride are zero. This doesn't seem necessary on GFX8, GFX10 and
 * GFX10.3 but it doesn't hurt.
 */
-nir_ssa_def *num_records = nir_channel(b, nir_load_var(b, vbo_data), 2);
-nir_ssa_def *buf_va =
+nir_def *num_records = nir_channel(b, nir_load_var(b, vbo_data), 2);
+nir_def *buf_va =
 nir_iand_imm(b, nir_pack_64_2x32(b, nir_trim_vector(b, nir_load_var(b, vbo_data), 2)), (1ull << 48) - 1ull);
 nir_push_if(b, nir_ior(b, nir_ieq_imm(b, num_records, 0), nir_ieq_imm(b, buf_va, 0)));
 {
-nir_ssa_def *new_vbo_data[4] = {nir_imm_int(b, 0), nir_imm_int(b, 0), nir_imm_int(b, 0), nir_imm_int(b, 0)};
+nir_def *new_vbo_data[4] = {nir_imm_int(b, 0), nir_imm_int(b, 0), nir_imm_int(b, 0), nir_imm_int(b, 0)};
 nir_store_var(b, vbo_data, nir_vec(b, new_vbo_data, 4), 0xf);
 }
 nir_pop_if(b, NULL);

-nir_ssa_def *upload_off = nir_iadd(b, nir_load_var(b, upload_offset), vbo_offset);
+nir_def *upload_off = nir_iadd(b, nir_load_var(b, upload_offset), vbo_offset);
 nir_store_ssbo(b, nir_load_var(b, vbo_data), cs->descriptor, upload_off, .access = ACCESS_NON_READABLE);
 nir_store_var(b, vbo_idx, nir_iadd_imm(b, nir_load_var(b, vbo_idx), 1), 0x1);
 }
 nir_pop_loop(b, NULL);
-nir_ssa_def *packet[3] = {nir_imm_int(b, PKT3(PKT3_SET_SH_REG, 1, 0)), load_param16(b, vbo_reg),
+nir_def *packet[3] = {nir_imm_int(b, PKT3(PKT3_SET_SH_REG, 1, 0)), load_param16(b, vbo_reg),
 nir_iadd(b, load_param32(b, upload_addr), nir_load_var(b, upload_offset))};

 dgc_emit(b, cs, nir_vec(b, packet, 3));

@@ -701,15 +699,15 @@ build_dgc_prepare_shader(struct radv_device *dev)
 nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_dgc_prepare");
 b.shader->info.workgroup_size[0] = 64;

-nir_ssa_def *global_id = get_global_ids(&b, 1);
+nir_def *global_id = get_global_ids(&b, 1);

-nir_ssa_def *sequence_id = global_id;
+nir_def *sequence_id = global_id;

-nir_ssa_def *cmd_buf_stride = load_param32(&b, cmd_buf_stride);
-nir_ssa_def *sequence_count = load_param32(&b, sequence_count);
-nir_ssa_def *stream_stride = load_param32(&b, stream_stride);
+nir_def *cmd_buf_stride = load_param32(&b, cmd_buf_stride);
+nir_def *sequence_count = load_param32(&b, sequence_count);
+nir_def *stream_stride = load_param32(&b, stream_stride);

-nir_ssa_def *use_count = nir_iand_imm(&b, sequence_count, 1u << 31);
+nir_def *use_count = nir_iand_imm(&b, sequence_count, 1u << 31);
 sequence_count = nir_iand_imm(&b, sequence_count, UINT32_MAX >> 1);

 /* The effective number of draws is

@@ -720,8 +718,8 @@ build_dgc_prepare_shader(struct radv_device *dev)

 nir_push_if(&b, nir_ine_imm(&b, use_count, 0));
 {
-nir_ssa_def *count_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_COUNT);
-nir_ssa_def *cnt = nir_load_ssbo(&b, 1, 32, count_buf, nir_imm_int(&b, 0));
+nir_def *count_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_COUNT);
+nir_def *cnt = nir_load_ssbo(&b, 1, 32, count_buf, nir_imm_int(&b, 0));
 /* Must clamp count against the API count explicitly.
 * The workgroup potentially contains more threads than maxSequencesCount from API,
 * and we have to ensure these threads write NOP packets to pad out the IB. */

@@ -739,10 +737,10 @@ build_dgc_prepare_shader(struct radv_device *dev)
 .offset = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "cmd_buf_offset"),
 };
 nir_store_var(&b, cmd_buf.offset, nir_imul(&b, global_id, cmd_buf_stride), 1);
-nir_ssa_def *cmd_buf_end = nir_iadd(&b, nir_load_var(&b, cmd_buf.offset), cmd_buf_stride);
+nir_def *cmd_buf_end = nir_iadd(&b, nir_load_var(&b, cmd_buf.offset), cmd_buf_stride);

-nir_ssa_def *stream_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_STREAM);
-nir_ssa_def *stream_base = nir_imul(&b, sequence_id, stream_stride);
+nir_def *stream_buf = radv_meta_load_descriptor(&b, 0, DGC_DESC_STREAM);
+nir_def *stream_base = nir_imul(&b, sequence_id, stream_stride);

 nir_variable *upload_offset =
 nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "upload_offset");

@@ -750,14 +748,14 @@ build_dgc_prepare_shader(struct radv_device *dev)
 &b, upload_offset,
 nir_iadd(&b, load_param32(&b, cmd_buf_size), nir_imul(&b, load_param32(&b, upload_stride), sequence_id)), 0x1);

-nir_ssa_def *vbo_bind_mask = load_param32(&b, vbo_bind_mask);
+nir_def *vbo_bind_mask = load_param32(&b, vbo_bind_mask);
 nir_push_if(&b, nir_ine_imm(&b, vbo_bind_mask, 0));
 {
 dgc_emit_vertex_buffer(&b, &cmd_buf, stream_buf, stream_base, vbo_bind_mask, upload_offset, dev);
 }
 nir_pop_if(&b, NULL);

-nir_ssa_def *push_const_mask = load_param64(&b, push_constant_mask);
+nir_def *push_const_mask = load_param64(&b, push_constant_mask);
 nir_push_if(&b, nir_ine_imm(&b, push_const_mask, 0));
 {
 dgc_emit_push_constant(&b, &cmd_buf, stream_buf, stream_base, push_const_mask, upload_offset);

@@ -777,7 +775,7 @@ build_dgc_prepare_shader(struct radv_device *dev)
 nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "max_index_count");
 nir_store_var(&b, max_index_count_var, load_param32(&b, max_index_count), 0x1);

-nir_ssa_def *bind_index_buffer = nir_ieq_imm(&b, nir_load_var(&b, index_size_var), 0);
+nir_def *bind_index_buffer = nir_ieq_imm(&b, nir_load_var(&b, index_size_var), 0);
 nir_push_if(&b, bind_index_buffer);
 {
 dgc_emit_index_buffer(&b, &cmd_buf, stream_buf, stream_base, load_param16(&b, index_buffer_offset),

@@ -786,8 +784,8 @@ build_dgc_prepare_shader(struct radv_device *dev)
 }
 nir_pop_if(&b, NULL);

-nir_ssa_def *index_size = nir_load_var(&b, index_size_var);
-nir_ssa_def *max_index_count = nir_load_var(&b, max_index_count_var);
+nir_def *index_size = nir_load_var(&b, index_size_var);
+nir_def *max_index_count = nir_load_var(&b, max_index_count_var);

 index_size = nir_bcsel(&b, bind_index_buffer, nir_load_var(&b, index_size_var), index_size);
 max_index_count = nir_bcsel(&b, bind_index_buffer, nir_load_var(&b, max_index_count_var), max_index_count);

@@ -803,7 +801,7 @@ build_dgc_prepare_shader(struct radv_device *dev)
 if (dev->physical_device->rad_info.gfx_ib_pad_with_type2) {
 nir_push_loop(&b);
 {
-nir_ssa_def *curr_offset = nir_load_var(&b, cmd_buf.offset);
+nir_def *curr_offset = nir_load_var(&b, cmd_buf.offset);

 nir_push_if(&b, nir_ieq(&b, curr_offset, cmd_buf_end));
 {

@@ -811,16 +809,16 @@ build_dgc_prepare_shader(struct radv_device *dev)
 }
 nir_pop_if(&b, NULL);

-nir_ssa_def *pkt = nir_imm_int(&b, PKT2_NOP_PAD);
+nir_def *pkt = nir_imm_int(&b, PKT2_NOP_PAD);

 dgc_emit(&b, &cmd_buf, pkt);
 }
 nir_pop_loop(&b, NULL);
 } else {
-nir_ssa_def *cnt = nir_isub(&b, cmd_buf_end, nir_load_var(&b, cmd_buf.offset));
+nir_def *cnt = nir_isub(&b, cmd_buf_end, nir_load_var(&b, cmd_buf.offset));
 cnt = nir_ushr_imm(&b, cnt, 2);
 cnt = nir_iadd_imm(&b, cnt, -2);
-nir_ssa_def *pkt = nir_pkt3(&b, PKT3_NOP, cnt);
+nir_def *pkt = nir_pkt3(&b, PKT3_NOP, cnt);

 dgc_emit(&b, &cmd_buf, pkt);
 }
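The else branch pads the rest of the command buffer with a single PKT3_NOP. Assuming the PKT3 count semantics implied by the code above (a packet with count N occupies N + 2 dwords: one header plus N + 1 payload dwords), the count is just the remaining dword count minus two. A sketch:

#include <stdint.h>

/* Sketch of the single-NOP padding math from the else branch above. */
static uint32_t
pkt3_nop_count(uint32_t cmd_buf_end, uint32_t cmd_buf_offset)
{
   uint32_t rem_dwords = (cmd_buf_end - cmd_buf_offset) >> 2;
   return rem_dwords - 2; /* e.g. 64 bytes left -> 16 dwords -> count 14 */
}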
@@ -58,8 +58,7 @@ radv_get_pipelinestat_query_size(struct radv_device *device)
 }

 static void
-radv_store_availability(nir_builder *b, nir_ssa_def *flags, nir_ssa_def *dst_buf, nir_ssa_def *offset,
-nir_ssa_def *value32)
+radv_store_availability(nir_builder *b, nir_def *flags, nir_def *dst_buf, nir_def *offset, nir_def *value32)
 {
 nir_push_if(b, nir_test_mask(b, flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT));

@@ -129,23 +128,23 @@ build_occlusion_query_shader(struct radv_device *device)
 uint64_t enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask;
 unsigned db_count = device->physical_device->rad_info.max_render_backends;

-nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
+nir_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);

-nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
-nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
+nir_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
+nir_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);

-nir_ssa_def *global_id = get_global_ids(&b, 1);
+nir_def *global_id = get_global_ids(&b, 1);

-nir_ssa_def *input_stride = nir_imm_int(&b, db_count * 16);
-nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
-nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 8);
-nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
+nir_def *input_stride = nir_imm_int(&b, db_count * 16);
+nir_def *input_base = nir_imul(&b, input_stride, global_id);
+nir_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 8);
+nir_def *output_base = nir_imul(&b, output_stride, global_id);

 nir_store_var(&b, result, nir_imm_int64(&b, 0), 0x1);
 nir_store_var(&b, outer_counter, nir_imm_int(&b, 0), 0x1);
 nir_store_var(&b, available, nir_imm_true(&b), 0x1);

-nir_ssa_def *query_result_wait = nir_test_mask(&b, flags, VK_QUERY_RESULT_WAIT_BIT);
+nir_def *query_result_wait = nir_test_mask(&b, flags, VK_QUERY_RESULT_WAIT_BIT);
 nir_push_if(&b, query_result_wait);
 {
 /* Wait on the upper word of the last DB entry. */

@@ -156,8 +155,8 @@ build_occlusion_query_shader(struct radv_device *device)
 /* Prevent the SSBO load to be moved out of the loop. */
 nir_scoped_memory_barrier(&b, SCOPE_INVOCATION, NIR_MEMORY_ACQUIRE, nir_var_mem_ssbo);

-nir_ssa_def *load_offset = nir_iadd_imm(&b, input_base, rb_avail_offset);
-nir_ssa_def *load = nir_load_ssbo(&b, 1, 32, src_buf, load_offset, .align_mul = 4, .access = ACCESS_COHERENT);
+nir_def *load_offset = nir_iadd_imm(&b, input_base, rb_avail_offset);
+nir_def *load = nir_load_ssbo(&b, 1, 32, src_buf, load_offset, .align_mul = 4, .access = ACCESS_COHERENT);

 nir_push_if(&b, nir_ige_imm(&b, load, 0x80000000));
 {

@@ -171,24 +170,23 @@ build_occlusion_query_shader(struct radv_device *device)

 nir_push_loop(&b);

-nir_ssa_def *current_outer_count = nir_load_var(&b, outer_counter);
+nir_def *current_outer_count = nir_load_var(&b, outer_counter);
 radv_break_on_count(&b, outer_counter, nir_imm_int(&b, db_count));

-nir_ssa_def *enabled_cond =
-nir_iand_imm(&b, nir_ishl(&b, nir_imm_int64(&b, 1), current_outer_count), enabled_rb_mask);
+nir_def *enabled_cond = nir_iand_imm(&b, nir_ishl(&b, nir_imm_int64(&b, 1), current_outer_count), enabled_rb_mask);

 nir_push_if(&b, nir_i2b(&b, enabled_cond));

-nir_ssa_def *load_offset = nir_imul_imm(&b, current_outer_count, 16);
+nir_def *load_offset = nir_imul_imm(&b, current_outer_count, 16);
 load_offset = nir_iadd(&b, input_base, load_offset);

-nir_ssa_def *load = nir_load_ssbo(&b, 2, 64, src_buf, load_offset, .align_mul = 16);
+nir_def *load = nir_load_ssbo(&b, 2, 64, src_buf, load_offset, .align_mul = 16);

 nir_store_var(&b, start, nir_channel(&b, load, 0), 0x1);
 nir_store_var(&b, end, nir_channel(&b, load, 1), 0x1);

-nir_ssa_def *start_done = nir_ilt_imm(&b, nir_load_var(&b, start), 0);
-nir_ssa_def *end_done = nir_ilt_imm(&b, nir_load_var(&b, end), 0);
+nir_def *start_done = nir_ilt_imm(&b, nir_load_var(&b, start), 0);
+nir_def *end_done = nir_ilt_imm(&b, nir_load_var(&b, end), 0);

 nir_push_if(&b, nir_iand(&b, start_done, end_done));

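In scalar terms, the loop above sums end - start over the enabled render backends, using the sign bit of each 64-bit counter as its "written" flag. The in-loop accumulation itself falls outside this hunk, so the following C sketch is an assumption-labeled reconstruction, not the committed code:

#include <stdbool.h>
#include <stdint.h>

/* Sketch of the per-render-backend accumulation driven by the loop above.
 * Each backend writes a (start, end) counter pair whose top bit doubles as
 * an availability flag; since both values carry the bit, it cancels in the
 * subtraction. */
static bool
sum_occlusion(const uint64_t *pairs, unsigned db_count,
              uint64_t enabled_rb_mask, uint64_t *out_result)
{
   uint64_t result = 0;
   for (unsigned i = 0; i < db_count; i++) {
      if (!(enabled_rb_mask & (1ull << i)))
         continue;
      uint64_t start = pairs[2 * i], end = pairs[2 * i + 1];
      if (!(start >> 63) || !(end >> 63))
         return false; /* this backend has not finished writing yet */
      result += end - start;
   }
   *out_result = result;
   return true;
}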
@@ -206,8 +204,8 @@ build_occlusion_query_shader(struct radv_device *device)

 /* Store the result if complete or if partial results have been requested. */

-nir_ssa_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
-nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
+nir_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
+nir_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
 nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), nir_load_var(&b, available)));

 nir_push_if(&b, result_is_64bit);

@@ -280,27 +278,27 @@ build_pipeline_statistics_query_shader(struct radv_device *device)
 nir_variable *output_offset = nir_local_variable_create(b.impl, glsl_int_type(), "output_offset");
 nir_variable *result = nir_local_variable_create(b.impl, glsl_int64_t_type(), "result");

-nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
-nir_ssa_def *stats_mask = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 12);
-nir_ssa_def *avail_offset = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
+nir_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
+nir_def *stats_mask = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 12);
+nir_def *avail_offset = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

-nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
-nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
+nir_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
+nir_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);

-nir_ssa_def *global_id = get_global_ids(&b, 1);
+nir_def *global_id = get_global_ids(&b, 1);

-nir_ssa_def *input_stride = nir_imm_int(&b, pipelinestat_block_size * 2);
-nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
-nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 8);
-nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
+nir_def *input_stride = nir_imm_int(&b, pipelinestat_block_size * 2);
+nir_def *input_base = nir_imul(&b, input_stride, global_id);
+nir_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 8);
+nir_def *output_base = nir_imul(&b, output_stride, global_id);

 avail_offset = nir_iadd(&b, avail_offset, nir_imul_imm(&b, global_id, 4));

-nir_ssa_def *available32 = nir_load_ssbo(&b, 1, 32, src_buf, avail_offset);
+nir_def *available32 = nir_load_ssbo(&b, 1, 32, src_buf, avail_offset);

-nir_ssa_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
-nir_ssa_def *elem_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
-nir_ssa_def *elem_count = nir_ushr_imm(&b, stats_mask, 16);
+nir_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
+nir_def *elem_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
+nir_def *elem_count = nir_ushr_imm(&b, stats_mask, 16);

 radv_store_availability(&b, flags, dst_buf, nir_iadd(&b, output_base, nir_imul(&b, elem_count, elem_size)),
 available32);

@@ -311,12 +309,11 @@ build_pipeline_statistics_query_shader(struct radv_device *device)
 for (int i = 0; i < ARRAY_SIZE(pipeline_statistics_indices); ++i) {
 nir_push_if(&b, nir_test_mask(&b, stats_mask, BITFIELD64_BIT(i)));

-nir_ssa_def *start_offset = nir_iadd_imm(&b, input_base, pipeline_statistics_indices[i] * 8);
-nir_ssa_def *start = nir_load_ssbo(&b, 1, 64, src_buf, start_offset);
+nir_def *start_offset = nir_iadd_imm(&b, input_base, pipeline_statistics_indices[i] * 8);
+nir_def *start = nir_load_ssbo(&b, 1, 64, src_buf, start_offset);

-nir_ssa_def *end_offset =
-nir_iadd_imm(&b, input_base, pipeline_statistics_indices[i] * 8 + pipelinestat_block_size);
-nir_ssa_def *end = nir_load_ssbo(&b, 1, 64, src_buf, end_offset);
+nir_def *end_offset = nir_iadd_imm(&b, input_base, pipeline_statistics_indices[i] * 8 + pipelinestat_block_size);
+nir_def *end = nir_load_ssbo(&b, 1, 64, src_buf, end_offset);

 nir_store_var(&b, result, nir_isub(&b, end, start), 0x1);

@@ -347,10 +344,10 @@ build_pipeline_statistics_query_shader(struct radv_device *device)

 nir_loop *loop = nir_push_loop(&b);

-nir_ssa_def *current_counter = nir_load_var(&b, counter);
+nir_def *current_counter = nir_load_var(&b, counter);
 radv_break_on_count(&b, counter, elem_count);

-nir_ssa_def *output_elem = nir_iadd(&b, output_base, nir_imul(&b, elem_size, current_counter));
+nir_def *output_elem = nir_iadd(&b, output_base, nir_imul(&b, elem_size, current_counter));
 nir_push_if(&b, result_is_64bit);

 nir_store_ssbo(&b, nir_imm_int64(&b, 0), dst_buf, output_elem);

@@ -415,44 +412,44 @@ build_tfb_query_shader(struct radv_device *device)
 nir_store_var(&b, result, nir_replicate(&b, nir_imm_int64(&b, 0), 2), 0x3);
 nir_store_var(&b, available, nir_imm_false(&b), 0x1);

-nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
+nir_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);

 /* Load resources. */
-nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
-nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
+nir_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
+nir_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);

 /* Compute global ID. */
-nir_ssa_def *global_id = get_global_ids(&b, 1);
+nir_def *global_id = get_global_ids(&b, 1);

 /* Compute src/dst strides. */
-nir_ssa_def *input_stride = nir_imm_int(&b, 32);
-nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
-nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 8);
-nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
+nir_def *input_stride = nir_imm_int(&b, 32);
+nir_def *input_base = nir_imul(&b, input_stride, global_id);
+nir_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 8);
+nir_def *output_base = nir_imul(&b, output_stride, global_id);

 /* Load data from the query pool. */
-nir_ssa_def *load1 = nir_load_ssbo(&b, 4, 32, src_buf, input_base, .align_mul = 32);
-nir_ssa_def *load2 = nir_load_ssbo(&b, 4, 32, src_buf, nir_iadd_imm(&b, input_base, 16), .align_mul = 16);
+nir_def *load1 = nir_load_ssbo(&b, 4, 32, src_buf, input_base, .align_mul = 32);
+nir_def *load2 = nir_load_ssbo(&b, 4, 32, src_buf, nir_iadd_imm(&b, input_base, 16), .align_mul = 16);

 /* Check if result is available. */
-nir_ssa_def *avails[2];
+nir_def *avails[2];
 avails[0] = nir_iand(&b, nir_channel(&b, load1, 1), nir_channel(&b, load1, 3));
 avails[1] = nir_iand(&b, nir_channel(&b, load2, 1), nir_channel(&b, load2, 3));
-nir_ssa_def *result_is_available = nir_test_mask(&b, nir_iand(&b, avails[0], avails[1]), 0x80000000);
+nir_def *result_is_available = nir_test_mask(&b, nir_iand(&b, avails[0], avails[1]), 0x80000000);

 /* Only compute result if available. */
 nir_push_if(&b, result_is_available);

 /* Pack values. */
-nir_ssa_def *packed64[4];
+nir_def *packed64[4];
 packed64[0] = nir_pack_64_2x32(&b, nir_trim_vector(&b, load1, 2));
 packed64[1] = nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load1, 2), nir_channel(&b, load1, 3)));
 packed64[2] = nir_pack_64_2x32(&b, nir_trim_vector(&b, load2, 2));
 packed64[3] = nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load2, 2), nir_channel(&b, load2, 3)));

 /* Compute result. */
-nir_ssa_def *num_primitive_written = nir_isub(&b, packed64[3], packed64[1]);
-nir_ssa_def *primitive_storage_needed = nir_isub(&b, packed64[2], packed64[0]);
+nir_def *num_primitive_written = nir_isub(&b, packed64[3], packed64[1]);
+nir_def *primitive_storage_needed = nir_isub(&b, packed64[2], packed64[0]);

 nir_store_var(&b, result, nir_vec2(&b, num_primitive_written, primitive_storage_needed), 0x3);
 nir_store_var(&b, available, nir_imm_true(&b), 0x1);

@@ -460,8 +457,8 @@ build_tfb_query_shader(struct radv_device *device)
 nir_pop_if(&b, NULL);

 /* Determine if result is 64 or 32 bit. */
-nir_ssa_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
-nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16), nir_imm_int(&b, 8));
+nir_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
+nir_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16), nir_imm_int(&b, 8));

 /* Store the result if complete or partial results have been requested. */
 nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), nir_load_var(&b, available)));

@@ -527,30 +524,30 @@ build_timestamp_query_shader(struct radv_device *device)
 nir_store_var(&b, result, nir_imm_int64(&b, 0), 0x1);
 nir_store_var(&b, available, nir_imm_false(&b), 0x1);

-nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
+nir_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);

 /* Load resources. */
-nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
-nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
+nir_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
+nir_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);

 /* Compute global ID. */
-nir_ssa_def *global_id = get_global_ids(&b, 1);
+nir_def *global_id = get_global_ids(&b, 1);

 /* Compute src/dst strides. */
-nir_ssa_def *input_stride = nir_imm_int(&b, 8);
-nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
-nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 8);
-nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
+nir_def *input_stride = nir_imm_int(&b, 8);
+nir_def *input_base = nir_imul(&b, input_stride, global_id);
+nir_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 8);
+nir_def *output_base = nir_imul(&b, output_stride, global_id);

 /* Load data from the query pool. */
-nir_ssa_def *load = nir_load_ssbo(&b, 2, 32, src_buf, input_base, .align_mul = 8);
+nir_def *load = nir_load_ssbo(&b, 2, 32, src_buf, input_base, .align_mul = 8);

 /* Pack the timestamp. */
-nir_ssa_def *timestamp;
+nir_def *timestamp;
 timestamp = nir_pack_64_2x32(&b, nir_trim_vector(&b, load, 2));

 /* Check if result is available. */
-nir_ssa_def *result_is_available = nir_i2b(&b, nir_ine_imm(&b, timestamp, TIMESTAMP_NOT_READY));
+nir_def *result_is_available = nir_i2b(&b, nir_ine_imm(&b, timestamp, TIMESTAMP_NOT_READY));

 /* Only store result if available. */
 nir_push_if(&b, result_is_available);

@@ -561,8 +558,8 @@ build_timestamp_query_shader(struct radv_device *device)
 nir_pop_if(&b, NULL);

 /* Determine if result is 64 or 32 bit. */
-nir_ssa_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
-nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
+nir_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
+nir_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));

 /* Store the result if complete or partial results have been requested. */
 nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), nir_load_var(&b, available)));

@@ -639,58 +636,57 @@ build_pg_query_shader(struct radv_device *device)
 nir_store_var(&b, result, nir_imm_int64(&b, 0), 0x1);
 nir_store_var(&b, available, nir_imm_false(&b), 0x1);

-nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 16);
+nir_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 16);

 /* Load resources. */
-nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
-nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
+nir_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
+nir_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);

 /* Compute global ID. */
-nir_ssa_def *global_id = get_global_ids(&b, 1);
+nir_def *global_id = get_global_ids(&b, 1);

 /* Determine if the query pool uses GDS for NGG. */
-nir_ssa_def *uses_gds = nir_i2b(&b, nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20));
+nir_def *uses_gds = nir_i2b(&b, nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20));

 /* Compute src/dst strides. */
-nir_ssa_def *input_stride =
+nir_def *input_stride =
 nir_bcsel(&b, uses_gds, nir_imm_int(&b, RADV_PGQ_STRIDE_GDS), nir_imm_int(&b, RADV_PGQ_STRIDE));
-nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
-nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 16);
-nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
+nir_def *input_base = nir_imul(&b, input_stride, global_id);
+nir_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 16);
+nir_def *output_base = nir_imul(&b, output_stride, global_id);

 /* Load data from the query pool. */
-nir_ssa_def *load1 = nir_load_ssbo(&b, 2, 32, src_buf, input_base, .align_mul = 32);
-nir_ssa_def *load2 =
-nir_load_ssbo(&b, 2, 32, src_buf, nir_iadd(&b, input_base, nir_imm_int(&b, 16)), .align_mul = 16);
+nir_def *load1 = nir_load_ssbo(&b, 2, 32, src_buf, input_base, .align_mul = 32);
+nir_def *load2 = nir_load_ssbo(&b, 2, 32, src_buf, nir_iadd(&b, input_base, nir_imm_int(&b, 16)), .align_mul = 16);

 /* Check if result is available. */
-nir_ssa_def *avails[2];
+nir_def *avails[2];
 avails[0] = nir_channel(&b, load1, 1);
 avails[1] = nir_channel(&b, load2, 1);
-nir_ssa_def *result_is_available =
+nir_def *result_is_available =
 nir_i2b(&b, nir_iand(&b, nir_iand(&b, avails[0], avails[1]), nir_imm_int(&b, 0x80000000)));

 /* Only compute result if available. */
 nir_push_if(&b, result_is_available);

 /* Pack values. */
-nir_ssa_def *packed64[2];
+nir_def *packed64[2];
 packed64[0] = nir_pack_64_2x32(&b, nir_trim_vector(&b, load1, 2));
 packed64[1] = nir_pack_64_2x32(&b, nir_trim_vector(&b, load2, 2));

 /* Compute result. */
-nir_ssa_def *primitive_storage_needed = nir_isub(&b, packed64[1], packed64[0]);
+nir_def *primitive_storage_needed = nir_isub(&b, packed64[1], packed64[0]);

 nir_store_var(&b, result, primitive_storage_needed, 0x1);

 nir_push_if(&b, uses_gds);
 {
-nir_ssa_def *gds_start =
+nir_def *gds_start =
 nir_load_ssbo(&b, 1, 32, src_buf, nir_iadd(&b, input_base, nir_imm_int(&b, 32)), .align_mul = 4);
-nir_ssa_def *gds_end =
+nir_def *gds_end =
 nir_load_ssbo(&b, 1, 32, src_buf, nir_iadd(&b, input_base, nir_imm_int(&b, 36)), .align_mul = 4);

-nir_ssa_def *ngg_gds_result = nir_isub(&b, gds_end, gds_start);
+nir_def *ngg_gds_result = nir_isub(&b, gds_end, gds_start);

 nir_store_var(&b, result, nir_iadd(&b, nir_load_var(&b, result), nir_u2u64(&b, ngg_gds_result)), 0x1);
 }

@@ -701,8 +697,8 @@ build_pg_query_shader(struct radv_device *device)
 nir_pop_if(&b, NULL);

 /* Determine if result is 64 or 32 bit. */
-nir_ssa_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
-nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
+nir_def *result_is_64bit = nir_test_mask(&b, flags, VK_QUERY_RESULT_64_BIT);
+nir_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));

 /* Store the result if complete or partial results have been requested. */
 nir_push_if(&b, nir_ior(&b, nir_test_mask(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT), nir_load_var(&b, available)));

@@ -29,8 +29,7 @@
 #include <llvm/Config/llvm-config.h>
 #endif

-static nir_ssa_def *build_node_to_addr(struct radv_device *device, nir_builder *b, nir_ssa_def *node,
-bool skip_type_and);
+static nir_def *build_node_to_addr(struct radv_device *device, nir_builder *b, nir_def *node, bool skip_type_and);

 bool
 radv_enable_rt(const struct radv_physical_device *pdevice, bool rt_pipelines)

@@ -54,16 +53,14 @@ void
 nir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var_indices, uint32_t chan_1,
 uint32_t chan_2)
 {
-nir_ssa_def *ssa_distances = nir_load_var(b, var_distances);
-nir_ssa_def *ssa_indices = nir_load_var(b, var_indices);
+nir_def *ssa_distances = nir_load_var(b, var_distances);
+nir_def *ssa_indices = nir_load_var(b, var_indices);
 /* if (distances[chan_2] < distances[chan_1]) { */
 nir_push_if(b, nir_flt(b, nir_channel(b, ssa_distances, chan_2), nir_channel(b, ssa_distances, chan_1)));
 {
 /* swap(distances[chan_2], distances[chan_1]); */
-nir_ssa_def *new_distances[4] = {nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32),
-nir_ssa_undef(b, 1, 32)};
-nir_ssa_def *new_indices[4] = {nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32), nir_ssa_undef(b, 1, 32),
-nir_ssa_undef(b, 1, 32)};
+nir_def *new_distances[4] = {nir_undef(b, 1, 32), nir_undef(b, 1, 32), nir_undef(b, 1, 32), nir_undef(b, 1, 32)};
+nir_def *new_indices[4] = {nir_undef(b, 1, 32), nir_undef(b, 1, 32), nir_undef(b, 1, 32), nir_undef(b, 1, 32)};
 new_distances[chan_2] = nir_channel(b, ssa_distances, chan_1);
 new_distances[chan_1] = nir_channel(b, ssa_distances, chan_2);
 new_indices[chan_2] = nir_channel(b, ssa_indices, chan_1);

@@ -75,9 +72,9 @@ nir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var
 nir_pop_if(b, NULL);
 }

-nir_ssa_def *
-intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node, nir_ssa_def *ray_tmax,
-nir_ssa_def *origin, nir_ssa_def *dir, nir_ssa_def *inv_dir)
+nir_def *
+intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_def *bvh_node, nir_def *ray_tmax,
+nir_def *origin, nir_def *dir, nir_def *inv_dir)
 {
 const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4);
 const struct glsl_type *uvec4_type = glsl_vector_type(GLSL_TYPE_UINT, 4);

@@ -85,7 +82,7 @@ intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_s
 bool old_exact = b->exact;
 b->exact = true;

-nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node, false);
+nir_def *node_addr = build_node_to_addr(device, b, bvh_node, false);

 /* vec4 distances = vec4(INF, INF, INF, INF); */
 nir_variable *distances = nir_variable_create(b->shader, nir_var_shader_temp, vec4_type, "distances");

@@ -108,10 +105,10 @@ intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_s
 };

 /* node->children[i] -> uint */
-nir_ssa_def *child_index = nir_build_load_global(b, 1, 32, nir_iadd_imm(b, node_addr, child_offset),
-.align_mul = 64, .align_offset = child_offset % 64);
+nir_def *child_index = nir_build_load_global(b, 1, 32, nir_iadd_imm(b, node_addr, child_offset), .align_mul = 64,
+.align_offset = child_offset % 64);
 /* node->coords[i][0], node->coords[i][1] -> vec3 */
-nir_ssa_def *node_coords[2] = {
+nir_def *node_coords[2] = {
 nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[0]), .align_mul = 64,
 .align_offset = coord_offsets[0] % 64),
 nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[1]), .align_mul = 64,

@@ -122,24 +119,24 @@ intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_s
 * We don't need to care about any other components being NaN as that is UB.
 * https://www.khronos.org/registry/vulkan/specs/1.2-extensions/html/chap36.html#VkAabbPositionsKHR
 */
-nir_ssa_def *min_x = nir_channel(b, node_coords[0], 0);
-nir_ssa_def *min_x_is_not_nan = nir_inot(b, nir_fneu(b, min_x, min_x)); /* NaN != NaN -> true */
+nir_def *min_x = nir_channel(b, node_coords[0], 0);
+nir_def *min_x_is_not_nan = nir_inot(b, nir_fneu(b, min_x, min_x)); /* NaN != NaN -> true */

 /* vec3 bound0 = (node->coords[i][0] - origin) * inv_dir; */
-nir_ssa_def *bound0 = nir_fmul(b, nir_fsub(b, node_coords[0], origin), inv_dir);
+nir_def *bound0 = nir_fmul(b, nir_fsub(b, node_coords[0], origin), inv_dir);
 /* vec3 bound1 = (node->coords[i][1] - origin) * inv_dir; */
-nir_ssa_def *bound1 = nir_fmul(b, nir_fsub(b, node_coords[1], origin), inv_dir);
+nir_def *bound1 = nir_fmul(b, nir_fsub(b, node_coords[1], origin), inv_dir);

 /* float tmin = max(max(min(bound0.x, bound1.x), min(bound0.y, bound1.y)), min(bound0.z,
 * bound1.z)); */
-nir_ssa_def *tmin = nir_fmax(b,
+nir_def *tmin = nir_fmax(b,
 nir_fmax(b, nir_fmin(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
 nir_fmin(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
 nir_fmin(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));

 /* float tmax = min(min(max(bound0.x, bound1.x), max(bound0.y, bound1.y)), max(bound0.z,
 * bound1.z)); */
-nir_ssa_def *tmax = nir_fmin(b,
+nir_def *tmax = nir_fmin(b,
 nir_fmin(b, nir_fmax(b, nir_channel(b, bound0, 0), nir_channel(b, bound1, 0)),
 nir_fmax(b, nir_channel(b, bound0, 1), nir_channel(b, bound1, 1))),
 nir_fmax(b, nir_channel(b, bound0, 2), nir_channel(b, bound1, 2)));
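The box test assembled above is the standard slab method. A scalar sketch with hypothetical names (the shader's full accept condition, including the NaN check on min.x and the comparison against the current hit distance, spans the surrounding hunks):

#include <stdbool.h>

/* Scalar mirror of the slab test above; inv_dir is the precomputed 1/dir,
 * so bound = (coord - origin) * inv_dir matches the NIR exactly. */
static bool
ray_box_test(const float origin[3], const float inv_dir[3],
             const float lo[3], const float hi[3], float ray_tmax,
             float *out_tmin)
{
   float tmin = 0.0f, tmax = ray_tmax;
   for (int axis = 0; axis < 3; axis++) {
      float b0 = (lo[axis] - origin[axis]) * inv_dir[axis];
      float b1 = (hi[axis] - origin[axis]) * inv_dir[axis];
      float enter = b0 < b1 ? b0 : b1;
      float leave = b0 < b1 ? b1 : b0;
      if (enter > tmin)
         tmin = enter;
      if (leave < tmax)
         tmax = leave;
   }
   *out_tmin = tmin;
   return tmin <= tmax; /* the slabs overlap somewhere along the ray */
}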
@@ -150,11 +147,11 @@ intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_s
 nir_flt(b, tmin, ray_tmax))));
 {
 /* child_indices[i] = node->children[i]; */
-nir_ssa_def *new_child_indices[4] = {child_index, child_index, child_index, child_index};
+nir_def *new_child_indices[4] = {child_index, child_index, child_index, child_index};
 nir_store_var(b, child_indices, nir_vec(b, new_child_indices, 4), 1u << i);

 /* distances[i] = tmin; */
-nir_ssa_def *new_distances[4] = {tmin, tmin, tmin, tmin};
+nir_def *new_distances[4] = {tmin, tmin, tmin, tmin};
 nir_store_var(b, distances, nir_vec(b, new_distances, 4), 1u << i);
 }
 /* } */

@@ -172,16 +169,16 @@ intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_s
 return nir_load_var(b, child_indices);
 }

-nir_ssa_def *
-intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node, nir_ssa_def *ray_tmax,
-nir_ssa_def *origin, nir_ssa_def *dir, nir_ssa_def *inv_dir)
+nir_def *
+intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_def *bvh_node, nir_def *ray_tmax,
+nir_def *origin, nir_def *dir, nir_def *inv_dir)
 {
 const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4);

 bool old_exact = b->exact;
 b->exact = true;

-nir_ssa_def *node_addr = build_node_to_addr(device, b, bvh_node, false);
+nir_def *node_addr = build_node_to_addr(device, b, bvh_node, false);

 const uint32_t coord_offsets[3] = {
 offsetof(struct radv_bvh_triangle_node, coords[0]),

@@ -190,7 +187,7 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_s
 };

 /* node->coords[0], node->coords[1], node->coords[2] -> vec3 */
-nir_ssa_def *node_coords[3] = {
+nir_def *node_coords[3] = {
 nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[0]), .align_mul = 64,
 .align_offset = coord_offsets[0] % 64),
 nir_build_load_global(b, 3, 32, nir_iadd_imm(b, node_addr, coord_offsets[1]), .align_mul = 64,

@@ -206,22 +203,21 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_s
 * http://jcgt.org/published/0002/01/05/paper.pdf */

 /* Calculate the dimension where the ray direction is largest */
-nir_ssa_def *abs_dir = nir_fabs(b, dir);
+nir_def *abs_dir = nir_fabs(b, dir);

-nir_ssa_def *abs_dirs[3] = {
+nir_def *abs_dirs[3] = {
 nir_channel(b, abs_dir, 0),
 nir_channel(b, abs_dir, 1),
 nir_channel(b, abs_dir, 2),
 };
 /* Find index of greatest value of abs_dir and put that as kz. */
-nir_ssa_def *kz =
-nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[1]),
+nir_def *kz = nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[1]),
 nir_bcsel(b, nir_fge(b, abs_dirs[0], abs_dirs[2]), nir_imm_int(b, 0), nir_imm_int(b, 2)),
 nir_bcsel(b, nir_fge(b, abs_dirs[1], abs_dirs[2]), nir_imm_int(b, 1), nir_imm_int(b, 2)));
-nir_ssa_def *kx = nir_imod_imm(b, nir_iadd_imm(b, kz, 1), 3);
-nir_ssa_def *ky = nir_imod_imm(b, nir_iadd_imm(b, kx, 1), 3);
-nir_ssa_def *k_indices[3] = {kx, ky, kz};
-nir_ssa_def *k = nir_vec(b, k_indices, 3);
+nir_def *kx = nir_imod_imm(b, nir_iadd_imm(b, kz, 1), 3);
+nir_def *ky = nir_imod_imm(b, nir_iadd_imm(b, kx, 1), 3);
+nir_def *k_indices[3] = {kx, ky, kz};
+nir_def *k = nir_vec(b, k_indices, 3);

 /* Swap kx and ky dimensions to preserve winding order */
 unsigned swap_xy_swizzle[4] = {1, 0, 2, 3};

@@ -232,35 +228,35 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_s
 kz = nir_channel(b, k, 2);

 /* Calculate shear constants */
-nir_ssa_def *sz = nir_frcp(b, nir_vector_extract(b, dir, kz));
-nir_ssa_def *sx = nir_fmul(b, nir_vector_extract(b, dir, kx), sz);
-nir_ssa_def *sy = nir_fmul(b, nir_vector_extract(b, dir, ky), sz);
+nir_def *sz = nir_frcp(b, nir_vector_extract(b, dir, kz));
+nir_def *sx = nir_fmul(b, nir_vector_extract(b, dir, kx), sz);
+nir_def *sy = nir_fmul(b, nir_vector_extract(b, dir, ky), sz);

 /* Calculate vertices relative to ray origin */
-nir_ssa_def *v_a = nir_fsub(b, node_coords[0], origin);
-nir_ssa_def *v_b = nir_fsub(b, node_coords[1], origin);
-nir_ssa_def *v_c = nir_fsub(b, node_coords[2], origin);
+nir_def *v_a = nir_fsub(b, node_coords[0], origin);
+nir_def *v_b = nir_fsub(b, node_coords[1], origin);
+nir_def *v_c = nir_fsub(b, node_coords[2], origin);

 /* Perform shear and scale */
-nir_ssa_def *ax = nir_fsub(b, nir_vector_extract(b, v_a, kx), nir_fmul(b, sx, nir_vector_extract(b, v_a, kz)));
-nir_ssa_def *ay = nir_fsub(b, nir_vector_extract(b, v_a, ky), nir_fmul(b, sy, nir_vector_extract(b, v_a, kz)));
-nir_ssa_def *bx = nir_fsub(b, nir_vector_extract(b, v_b, kx), nir_fmul(b, sx, nir_vector_extract(b, v_b, kz)));
-nir_ssa_def *by = nir_fsub(b, nir_vector_extract(b, v_b, ky), nir_fmul(b, sy, nir_vector_extract(b, v_b, kz)));
-nir_ssa_def *cx = nir_fsub(b, nir_vector_extract(b, v_c, kx), nir_fmul(b, sx, nir_vector_extract(b, v_c, kz)));
-nir_ssa_def *cy = nir_fsub(b, nir_vector_extract(b, v_c, ky), nir_fmul(b, sy, nir_vector_extract(b, v_c, kz)));
+nir_def *ax = nir_fsub(b, nir_vector_extract(b, v_a, kx), nir_fmul(b, sx, nir_vector_extract(b, v_a, kz)));
+nir_def *ay = nir_fsub(b, nir_vector_extract(b, v_a, ky), nir_fmul(b, sy, nir_vector_extract(b, v_a, kz)));
+nir_def *bx = nir_fsub(b, nir_vector_extract(b, v_b, kx), nir_fmul(b, sx, nir_vector_extract(b, v_b, kz)));
+nir_def *by = nir_fsub(b, nir_vector_extract(b, v_b, ky), nir_fmul(b, sy, nir_vector_extract(b, v_b, kz)));
+nir_def *cx = nir_fsub(b, nir_vector_extract(b, v_c, kx), nir_fmul(b, sx, nir_vector_extract(b, v_c, kz)));
+nir_def *cy = nir_fsub(b, nir_vector_extract(b, v_c, ky), nir_fmul(b, sy, nir_vector_extract(b, v_c, kz)));

-nir_ssa_def *u = nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx));
-nir_ssa_def *v = nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx));
-nir_ssa_def *w = nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax));
+nir_def *u = nir_fsub(b, nir_fmul(b, cx, by), nir_fmul(b, cy, bx));
+nir_def *v = nir_fsub(b, nir_fmul(b, ax, cy), nir_fmul(b, ay, cx));
+nir_def *w = nir_fsub(b, nir_fmul(b, bx, ay), nir_fmul(b, by, ax));

 /* Perform edge tests. */
-nir_ssa_def *cond_back =
+nir_def *cond_back =
 nir_ior(b, nir_ior(b, nir_flt_imm(b, u, 0.0f), nir_flt_imm(b, v, 0.0f)), nir_flt_imm(b, w, 0.0f));

-nir_ssa_def *cond_front =
+nir_def *cond_front =
 nir_ior(b, nir_ior(b, nir_fgt_imm(b, u, 0.0f), nir_fgt_imm(b, v, 0.0f)), nir_fgt_imm(b, w, 0.0f));

-nir_ssa_def *cond = nir_inot(b, nir_iand(b, cond_back, cond_front));
+nir_def *cond = nir_inot(b, nir_iand(b, cond_back, cond_front));

 /* If the ray is exactly on the edge where v is 0, consider it a miss.
 * This seems to correspond to what the hardware is doing.
@@ -271,21 +267,21 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_s

 nir_push_if(b, cond);
 {
-nir_ssa_def *det = nir_fadd(b, u, nir_fadd(b, v, w));
+nir_def *det = nir_fadd(b, u, nir_fadd(b, v, w));

-nir_ssa_def *az = nir_fmul(b, sz, nir_vector_extract(b, v_a, kz));
-nir_ssa_def *bz = nir_fmul(b, sz, nir_vector_extract(b, v_b, kz));
-nir_ssa_def *cz = nir_fmul(b, sz, nir_vector_extract(b, v_c, kz));
+nir_def *az = nir_fmul(b, sz, nir_vector_extract(b, v_a, kz));
+nir_def *bz = nir_fmul(b, sz, nir_vector_extract(b, v_b, kz));
+nir_def *cz = nir_fmul(b, sz, nir_vector_extract(b, v_c, kz));

-nir_ssa_def *t = nir_fadd(b, nir_fadd(b, nir_fmul(b, u, az), nir_fmul(b, v, bz)), nir_fmul(b, w, cz));
+nir_def *t = nir_fadd(b, nir_fadd(b, nir_fmul(b, u, az), nir_fmul(b, v, bz)), nir_fmul(b, w, cz));

-nir_ssa_def *t_signed = nir_fmul(b, nir_fsign(b, det), t);
+nir_def *t_signed = nir_fmul(b, nir_fsign(b, det), t);

-nir_ssa_def *det_cond_front = nir_inot(b, nir_flt_imm(b, t_signed, 0.0f));
+nir_def *det_cond_front = nir_inot(b, nir_flt_imm(b, t_signed, 0.0f));

 nir_push_if(b, det_cond_front);
 {
-nir_ssa_def *indices[4] = {t, det, v, w};
+nir_def *indices[4] = {t, det, v, w};
 nir_store_var(b, result, nir_vec(b, indices, 4), 0xf);
 }
 nir_pop_if(b, NULL);
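Taken together, the last few hunks implement the watertight ray/triangle test from the paper cited above (Woop et al., JCGT 2013). A scalar sketch with hypothetical names; the kx/ky winding swap and the "v exactly 0 counts as a miss" rule from the shader are simplified away:

#include <math.h>
#include <stdbool.h>

static bool
watertight_tri_test(const float org[3], const float dir[3],
                    const float p[3][3], float *out_t)
{
   /* kz = dominant axis of |dir|; kx and ky follow cyclically */
   int kz = 0;
   if (fabsf(dir[1]) > fabsf(dir[kz]))
      kz = 1;
   if (fabsf(dir[2]) > fabsf(dir[kz]))
      kz = 2;
   int kx = (kz + 1) % 3, ky = (kx + 1) % 3;

   /* shear constants */
   float sz = 1.0f / dir[kz];
   float sx = dir[kx] * sz, sy = dir[ky] * sz;

   /* vertices relative to the ray origin */
   float e[3][3];
   for (int i = 0; i < 3; i++)
      for (int j = 0; j < 3; j++)
         e[i][j] = p[i][j] - org[j];

   /* shear and scale into ray space */
   float ax = e[0][kx] - sx * e[0][kz], ay = e[0][ky] - sy * e[0][kz];
   float bx = e[1][kx] - sx * e[1][kz], by = e[1][ky] - sy * e[1][kz];
   float cx = e[2][kx] - sx * e[2][kz], cy = e[2][ky] - sy * e[2][kz];

   /* signed edge functions; mixed signs mean the ray misses */
   float u = cx * by - cy * bx;
   float v = ax * cy - ay * cx;
   float w = bx * ay - by * ax;
   bool back = u < 0.0f || v < 0.0f || w < 0.0f;
   bool front = u > 0.0f || v > 0.0f || w > 0.0f;
   if (back && front)
      return false;

   float det = u + v + w;
   if (det == 0.0f)
      return false;
   /* hit distance scaled by det; reject hits behind the origin */
   float t = u * (sz * e[0][kz]) + v * (sz * e[1][kz]) + w * (sz * e[2][kz]);
   if ((det < 0.0f ? -t : t) < 0.0f)
      return false;
   *out_t = t / det;
   return true;
}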
@@ -296,35 +292,35 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_s
 return nir_load_var(b, result);
 }

-nir_ssa_def *
-build_addr_to_node(nir_builder *b, nir_ssa_def *addr)
+nir_def *
+build_addr_to_node(nir_builder *b, nir_def *addr)
 {
 const uint64_t bvh_size = 1ull << 42;
-nir_ssa_def *node = nir_ushr_imm(b, addr, 3);
+nir_def *node = nir_ushr_imm(b, addr, 3);
 return nir_iand_imm(b, node, (bvh_size - 1) << 3);
 }

-static nir_ssa_def *
-build_node_to_addr(struct radv_device *device, nir_builder *b, nir_ssa_def *node, bool skip_type_and)
+static nir_def *
+build_node_to_addr(struct radv_device *device, nir_builder *b, nir_def *node, bool skip_type_and)
 {
-nir_ssa_def *addr = skip_type_and ? node : nir_iand_imm(b, node, ~7ull);
+nir_def *addr = skip_type_and ? node : nir_iand_imm(b, node, ~7ull);
 addr = nir_ishl_imm(b, addr, 3);
 /* Assumes everything is in the top half of address space, which is true in
 * GFX9+ for now. */
 return device->physical_device->rad_info.gfx_level >= GFX9 ? nir_ior_imm(b, addr, 0xffffull << 48) : addr;
 }

-nir_ssa_def *
-nir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[], bool translation)
+nir_def *
+nir_build_vec3_mat_mult(nir_builder *b, nir_def *vec, nir_def *matrix[], bool translation)
 {
-nir_ssa_def *result_components[3] = {
+nir_def *result_components[3] = {
 nir_channel(b, matrix[0], 3),
 nir_channel(b, matrix[1], 3),
 nir_channel(b, matrix[2], 3),
 };
 for (unsigned i = 0; i < 3; ++i) {
 for (unsigned j = 0; j < 3; ++j) {
-nir_ssa_def *v = nir_fmul(b, nir_channels(b, vec, 1 << j), nir_channels(b, matrix[i], 1 << j));
+nir_def *v = nir_fmul(b, nir_channels(b, vec, 1 << j), nir_channels(b, matrix[i], 1 << j));
 result_components[i] = (translation || j) ? nir_fadd(b, result_components[i], v) : v;
 }
 }
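The two address helpers above encode node IDs as shifted pointers with the node type packed into the low bits. A plain-C sketch of the same bit manipulation:

#include <stdbool.h>
#include <stdint.h>

/* Sketch of the node-ID encoding used by build_addr_to_node() and
 * build_node_to_addr() above: a node ID is the 64-byte-aligned node address
 * shifted right by 3, with the node type stored in the now-free low 3 bits.
 * The 0xffff << 48 OR assumes GFX9+ high-half virtual addresses, as the
 * comment in the diff notes. */
static uint64_t
addr_to_node(uint64_t addr)
{
   const uint64_t bvh_size = 1ull << 42;
   return (addr >> 3) & ((bvh_size - 1) << 3);
}

static uint64_t
node_to_addr(uint64_t node, bool gfx9_plus)
{
   uint64_t addr = (node & ~7ull) << 3; /* drop the type bits, restore the address */
   return gfx9_plus ? addr | (0xffffull << 48) : addr;
}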
@@ -332,7 +328,7 @@ nir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[],
 }

 void
-nir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_def **out)
+nir_build_wto_matrix_load(nir_builder *b, nir_def *instance_addr, nir_def **out)
 {
 unsigned offset = offsetof(struct radv_bvh_instance_node, wto_matrix);
 for (unsigned i = 0; i < 3; ++i) {

@@ -343,18 +339,18 @@ nir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_de

 /* When a hit is opaque the any_hit shader is skipped for this hit and the hit
 * is assumed to be an actual hit. */
-static nir_ssa_def *
-hit_is_opaque(nir_builder *b, nir_ssa_def *sbt_offset_and_flags, const struct radv_ray_flags *ray_flags,
-nir_ssa_def *geometry_id_and_flags)
+static nir_def *
+hit_is_opaque(nir_builder *b, nir_def *sbt_offset_and_flags, const struct radv_ray_flags *ray_flags,
+nir_def *geometry_id_and_flags)
 {
-nir_ssa_def *opaque = nir_uge_imm(b, nir_ior(b, geometry_id_and_flags, sbt_offset_and_flags),
+nir_def *opaque = nir_uge_imm(b, nir_ior(b, geometry_id_and_flags, sbt_offset_and_flags),
 RADV_INSTANCE_FORCE_OPAQUE | RADV_INSTANCE_NO_FORCE_NOT_OPAQUE);
 opaque = nir_bcsel(b, ray_flags->force_opaque, nir_imm_true(b), opaque);
 opaque = nir_bcsel(b, ray_flags->force_not_opaque, nir_imm_false(b), opaque);
 return opaque;
 }

-nir_ssa_def *
+nir_def *
 create_bvh_descriptor(nir_builder *b)
 {
 /* We create a BVH descriptor that covers the entire memory range. That way we can always

@@ -367,25 +363,25 @@ create_bvh_descriptor(nir_builder *b)

 static void
 insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args,
-const struct radv_ray_flags *ray_flags, nir_ssa_def *result, nir_ssa_def *bvh_node)
+const struct radv_ray_flags *ray_flags, nir_def *result, nir_def *bvh_node)
 {
 if (!args->triangle_cb)
 return;

 struct radv_triangle_intersection intersection;
 intersection.t = nir_channel(b, result, 0);
-nir_ssa_def *div = nir_channel(b, result, 1);
+nir_def *div = nir_channel(b, result, 1);
 intersection.t = nir_fdiv(b, intersection.t, div);

 nir_push_if(b, nir_flt(b, intersection.t, nir_load_deref(b, args->vars.tmax)));
 {
 intersection.frontface = nir_fgt_imm(b, div, 0);
-nir_ssa_def *switch_ccw =
+nir_def *switch_ccw =
 nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), RADV_INSTANCE_TRIANGLE_FLIP_FACING);
 intersection.frontface = nir_ixor(b, intersection.frontface, switch_ccw);

-nir_ssa_def *not_cull = ray_flags->no_skip_triangles;
-nir_ssa_def *not_facing_cull =
+nir_def *not_cull = ray_flags->no_skip_triangles;
+nir_def *not_facing_cull =
 nir_bcsel(b, intersection.frontface, ray_flags->no_cull_front, ray_flags->no_cull_back);

 not_cull = nir_iand(b, not_cull,

@@ -398,7 +394,7 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const
 nir_flt(b, args->tmin, intersection.t), not_cull));
 {
 intersection.base.node_addr = build_node_to_addr(device, b, bvh_node, false);
-nir_ssa_def *triangle_info = nir_build_load_global(
+nir_def *triangle_info = nir_build_load_global(
 b, 2, 32,
 nir_iadd_imm(b, intersection.base.node_addr, offsetof(struct radv_bvh_triangle_node, triangle_id)));
 intersection.base.primitive_id = nir_channel(b, triangle_info, 0);

@@ -409,7 +405,7 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const
 not_cull = nir_bcsel(b, intersection.base.opaque, ray_flags->no_cull_opaque, ray_flags->no_cull_no_opaque);
 nir_push_if(b, not_cull);
 {
-nir_ssa_def *divs[2] = {div, div};
+nir_def *divs[2] = {div, div};
 intersection.barycentrics = nir_fdiv(b, nir_channels(b, result, 0xc), nir_vec(b, divs, 2));

 args->triangle_cb(b, &intersection, args, ray_flags);

@@ -423,21 +419,21 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const

 static void
 insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args,
-const struct radv_ray_flags *ray_flags, nir_ssa_def *bvh_node)
+const struct radv_ray_flags *ray_flags, nir_def *bvh_node)
 {
 if (!args->aabb_cb)
 return;

 struct radv_leaf_intersection intersection;
 intersection.node_addr = build_node_to_addr(device, b, bvh_node, false);
-nir_ssa_def *triangle_info = nir_build_load_global(
+nir_def *triangle_info = nir_build_load_global(
 b, 2, 32, nir_iadd_imm(b, intersection.node_addr, offsetof(struct radv_bvh_aabb_node, primitive_id)));
 intersection.primitive_id = nir_channel(b, triangle_info, 0);
 intersection.geometry_id_and_flags = nir_channel(b, triangle_info, 1);
 intersection.opaque = hit_is_opaque(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), ray_flags,
 intersection.geometry_id_and_flags);

-nir_ssa_def *not_cull = nir_bcsel(b, intersection.opaque, ray_flags->no_cull_opaque, ray_flags->no_cull_no_opaque);
+nir_def *not_cull = nir_bcsel(b, intersection.opaque, ray_flags->no_cull_opaque, ray_flags->no_cull_no_opaque);
 not_cull = nir_iand(b, not_cull, ray_flags->no_skip_aabbs);
 nir_push_if(b, not_cull);
 {

@@ -446,22 +442,22 @@ insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, const str
 nir_pop_if(b, NULL);
 }

-static nir_ssa_def *
-fetch_parent_node(nir_builder *b, nir_ssa_def *bvh, nir_ssa_def *node)
+static nir_def *
+fetch_parent_node(nir_builder *b, nir_def *bvh, nir_def *node)
 {
-nir_ssa_def *offset = nir_iadd_imm(b, nir_imul_imm(b, nir_udiv_imm(b, node, 8), 4), 4);
+nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, nir_udiv_imm(b, node, 8), 4), 4);

 return nir_build_load_global(b, 1, 32, nir_isub(b, bvh, nir_u2u64(b, offset)), .align_mul = 4);
 }

-nir_ssa_def *
+nir_def *
 radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args)
 {
 nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete");
 nir_store_var(b, incomplete, nir_imm_true(b), 0x1);

-nir_ssa_def *desc = create_bvh_descriptor(b);
-nir_ssa_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0);
+nir_def *desc = create_bvh_descriptor(b);
+nir_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0);

 struct radv_ray_flags ray_flags = {
 .force_opaque = nir_test_mask(b, args->flags, SpvRayFlagsOpaqueKHRMask),
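fetch_parent_node() above reads parent links stored just below the BVH base address. A sketch of the addressing, assuming the division by 8 reflects the node-ID encoding shown earlier (IDs advance in steps of 8 for 64-byte nodes):

#include <stdint.h>

/* Sketch of the parent-link addressing: parent IDs are 32-bit entries
 * stored immediately below the BVH base, one per node slot. */
static uint64_t
parent_link_addr(uint64_t bvh_base, uint32_t node_id)
{
   return bvh_base - ((uint64_t)(node_id / 8) * 4 + 4);
}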
@@ -487,9 +483,9 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
 }
 nir_pop_if(b, NULL);

-nir_ssa_def *stack_instance_exit =
+nir_def *stack_instance_exit =
 nir_ige(b, nir_load_deref(b, args->vars.top_stack), nir_load_deref(b, args->vars.stack));
-nir_ssa_def *root_instance_exit =
+nir_def *root_instance_exit =
 nir_ieq(b, nir_load_deref(b, args->vars.previous_node), nir_load_deref(b, args->vars.instance_bottom_node));
 nir_if *instance_exit = nir_push_if(b, nir_ior(b, stack_instance_exit, root_instance_exit));
 instance_exit->control = nir_selection_control_dont_flatten;

@@ -508,10 +504,10 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
 nir_push_if(
 b, nir_ige(b, nir_load_deref(b, args->vars.stack_low_watermark), nir_load_deref(b, args->vars.stack)));
 {
-nir_ssa_def *prev = nir_load_deref(b, args->vars.previous_node);
-nir_ssa_def *bvh_addr = build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true);
+nir_def *prev = nir_load_deref(b, args->vars.previous_node);
+nir_def *bvh_addr = build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true);

-nir_ssa_def *parent = fetch_parent_node(b, bvh_addr, prev);
+nir_def *parent = fetch_parent_node(b, bvh_addr, prev);
 nir_push_if(b, nir_ieq_imm(b, parent, RADV_BVH_INVALID_NODE));
 {
 nir_store_var(b, incomplete, nir_imm_false(b), 0x1);

@@ -525,9 +521,9 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
 nir_store_deref(b, args->vars.stack,
 nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), -args->stack_stride), 1);

-nir_ssa_def *stack_ptr =
+nir_def *stack_ptr =
 nir_umod_imm(b, nir_load_deref(b, args->vars.stack), args->stack_stride * args->stack_entries);
-nir_ssa_def *bvh_node = args->stack_load_cb(b, stack_ptr, args);
+nir_def *bvh_node = args->stack_load_cb(b, stack_ptr, args);
 nir_store_deref(b, args->vars.current_node, bvh_node, 0x1);
 nir_store_deref(b, args->vars.previous_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1);
 }

@@ -539,15 +535,15 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
 }
 nir_pop_if(b, NULL);

-nir_ssa_def *bvh_node = nir_load_deref(b, args->vars.current_node);
+nir_def *bvh_node = nir_load_deref(b, args->vars.current_node);

-nir_ssa_def *prev_node = nir_load_deref(b, args->vars.previous_node);
+nir_def *prev_node = nir_load_deref(b, args->vars.previous_node);
 nir_store_deref(b, args->vars.previous_node, bvh_node, 0x1);
 nir_store_deref(b, args->vars.current_node, nir_imm_int(b, RADV_BVH_INVALID_NODE), 0x1);

-nir_ssa_def *global_bvh_node = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node));
+nir_def *global_bvh_node = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node));

-nir_ssa_def *intrinsic_result = NULL;
+nir_def *intrinsic_result = NULL;
 if (!radv_emulate_rt(device->physical_device)) {
 intrinsic_result =
 nir_bvh64_intersect_ray_amd(b, 32, desc, nir_unpack_64_2x32(b, global_bvh_node),

@@ -555,7 +551,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
 nir_load_deref(b, args->vars.dir), nir_load_deref(b, args->vars.inv_dir));
 }

-nir_ssa_def *node_type = nir_iand_imm(b, bvh_node, 7);
+nir_def *node_type = nir_iand_imm(b, bvh_node, 7);
 nir_push_if(b, nir_uge_imm(b, node_type, radv_bvh_node_box16));
 {
 nir_push_if(b, nir_uge_imm(b, node_type, radv_bvh_node_instance));

@@ -567,18 +563,18 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
 nir_push_else(b, NULL);
 {
 /* instance */
-nir_ssa_def *instance_node_addr = build_node_to_addr(device, b, global_bvh_node, false);
+nir_def *instance_node_addr = build_node_to_addr(device, b, global_bvh_node, false);
 nir_store_deref(b, args->vars.instance_addr, instance_node_addr, 1);

-nir_ssa_def *instance_data =
+nir_def *instance_data =
 nir_build_load_global(b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset = 0);

-nir_ssa_def *wto_matrix[3];
+nir_def *wto_matrix[3];
 nir_build_wto_matrix_load(b, instance_node_addr, wto_matrix);

 nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3), 1);

-nir_ssa_def *instance_and_mask = nir_channel(b, instance_data, 2);
+nir_def *instance_and_mask = nir_channel(b, instance_data, 2);
 nir_push_if(b, nir_ult(b, nir_iand(b, instance_and_mask, args->cull_mask), nir_imm_int(b, 1 << 24)));
 {
 nir_jump(b, nir_jump_continue);

@@ -602,7 +598,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
 }
 nir_push_else(b, NULL);
 {
-nir_ssa_def *result = intrinsic_result;
+nir_def *result = intrinsic_result;
 if (!result) {
 /* If we didn't run the intrinsic cause the hardware didn't support it,
 * emulate ray/box intersection here */

@@ -614,7 +610,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
 /* box */
 nir_push_if(b, nir_ieq_imm(b, prev_node, RADV_BVH_INVALID_NODE));
 {
-nir_ssa_def *new_nodes[4];
+nir_def *new_nodes[4];
 for (unsigned i = 0; i < 4; ++i)
 new_nodes[i] = nir_channel(b, result, i);

@@ -622,13 +618,13 @@
 nir_push_if(b, nir_ine_imm(b, new_nodes[i], RADV_BVH_INVALID_NODE));
|
||||
|
||||
for (unsigned i = 4; i-- > 1;) {
|
||||
nir_ssa_def *stack = nir_load_deref(b, args->vars.stack);
|
||||
nir_ssa_def *stack_ptr = nir_umod_imm(b, stack, args->stack_entries * args->stack_stride);
|
||||
nir_def *stack = nir_load_deref(b, args->vars.stack);
|
||||
nir_def *stack_ptr = nir_umod_imm(b, stack, args->stack_entries * args->stack_stride);
|
||||
args->stack_store_cb(b, stack_ptr, new_nodes[i], args);
|
||||
nir_store_deref(b, args->vars.stack, nir_iadd_imm(b, stack, args->stack_stride), 1);
|
||||
|
||||
if (i == 1) {
|
||||
nir_ssa_def *new_watermark =
|
||||
nir_def *new_watermark =
|
||||
nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), -args->stack_entries * args->stack_stride);
|
||||
new_watermark = nir_imax(b, nir_load_deref(b, args->vars.stack_low_watermark), new_watermark);
|
||||
nir_store_deref(b, args->vars.stack_low_watermark, new_watermark, 0x1);
|
||||
|
|
@ -640,7 +636,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
|
|||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
nir_ssa_def *next = nir_imm_int(b, RADV_BVH_INVALID_NODE);
|
||||
nir_def *next = nir_imm_int(b, RADV_BVH_INVALID_NODE);
|
||||
for (unsigned i = 0; i < 3; ++i) {
|
||||
next = nir_bcsel(b, nir_ieq(b, prev_node, nir_channel(b, result, i)), nir_channel(b, result, i + 1),
|
||||
next);
|
||||
|
|
@ -653,7 +649,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
|
|||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
nir_ssa_def *result = intrinsic_result;
|
||||
nir_def *result = intrinsic_result;
|
||||
if (!result) {
|
||||
/* If we didn't run the intrinsic cause the hardware didn't support it,
|
||||
* emulate ray/tri intersection here */
|
||||
|
|
|
|||
|
|
@ -35,41 +35,39 @@
|
|||
void nir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var_indices, uint32_t chan_1,
|
||||
uint32_t chan_2);
|
||||
|
||||
nir_ssa_def *intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node,
|
||||
nir_ssa_def *ray_tmax, nir_ssa_def *origin, nir_ssa_def *dir,
|
||||
nir_ssa_def *inv_dir);
|
||||
nir_def *intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_def *bvh_node,
|
||||
nir_def *ray_tmax, nir_def *origin, nir_def *dir, nir_def *inv_dir);
|
||||
|
||||
nir_ssa_def *intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_ssa_def *bvh_node,
|
||||
nir_ssa_def *ray_tmax, nir_ssa_def *origin, nir_ssa_def *dir,
|
||||
nir_ssa_def *inv_dir);
|
||||
nir_def *intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_def *bvh_node,
|
||||
nir_def *ray_tmax, nir_def *origin, nir_def *dir, nir_def *inv_dir);
|
||||
|
||||
nir_ssa_def *build_addr_to_node(nir_builder *b, nir_ssa_def *addr);
|
||||
nir_def *build_addr_to_node(nir_builder *b, nir_def *addr);
|
||||
|
||||
nir_ssa_def *nir_build_vec3_mat_mult(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *matrix[], bool translation);
|
||||
nir_def *nir_build_vec3_mat_mult(nir_builder *b, nir_def *vec, nir_def *matrix[], bool translation);
|
||||
|
||||
void nir_build_wto_matrix_load(nir_builder *b, nir_ssa_def *instance_addr, nir_ssa_def **out);
|
||||
void nir_build_wto_matrix_load(nir_builder *b, nir_def *instance_addr, nir_def **out);
|
||||
|
||||
nir_ssa_def *create_bvh_descriptor(nir_builder *b);
|
||||
nir_def *create_bvh_descriptor(nir_builder *b);
|
||||
|
||||
struct radv_ray_traversal_args;
|
||||
|
||||
struct radv_ray_flags {
|
||||
nir_ssa_def *force_opaque;
|
||||
nir_ssa_def *force_not_opaque;
|
||||
nir_ssa_def *terminate_on_first_hit;
|
||||
nir_ssa_def *no_cull_front;
|
||||
nir_ssa_def *no_cull_back;
|
||||
nir_ssa_def *no_cull_opaque;
|
||||
nir_ssa_def *no_cull_no_opaque;
|
||||
nir_ssa_def *no_skip_triangles;
|
||||
nir_ssa_def *no_skip_aabbs;
|
||||
nir_def *force_opaque;
|
||||
nir_def *force_not_opaque;
|
||||
nir_def *terminate_on_first_hit;
|
||||
nir_def *no_cull_front;
|
||||
nir_def *no_cull_back;
|
||||
nir_def *no_cull_opaque;
|
||||
nir_def *no_cull_no_opaque;
|
||||
nir_def *no_skip_triangles;
|
||||
nir_def *no_skip_aabbs;
|
||||
};
|
||||
|
||||
struct radv_leaf_intersection {
|
||||
nir_ssa_def *node_addr;
|
||||
nir_ssa_def *primitive_id;
|
||||
nir_ssa_def *geometry_id_and_flags;
|
||||
nir_ssa_def *opaque;
|
||||
nir_def *node_addr;
|
||||
nir_def *primitive_id;
|
||||
nir_def *geometry_id_and_flags;
|
||||
nir_def *opaque;
|
||||
};
|
||||
|
||||
typedef void (*radv_aabb_intersection_cb)(nir_builder *b, struct radv_leaf_intersection *intersection,
|
||||
|
|
@ -78,20 +76,19 @@ typedef void (*radv_aabb_intersection_cb)(nir_builder *b, struct radv_leaf_inter
|
|||
struct radv_triangle_intersection {
|
||||
struct radv_leaf_intersection base;
|
||||
|
||||
nir_ssa_def *t;
|
||||
nir_ssa_def *frontface;
|
||||
nir_ssa_def *barycentrics;
|
||||
nir_def *t;
|
||||
nir_def *frontface;
|
||||
nir_def *barycentrics;
|
||||
};
|
||||
|
||||
typedef void (*radv_triangle_intersection_cb)(nir_builder *b, struct radv_triangle_intersection *intersection,
|
||||
const struct radv_ray_traversal_args *args,
|
||||
const struct radv_ray_flags *ray_flags);
|
||||
|
||||
typedef void (*radv_rt_stack_store_cb)(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value,
|
||||
typedef void (*radv_rt_stack_store_cb)(nir_builder *b, nir_def *index, nir_def *value,
|
||||
const struct radv_ray_traversal_args *args);
|
||||
|
||||
typedef nir_ssa_def *(*radv_rt_stack_load_cb)(nir_builder *b, nir_ssa_def *index,
|
||||
const struct radv_ray_traversal_args *args);
|
||||
typedef nir_def *(*radv_rt_stack_load_cb)(nir_builder *b, nir_def *index, const struct radv_ray_traversal_args *args);
|
||||
|
||||
struct radv_ray_traversal_vars {
|
||||
/* For each accepted hit, tmax will be set to the t value. This allows for automatic intersection
|
||||
|
|
@ -132,12 +129,12 @@ struct radv_ray_traversal_vars {
|
|||
};
|
||||
|
||||
struct radv_ray_traversal_args {
|
||||
nir_ssa_def *root_bvh_base;
|
||||
nir_ssa_def *flags;
|
||||
nir_ssa_def *cull_mask;
|
||||
nir_ssa_def *origin;
|
||||
nir_ssa_def *tmin;
|
||||
nir_ssa_def *dir;
|
||||
nir_def *root_bvh_base;
|
||||
nir_def *flags;
|
||||
nir_def *cull_mask;
|
||||
nir_def *origin;
|
||||
nir_def *tmin;
|
||||
nir_def *dir;
|
||||
|
||||
struct radv_ray_traversal_vars vars;
|
||||
|
||||
|
|
@ -164,7 +161,7 @@ struct radv_ray_traversal_args {
|
|||
* rayQueryProceedEXT. Traversal will only be considered incomplete, if one of the specified
|
||||
* callbacks breaks out of the traversal loop.
|
||||
*/
|
||||
nir_ssa_def *radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
|
||||
nir_def *radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
|
||||
const struct radv_ray_traversal_args *args);
|
||||
|
||||
#endif
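
This header is the whole contract for the shared traversal walker: fill in radv_ray_traversal_args plus the two stack callbacks. A compressed, hypothetical usage sketch (the my_* names, the constants, and the partial initializer are illustrative, not part of this commit; compare the real store_stack_entry/load_stack_entry callbacks later in this diff):

static void
my_stack_store(nir_builder *b, nir_def *index, nir_def *value, const struct radv_ray_traversal_args *args)
{
   nir_store_shared(b, value, index, .base = 0, .align_mul = 4); /* one 32-bit node id per entry */
}

static nir_def *
my_stack_load(nir_builder *b, nir_def *index, const struct radv_ray_traversal_args *args)
{
   return nir_load_shared(b, 1, 32, index, .base = 0, .align_mul = 4);
}

/* ... fill the remaining nir_def fields (flags, cull_mask, origin, tmin, dir, vars, ...) analogously ... */
struct radv_ray_traversal_args args = {
   .root_bvh_base = root_bvh_base,
   .stack_stride = 4,    /* illustrative */
   .stack_entries = 64,  /* illustrative */
   .stack_store_cb = my_stack_store,
   .stack_load_cb = my_stack_load,
};
nir_def *incomplete = radv_build_ray_traversal(device, b, &args);
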
@ -44,7 +44,7 @@ lower_rt_derefs(nir_shader *shader)

nir_builder b = nir_builder_at(nir_before_cf_list(&impl->body));

nir_ssa_def *arg_offset = nir_load_rt_arg_scratch_offset_amd(&b);
nir_def *arg_offset = nir_load_rt_arg_scratch_offset_amd(&b);

nir_foreach_block (block, impl) {
nir_foreach_instr_safe (instr, block) {
@ -62,7 +62,7 @@ lower_rt_derefs(nir_shader *shader)
b.cursor = nir_before_instr(&deref->instr);
nir_deref_instr *replacement =
nir_build_deref_cast(&b, arg_offset, nir_var_function_temp, deref->var->type, 0);
nir_ssa_def_rewrite_uses(&deref->dest.ssa, &replacement->dest.ssa);
nir_def_rewrite_uses(&deref->dest.ssa, &replacement->dest.ssa);
nir_instr_remove(&deref->instr);
}
}
@ -239,25 +239,25 @@ enum sbt_entry {
SBT_ANY_HIT_IDX = offsetof(struct radv_pipeline_group_handle, any_hit_index),
};

static nir_ssa_def *
get_sbt_ptr(nir_builder *b, nir_ssa_def *idx, enum sbt_type binding)
static nir_def *
get_sbt_ptr(nir_builder *b, nir_def *idx, enum sbt_type binding)
{
nir_ssa_def *desc_base_addr = nir_load_sbt_base_amd(b);
nir_def *desc_base_addr = nir_load_sbt_base_amd(b);

nir_ssa_def *desc = nir_pack_64_2x32(b, nir_load_smem_amd(b, 2, desc_base_addr, nir_imm_int(b, binding)));
nir_def *desc = nir_pack_64_2x32(b, nir_load_smem_amd(b, 2, desc_base_addr, nir_imm_int(b, binding)));

nir_ssa_def *stride_offset = nir_imm_int(b, binding + (binding == SBT_RAYGEN ? 8 : 16));
nir_ssa_def *stride = nir_pack_64_2x32(b, nir_load_smem_amd(b, 2, desc_base_addr, stride_offset));
nir_def *stride_offset = nir_imm_int(b, binding + (binding == SBT_RAYGEN ? 8 : 16));
nir_def *stride = nir_pack_64_2x32(b, nir_load_smem_amd(b, 2, desc_base_addr, stride_offset));

return nir_iadd(b, desc, nir_imul(b, nir_u2u64(b, idx), stride));
}
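
The address math in get_sbt_ptr is plain base-plus-scaled-index, with both the base and the stride read from the SBT descriptor. A one-line worked sketch with illustrative numbers (not from the commit):

uint64_t entry = desc + (uint64_t)idx * stride; /* e.g. idx = 3, stride = 64 -> desc + 192 */
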
static void
load_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_ssa_def *idx, enum sbt_type binding,
load_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_def *idx, enum sbt_type binding,
enum sbt_entry offset)
{
nir_ssa_def *addr = get_sbt_ptr(b, idx, binding);
nir_ssa_def *load_addr = nir_iadd_imm(b, addr, offset);
nir_def *addr = get_sbt_ptr(b, idx, binding);
nir_def *load_addr = nir_iadd_imm(b, addr, offset);

if (offset == SBT_RECURSIVE_PTR) {
nir_store_var(b, vars->shader_va, nir_build_load_global(b, 1, 64, load_addr), 1);
@ -265,7 +265,7 @@ load_sbt_entry(nir_builder *b, const struct rt_variables *vars, nir_ssa_def *idx
nir_store_var(b, vars->idx, nir_build_load_global(b, 1, 32, load_addr), 1);
}

nir_ssa_def *record_addr = nir_iadd_imm(b, addr, RADV_RT_HANDLE_SIZE);
nir_def *record_addr = nir_iadd_imm(b, addr, RADV_RT_HANDLE_SIZE);
nir_store_var(b, vars->shader_record_ptr, record_addr, 1);
}

@ -282,12 +282,12 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
case nir_instr_type_intrinsic: {
b_shader.cursor = nir_before_instr(instr);
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
nir_ssa_def *ret = NULL;
nir_def *ret = NULL;

switch (intr->intrinsic) {
case nir_intrinsic_rt_execute_callable: {
uint32_t size = align(nir_intrinsic_stack_size(intr), 16);
nir_ssa_def *ret_ptr = nir_load_resume_shader_address_amd(&b_shader, nir_intrinsic_call_idx(intr));
nir_def *ret_ptr = nir_load_resume_shader_address_amd(&b_shader, nir_intrinsic_call_idx(intr));
ret_ptr = nir_ior_imm(&b_shader, ret_ptr, radv_get_rt_priority(shader->info.stage));

nir_store_var(&b_shader, vars->stack_ptr,
@ -305,7 +305,7 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
}
case nir_intrinsic_rt_trace_ray: {
uint32_t size = align(nir_intrinsic_stack_size(intr), 16);
nir_ssa_def *ret_ptr = nir_load_resume_shader_address_amd(&b_shader, nir_intrinsic_call_idx(intr));
nir_def *ret_ptr = nir_load_resume_shader_address_amd(&b_shader, nir_intrinsic_call_idx(intr));
ret_ptr = nir_ior_imm(&b_shader, ret_ptr, radv_get_rt_priority(shader->info.stage));

nir_store_var(&b_shader, vars->stack_ptr,
@ -385,8 +385,8 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
break;
}
case nir_intrinsic_load_ray_instance_custom_index: {
nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_ssa_def *custom_instance_and_mask = nir_build_load_global(
nir_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_def *custom_instance_and_mask = nir_build_load_global(
&b_shader, 1, 32,
nir_iadd_imm(&b_shader, instance_node_addr,
offsetof(struct radv_bvh_instance_node, custom_instance_and_mask)));
@ -403,7 +403,7 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
break;
}
case nir_intrinsic_load_instance_id: {
nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
ret = nir_build_load_global(
&b_shader, 1, 32,
nir_iadd_imm(&b_shader, instance_node_addr, offsetof(struct radv_bvh_instance_node, instance_id)));
@ -419,11 +419,11 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
}
case nir_intrinsic_load_ray_world_to_object: {
unsigned c = nir_intrinsic_column(intr);
nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_ssa_def *wto_matrix[3];
nir_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_def *wto_matrix[3];
nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);

nir_ssa_def *vals[3];
nir_def *vals[3];
for (unsigned i = 0; i < 3; ++i)
vals[i] = nir_channel(&b_shader, wto_matrix[i], c);

@ -432,8 +432,8 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
}
case nir_intrinsic_load_ray_object_to_world: {
unsigned c = nir_intrinsic_column(intr);
nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_ssa_def *rows[3];
nir_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_def *rows[3];
for (unsigned r = 0; r < 3; ++r)
rows[r] =
nir_build_load_global(&b_shader, 4, 32,
@ -444,15 +444,15 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
break;
}
case nir_intrinsic_load_ray_object_origin: {
nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_ssa_def *wto_matrix[3];
nir_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_def *wto_matrix[3];
nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);
ret = nir_build_vec3_mat_mult(&b_shader, nir_load_var(&b_shader, vars->origin), wto_matrix, true);
break;
}
case nir_intrinsic_load_ray_object_direction: {
nir_ssa_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_ssa_def *wto_matrix[3];
nir_def *instance_node_addr = nir_load_var(&b_shader, vars->instance_addr);
nir_def *wto_matrix[3];
nir_build_wto_matrix_load(&b_shader, instance_node_addr, wto_matrix);
ret = nir_build_vec3_mat_mult(&b_shader, nir_load_var(&b_shader, vars->direction), wto_matrix, false);
break;
@ -521,7 +521,7 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
nir_store_var(&b_shader, vars->hit_kind, intr->src[5].ssa, 0x1);
load_sbt_entry(&b_shader, vars, intr->src[0].ssa, SBT_HIT, SBT_RECURSIVE_PTR);

nir_ssa_def *should_return = nir_test_mask(&b_shader, nir_load_var(&b_shader, vars->cull_mask_and_flags),
nir_def *should_return = nir_test_mask(&b_shader, nir_load_var(&b_shader, vars->cull_mask_and_flags),
SpvRayFlagsSkipClosestHitShaderKHRMask);

if (!(vars->flags & VK_PIPELINE_CREATE_2_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR)) {
@ -538,12 +538,12 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
}
case nir_intrinsic_execute_miss_amd: {
nir_store_var(&b_shader, vars->tmax, intr->src[0].ssa, 0x1);
nir_ssa_def *undef = nir_ssa_undef(&b_shader, 1, 32);
nir_def *undef = nir_undef(&b_shader, 1, 32);
nir_store_var(&b_shader, vars->primitive_id, undef, 0x1);
nir_store_var(&b_shader, vars->instance_addr, nir_ssa_undef(&b_shader, 1, 64), 0x1);
nir_store_var(&b_shader, vars->instance_addr, nir_undef(&b_shader, 1, 64), 0x1);
nir_store_var(&b_shader, vars->geometry_id_and_flags, undef, 0x1);
nir_store_var(&b_shader, vars->hit_kind, undef, 0x1);
nir_ssa_def *miss_index = nir_load_var(&b_shader, vars->miss_index);
nir_def *miss_index = nir_load_var(&b_shader, vars->miss_index);
load_sbt_entry(&b_shader, vars, miss_index, SBT_MISS, SBT_RECURSIVE_PTR);

if (!(vars->flags & VK_PIPELINE_CREATE_2_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR)) {
@ -560,7 +560,7 @@ lower_rt_instructions(nir_shader *shader, struct rt_variables *vars, unsigned ca
}

if (ret)
nir_ssa_def_rewrite_uses(&intr->dest.ssa, ret);
nir_def_rewrite_uses(&intr->dest.ssa, ret);
nir_instr_remove(instr);
break;
}
@ -603,7 +603,7 @@ lower_hit_attrib_deref(nir_builder *b, nir_instr *instr, void *data)
uint32_t num_components = intrin->dest.ssa.num_components;
uint32_t bit_size = intrin->dest.ssa.bit_size;

nir_ssa_def *components[NIR_MAX_VEC_COMPONENTS];
nir_def *components[NIR_MAX_VEC_COMPONENTS];

for (uint32_t comp = 0; comp < num_components; comp++) {
uint32_t offset = deref->var->data.driver_location + comp * bit_size / 8;
@ -626,9 +626,9 @@ lower_hit_attrib_deref(nir_builder *b, nir_instr *instr, void *data)
}
}

nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_vec(b, components, num_components));
nir_def_rewrite_uses(&intrin->dest.ssa, nir_vec(b, components, num_components));
} else {
nir_ssa_def *value = intrin->src[1].ssa;
nir_def *value = intrin->src[1].ssa;
uint32_t num_components = value->num_components;
uint32_t bit_size = value->bit_size;

@ -637,7 +637,7 @@ lower_hit_attrib_deref(nir_builder *b, nir_instr *instr, void *data)
uint32_t base = offset / 4;
uint32_t comp_offset = offset % 4;

nir_ssa_def *component = nir_channel(b, value, comp);
nir_def *component = nir_channel(b, value, comp);

if (bit_size == 64) {
nir_store_hit_attrib_amd(b, nir_unpack_64_2x32_split_x(b, component), .base = base);
@ -645,14 +645,14 @@ lower_hit_attrib_deref(nir_builder *b, nir_instr *instr, void *data)
} else if (bit_size == 32) {
nir_store_hit_attrib_amd(b, component, .base = base);
} else if (bit_size == 16) {
nir_ssa_def *prev = nir_unpack_32_2x16(b, nir_load_hit_attrib_amd(b, .base = base));
nir_ssa_def *components[2];
nir_def *prev = nir_unpack_32_2x16(b, nir_load_hit_attrib_amd(b, .base = base));
nir_def *components[2];
for (uint32_t word = 0; word < 2; word++)
components[word] = (word == comp_offset / 2) ? nir_channel(b, value, comp) : nir_channel(b, prev, word);
nir_store_hit_attrib_amd(b, nir_pack_32_2x16(b, nir_vec(b, components, 2)), .base = base);
} else if (bit_size == 8) {
nir_ssa_def *prev = nir_unpack_bits(b, nir_load_hit_attrib_amd(b, .base = base), 8);
nir_ssa_def *components[4];
nir_def *prev = nir_unpack_bits(b, nir_load_hit_attrib_amd(b, .base = base), 8);
nir_def *components[4];
for (uint32_t byte = 0; byte < 4; byte++)
components[byte] = (byte == comp_offset) ? nir_channel(b, value, comp) : nir_channel(b, prev, byte);
nir_store_hit_attrib_amd(b, nir_pack_32_4x8(b, nir_vec(b, components, 4)), .base = base);
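
Because hit attributes are addressed as 32-bit slots, the 16- and 8-bit stores above are read-modify-write merges of the previous slot contents. A scalar sketch of the 16-bit case with illustrative values (not from the commit):

/* storing new16 at byte offset 2 keeps the low half of the previous dword */
uint32_t merged = (prev & 0x0000ffff) | ((uint32_t)new16 << 16);
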
@ -703,19 +703,19 @@ lower_hit_attribs(nir_shader *shader, nir_variable **hit_attribs, uint32_t workg

b.cursor = nir_after_instr(instr);

nir_ssa_def *offset;
nir_def *offset;
if (!hit_attribs)
offset = nir_imul_imm(
&b, nir_iadd_imm(&b, nir_load_local_invocation_index(&b), nir_intrinsic_base(intrin) * workgroup_size),
sizeof(uint32_t));

if (intrin->intrinsic == nir_intrinsic_load_hit_attrib_amd) {
nir_ssa_def *ret;
nir_def *ret;
if (hit_attribs)
ret = nir_load_var(&b, hit_attribs[nir_intrinsic_base(intrin)]);
else
ret = nir_load_shared(&b, 1, 32, offset, .base = 0, .align_mul = 4);
nir_ssa_def_rewrite_uses(nir_instr_ssa_def(instr), ret);
nir_def_rewrite_uses(nir_instr_ssa_def(instr), ret);
} else {
if (hit_attribs)
nir_store_var(&b, hit_attribs[nir_intrinsic_base(intrin)], intrin->src->ssa, 0x1);
@ -772,7 +772,7 @@ inline_constants(nir_shader *dst, nir_shader *src)
}

static void
insert_rt_case(nir_builder *b, nir_shader *shader, struct rt_variables *vars, nir_ssa_def *idx, uint32_t call_idx_base,
insert_rt_case(nir_builder *b, nir_shader *shader, struct rt_variables *vars, nir_def *idx, uint32_t call_idx_base,
uint32_t call_idx, unsigned stage_idx, struct radv_ray_tracing_stage *stages)
{
uint32_t workgroup_size =
@ -880,10 +880,10 @@ lower_any_hit_for_intersection(nir_shader *any_hit)
nir_builder build = nir_builder_at(nir_before_cf_list(&impl->body));
nir_builder *b = &build;

nir_ssa_def *commit_ptr = nir_load_param(b, 0);
nir_ssa_def *hit_t = nir_load_param(b, 1);
nir_ssa_def *hit_kind = nir_load_param(b, 2);
nir_ssa_def *scratch_offset = nir_load_param(b, 3);
nir_def *commit_ptr = nir_load_param(b, 0);
nir_def *hit_t = nir_load_param(b, 1);
nir_def *hit_kind = nir_load_param(b, 2);
nir_def *scratch_offset = nir_load_param(b, 3);

nir_deref_instr *commit = nir_build_deref_cast(b, commit_ptr, nir_var_function_temp, glsl_bool_type(), 0);

@ -913,12 +913,12 @@ lower_any_hit_for_intersection(nir_shader *any_hit)
break;

case nir_intrinsic_load_ray_t_max:
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, hit_t);
nir_def_rewrite_uses(&intrin->dest.ssa, hit_t);
nir_instr_remove(&intrin->instr);
break;

case nir_intrinsic_load_ray_hit_kind:
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, hit_kind);
nir_def_rewrite_uses(&intrin->dest.ssa, hit_kind);
nir_instr_remove(&intrin->instr);
break;

@ -939,8 +939,8 @@ lower_any_hit_for_intersection(nir_shader *any_hit)
break;
case nir_intrinsic_load_rt_arg_scratch_offset_amd:
b->cursor = nir_after_instr(instr);
nir_ssa_def *arg_offset = nir_isub(b, &intrin->dest.ssa, scratch_offset);
nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, arg_offset, arg_offset->parent_instr);
nir_def *arg_offset = nir_isub(b, &intrin->dest.ssa, scratch_offset);
nir_def_rewrite_uses_after(&intrin->dest.ssa, arg_offset, arg_offset->parent_instr);
break;

default:
@ -1012,10 +1012,10 @@ nir_lower_intersection_shader(nir_shader *intersection, nir_shader *any_hit)
continue;

b->cursor = nir_instr_remove(&intrin->instr);
nir_ssa_def *hit_t = nir_ssa_for_src(b, intrin->src[0], 1);
nir_ssa_def *hit_kind = nir_ssa_for_src(b, intrin->src[1], 1);
nir_ssa_def *min_t = nir_load_ray_t_min(b);
nir_ssa_def *max_t = nir_load_ray_t_max(b);
nir_def *hit_t = nir_ssa_for_src(b, intrin->src[0], 1);
nir_def *hit_kind = nir_ssa_for_src(b, intrin->src[1], 1);
nir_def *min_t = nir_load_ray_t_min(b);
nir_def *max_t = nir_load_ray_t_max(b);

/* bool commit_tmp = false; */
nir_variable *commit_tmp = nir_local_variable_create(impl, glsl_bool_type(), "commit_tmp");
@ -1029,7 +1029,7 @@ nir_lower_intersection_shader(nir_shader *intersection, nir_shader *any_hit)
if (any_hit_impl != NULL) {
nir_push_if(b, nir_inot(b, nir_load_intersection_opaque_amd(b)));
{
nir_ssa_def *params[] = {
nir_def *params[] = {
&nir_build_deref_var(b, commit_tmp)->dest.ssa,
hit_t,
hit_kind,
@ -1048,8 +1048,8 @@ nir_lower_intersection_shader(nir_shader *intersection, nir_shader *any_hit)
}
nir_pop_if(b, NULL);

nir_ssa_def *accepted = nir_load_var(b, commit_tmp);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, accepted);
nir_def *accepted = nir_load_var(b, commit_tmp);
nir_def_rewrite_uses(&intrin->dest.ssa, accepted);
}
}
nir_metadata_preserve(impl, nir_metadata_none);
@ -1124,7 +1124,7 @@ static void
visit_any_hit_shaders(struct radv_device *device, nir_builder *b, struct traversal_data *data,
struct rt_variables *vars)
{
nir_ssa_def *sbt_idx = nir_load_var(b, vars->idx);
nir_def *sbt_idx = nir_load_var(b, vars->idx);

if (!(vars->flags & VK_PIPELINE_CREATE_2_RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR))
nir_push_if(b, nir_ine_imm(b, sbt_idx, 0));
@ -1170,16 +1170,16 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int
{
struct traversal_data *data = args->data;

nir_ssa_def *geometry_id = nir_iand_imm(b, intersection->base.geometry_id_and_flags, 0xfffffff);
nir_ssa_def *sbt_idx =
nir_def *geometry_id = nir_iand_imm(b, intersection->base.geometry_id_and_flags, 0xfffffff);
nir_def *sbt_idx =
nir_iadd(b,
nir_iadd(b, nir_load_var(b, data->vars->sbt_offset),
nir_iand_imm(b, nir_load_var(b, data->trav_vars->sbt_offset_and_flags), 0xffffff)),
nir_imul(b, nir_load_var(b, data->vars->sbt_stride), geometry_id));

nir_ssa_def *hit_kind = nir_bcsel(b, intersection->frontface, nir_imm_int(b, 0xFE), nir_imm_int(b, 0xFF));
nir_def *hit_kind = nir_bcsel(b, intersection->frontface, nir_imm_int(b, 0xFE), nir_imm_int(b, 0xFF));

nir_ssa_def *prev_barycentrics = nir_load_var(b, data->barycentrics);
nir_def *prev_barycentrics = nir_load_var(b, data->barycentrics);
nir_store_var(b, data->barycentrics, intersection->barycentrics, 0x3);

nir_store_var(b, data->vars->ahit_accept, nir_imm_true(b), 0x1);
@ -1217,7 +1217,7 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int
nir_store_var(b, data->vars->idx, sbt_idx, 1);
nir_store_var(b, data->trav_vars->hit, nir_imm_true(b), 1);

nir_ssa_def *ray_terminated = nir_load_var(b, data->vars->ahit_terminate);
nir_def *ray_terminated = nir_load_var(b, data->vars->ahit_terminate);
nir_push_if(b, nir_ior(b, ray_flags->terminate_on_first_hit, ray_terminated));
{
nir_jump(b, nir_jump_break);
@ -1231,8 +1231,8 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
{
struct traversal_data *data = args->data;

nir_ssa_def *geometry_id = nir_iand_imm(b, intersection->geometry_id_and_flags, 0xfffffff);
nir_ssa_def *sbt_idx =
nir_def *geometry_id = nir_iand_imm(b, intersection->geometry_id_and_flags, 0xfffffff);
nir_def *sbt_idx =
nir_iadd(b,
nir_iadd(b, nir_load_var(b, data->vars->sbt_offset),
nir_iand_imm(b, nir_load_var(b, data->trav_vars->sbt_offset_and_flags), 0xffffff)),
@ -1317,8 +1317,8 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
nir_store_var(b, data->vars->idx, sbt_idx, 1);
nir_store_var(b, data->trav_vars->hit, nir_imm_true(b), 1);

nir_ssa_def *terminate_on_first_hit = nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask);
nir_ssa_def *ray_terminated = nir_load_var(b, data->vars->ahit_terminate);
nir_def *terminate_on_first_hit = nir_test_mask(b, args->flags, SpvRayFlagsTerminateOnFirstHitKHRMask);
nir_def *ray_terminated = nir_load_var(b, data->vars->ahit_terminate);
nir_push_if(b, nir_ior(b, terminate_on_first_hit, ray_terminated));
{
nir_jump(b, nir_jump_break);
@ -1329,13 +1329,13 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
}

static void
store_stack_entry(nir_builder *b, nir_ssa_def *index, nir_ssa_def *value, const struct radv_ray_traversal_args *args)
store_stack_entry(nir_builder *b, nir_def *index, nir_def *value, const struct radv_ray_traversal_args *args)
{
nir_store_shared(b, value, index, .base = 0, .align_mul = 4);
}

static nir_ssa_def *
load_stack_entry(nir_builder *b, nir_ssa_def *index, const struct radv_ray_traversal_args *args)
static nir_def *
load_stack_entry(nir_builder *b, nir_def *index, const struct radv_ray_traversal_args *args)
{
return nir_load_shared(b, 1, 32, index, .base = 0, .align_mul = 4);
}

@ -1366,8 +1366,8 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_
barycentrics->data.driver_location = 0;

/* initialize trace_ray arguments */
nir_ssa_def *accel_struct = nir_load_accel_struct_amd(&b);
nir_ssa_def *cull_mask_and_flags = nir_load_cull_mask_and_flags_amd(&b);
nir_def *accel_struct = nir_load_accel_struct_amd(&b);
nir_def *cull_mask_and_flags = nir_load_cull_mask_and_flags_amd(&b);
nir_store_var(&b, vars.cull_mask_and_flags, cull_mask_and_flags, 0x1);
nir_store_var(&b, vars.sbt_offset, nir_load_sbt_offset_amd(&b), 0x1);
nir_store_var(&b, vars.sbt_stride, nir_load_sbt_stride_amd(&b), 0x1);
@ -1382,15 +1382,15 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_

nir_store_var(&b, trav_vars.hit, nir_imm_false(&b), 1);

nir_ssa_def *bvh_offset = nir_build_load_global(
nir_def *bvh_offset = nir_build_load_global(
&b, 1, 32, nir_iadd_imm(&b, accel_struct, offsetof(struct radv_accel_struct_header, bvh_offset)),
.access = ACCESS_NON_WRITEABLE);
nir_ssa_def *root_bvh_base = nir_iadd(&b, accel_struct, nir_u2u64(&b, bvh_offset));
nir_def *root_bvh_base = nir_iadd(&b, accel_struct, nir_u2u64(&b, bvh_offset));
root_bvh_base = build_addr_to_node(&b, root_bvh_base);

nir_store_var(&b, trav_vars.bvh_base, root_bvh_base, 1);

nir_ssa_def *vec3ones = nir_imm_vec3(&b, 1.0, 1.0, 1.0);
nir_def *vec3ones = nir_imm_vec3(&b, 1.0, 1.0, 1.0);

nir_store_var(&b, trav_vars.origin, nir_load_var(&b, vars.origin), 7);
nir_store_var(&b, trav_vars.dir, nir_load_var(&b, vars.direction), 7);
@ -1504,15 +1504,15 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_
* CHit / Miss : Callable > Chit / Miss > Traversal > Raygen
* Callable : Callable > Chit / Miss > > Raygen
*/
static nir_ssa_def *
select_next_shader(nir_builder *b, nir_ssa_def *shader_va, unsigned wave_size)
static nir_def *
select_next_shader(nir_builder *b, nir_def *shader_va, unsigned wave_size)
{
gl_shader_stage stage = b->shader->info.stage;
nir_ssa_def *prio = nir_iand_imm(b, shader_va, radv_rt_priority_mask);
nir_ssa_def *ballot = nir_ballot(b, 1, wave_size, nir_imm_bool(b, true));
nir_ssa_def *ballot_traversal = nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_traversal));
nir_ssa_def *ballot_hit_miss = nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_hit_miss));
nir_ssa_def *ballot_callable = nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_callable));
nir_def *prio = nir_iand_imm(b, shader_va, radv_rt_priority_mask);
nir_def *ballot = nir_ballot(b, 1, wave_size, nir_imm_bool(b, true));
nir_def *ballot_traversal = nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_traversal));
nir_def *ballot_hit_miss = nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_hit_miss));
nir_def *ballot_callable = nir_ballot(b, 1, wave_size, nir_ieq_imm(b, prio, radv_rt_priority_callable));

if (stage != MESA_SHADER_CALLABLE && stage != MESA_SHADER_INTERSECTION)
ballot = nir_bcsel(b, nir_ine_imm(b, ballot_traversal, 0), ballot_traversal, ballot);
@ -1521,8 +1521,8 @@ select_next_shader(nir_builder *b, nir_ssa_def *shader_va, unsigned wave_size)
if (stage != MESA_SHADER_INTERSECTION)
ballot = nir_bcsel(b, nir_ine_imm(b, ballot_callable, 0), ballot_callable, ballot);

nir_ssa_def *lsb = nir_find_lsb(b, ballot);
nir_ssa_def *next = nir_read_invocation(b, shader_va, lsb);
nir_def *lsb = nir_find_lsb(b, ballot);
nir_def *next = nir_read_invocation(b, shader_va, lsb);
return nir_iand_imm(b, next, ~radv_rt_priority_mask);
}
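
Note how select_next_shader keeps the wave together: each priority class votes via a ballot, the first set bit of the winning ballot picks a leader lane, and nir_read_invocation broadcasts that lane's shader_va to the whole wave before the priority tag bits are masked off. A scalar sketch of the same selection (illustrative pseudo-C, not the builder code above):

/* leader = lowest lane in the best non-empty priority ballot */
unsigned leader = find_lsb(best_ballot);
uint64_t next_pc = shader_va_of_lane[leader] & ~radv_rt_priority_mask; /* strip the tag */
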
@ -1552,17 +1552,17 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
/* initialize variables */
nir_builder b = nir_builder_at(nir_before_cf_list(&impl->body));

nir_ssa_def *traversal_addr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.traversal_shader);
nir_def *traversal_addr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.traversal_shader);
nir_store_var(&b, vars.traversal_addr, nir_pack_64_2x32(&b, traversal_addr), 1);
nir_ssa_def *shader_va = ac_nir_load_arg(&b, &args->ac, args->ac.rt.next_shader);
nir_def *shader_va = ac_nir_load_arg(&b, &args->ac, args->ac.rt.next_shader);
shader_va = nir_pack_64_2x32(&b, shader_va);
nir_store_var(&b, vars.shader_va, shader_va, 1);
nir_store_var(&b, vars.stack_ptr, ac_nir_load_arg(&b, &args->ac, args->ac.rt.dynamic_callable_stack_base), 1);
nir_ssa_def *record_ptr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.shader_record);
nir_def *record_ptr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.shader_record);
nir_store_var(&b, vars.shader_record_ptr, nir_pack_64_2x32(&b, record_ptr), 1);
nir_store_var(&b, vars.arg, ac_nir_load_arg(&b, &args->ac, args->ac.rt.payload_offset), 1);

nir_ssa_def *accel_struct = ac_nir_load_arg(&b, &args->ac, args->ac.rt.accel_struct);
nir_def *accel_struct = ac_nir_load_arg(&b, &args->ac, args->ac.rt.accel_struct);
nir_store_var(&b, vars.accel_struct, nir_pack_64_2x32(&b, accel_struct), 1);
nir_store_var(&b, vars.cull_mask_and_flags, ac_nir_load_arg(&b, &args->ac, args->ac.rt.cull_mask_and_flags), 1);
nir_store_var(&b, vars.sbt_offset, ac_nir_load_arg(&b, &args->ac, args->ac.rt.sbt_offset), 1);
@ -1574,7 +1574,7 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
nir_store_var(&b, vars.tmax, ac_nir_load_arg(&b, &args->ac, args->ac.rt.ray_tmax), 1);

nir_store_var(&b, vars.primitive_id, ac_nir_load_arg(&b, &args->ac, args->ac.rt.primitive_id), 1);
nir_ssa_def *instance_addr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.instance_addr);
nir_def *instance_addr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.instance_addr);
nir_store_var(&b, vars.instance_addr, nir_pack_64_2x32(&b, instance_addr), 1);
nir_store_var(&b, vars.geometry_id_and_flags, ac_nir_load_arg(&b, &args->ac, args->ac.rt.geometry_id_and_flags), 1);
nir_store_var(&b, vars.hit_kind, ac_nir_load_arg(&b, &args->ac, args->ac.rt.hit_kind), 1);
@ -1582,7 +1582,7 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
/* guard the shader, so that only the correct invocations execute it */
nir_if *shader_guard = NULL;
if (shader->info.stage != MESA_SHADER_RAYGEN || resume_shader) {
nir_ssa_def *shader_pc = ac_nir_load_arg(&b, &args->ac, args->ac.rt.shader_pc);
nir_def *shader_pc = ac_nir_load_arg(&b, &args->ac, args->ac.rt.shader_pc);
shader_pc = nir_pack_64_2x32(&b, shader_pc);
shader_pc = nir_ior_imm(&b, shader_pc, radv_get_rt_priority(shader->info.stage));

@ -1598,7 +1598,7 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
/* select next shader */
b.cursor = nir_after_cf_list(&impl->body);
shader_va = nir_load_var(&b, vars.shader_va);
nir_ssa_def *next = select_next_shader(&b, shader_va, info->wave_size);
nir_def *next = select_next_shader(&b, shader_va, info->wave_size);
ac_nir_store_arg(&b, &args->ac, args->ac.rt.shader_pc, next);

/* store back all variables to registers */

@ -40,7 +40,7 @@ gather_intrinsic_load_input_info(const nir_shader *nir, const nir_intrinsic_inst
case MESA_SHADER_VERTEX: {
unsigned idx = nir_intrinsic_io_semantics(instr).location;
unsigned component = nir_intrinsic_component(instr);
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
unsigned mask = nir_def_components_read(&instr->dest.ssa);
mask = (instr->dest.ssa.bit_size == 64 ? util_widen_mask(mask, 2) : mask) << component;

info->vs.input_usage_mask[idx] |= mask & 0xf;
@ -95,11 +95,11 @@ gather_intrinsic_store_output_info(const nir_shader *nir, const nir_intrinsic_in
unsigned pos_w_chan = 3 - component;

if (write_mask & BITFIELD_BIT(pos_w_chan)) {
nir_ssa_scalar pos_w = nir_ssa_scalar_resolved(instr->src[0].ssa, pos_w_chan);
nir_scalar pos_w = nir_scalar_resolved(instr->src[0].ssa, pos_w_chan);
/* Use coarse shading if the value of Pos.W can't be determined or if its value is != 1
* (typical for non-GUI elements).
*/
if (!nir_ssa_scalar_is_const(pos_w) || nir_ssa_scalar_as_uint(pos_w) != 0x3f800000u)
if (!nir_scalar_is_const(pos_w) || nir_scalar_as_uint(pos_w) != 0x3f800000u)
info->force_vrs_per_vertex = true;
}
}
@ -179,7 +179,7 @@ gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr, s
break;
case nir_intrinsic_load_local_invocation_id:
case nir_intrinsic_load_workgroup_id: {
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
unsigned mask = nir_def_components_read(&instr->dest.ssa);
while (mask) {
unsigned i = u_bit_scan(&mask);

@ -191,10 +191,10 @@ gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr, s
break;
}
case nir_intrinsic_load_frag_coord:
info->ps.reads_frag_coord_mask |= nir_ssa_def_components_read(&instr->dest.ssa);
info->ps.reads_frag_coord_mask |= nir_def_components_read(&instr->dest.ssa);
break;
case nir_intrinsic_load_sample_pos:
info->ps.reads_sample_pos_mask |= nir_ssa_def_components_read(&instr->dest.ssa);
info->ps.reads_sample_pos_mask |= nir_def_components_read(&instr->dest.ssa);
break;
case nir_intrinsic_load_push_constant:
gather_push_constant_info(nir, instr, info);

@ -409,7 +409,7 @@ agx_emit_load_vary(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
nir_src *offset = nir_get_io_offset_src(instr);
assert(nir_src_is_const(*offset) && "no indirects");

assert(nir_ssa_def_components_read(&instr->dest.ssa) ==
assert(nir_def_components_read(&instr->dest.ssa) ==
nir_component_mask(components) &&
"iter does not handle write-after-write hazards");

@ -771,10 +771,10 @@ agx_emit_local_store(agx_builder *b, nir_intrinsic_instr *instr)
static agx_index
agx_translate_bindless_handle(agx_builder *b, nir_src *handle, agx_index *base)
{
nir_ssa_scalar base_scalar = nir_ssa_scalar_resolved(handle->ssa, 0);
assert(nir_ssa_scalar_is_const(base_scalar) && "base must be constant");
nir_scalar base_scalar = nir_scalar_resolved(handle->ssa, 0);
assert(nir_scalar_is_const(base_scalar) && "base must be constant");

unsigned base_uint = nir_ssa_scalar_as_uint(base_scalar);
unsigned base_uint = nir_scalar_as_uint(base_scalar);
*base = agx_uniform(base_uint, AGX_SIZE_64);

return agx_emit_extract(b, agx_src_index(handle), 1);
@ -801,7 +801,7 @@ static unsigned
agx_expand_tex_to(agx_builder *b, nir_dest *dest, agx_index src, bool masked)
{
unsigned nr_channels = nir_dest_num_components(*dest);
nir_component_mask_t mask = nir_ssa_def_components_read(&dest->ssa);
nir_component_mask_t mask = nir_def_components_read(&dest->ssa);

if (!masked)
mask = (nir_component_mask_t)BITFIELD_MASK(nr_channels);
@ -1798,7 +1798,7 @@ agx_emit_phis_deferred(agx_context *ctx)
}

static void
agx_emit_undef(agx_builder *b, nir_ssa_undef_instr *instr)
agx_emit_undef(agx_builder *b, nir_undef_instr *instr)
{
/* For now, just lower undefs to zero. This doesn't matter too much, since
* the lowering happens in NIR and this just allows for late lowering passes
@ -2095,17 +2095,17 @@ agx_lower_sincos_filter(const nir_instr *instr, UNUSED const void *_)
* implemented by shifting by one quadrant: cos(x) = sin(x + tau/4).
*/

static nir_ssa_def *
static nir_def *
agx_lower_sincos_impl(struct nir_builder *b, nir_instr *instr, UNUSED void *_)
{
nir_alu_instr *alu = nir_instr_as_alu(instr);
nir_ssa_def *x = nir_mov_alu(b, alu->src[0], 1);
nir_ssa_def *turns = nir_fmul_imm(b, x, M_1_PI * 0.5f);
nir_def *x = nir_mov_alu(b, alu->src[0], 1);
nir_def *turns = nir_fmul_imm(b, x, M_1_PI * 0.5f);

if (alu->op == nir_op_fcos)
turns = nir_fadd_imm(b, turns, 0.25f);

nir_ssa_def *quadrants = nir_fmul_imm(b, nir_ffract(b, turns), 4.0);
nir_def *quadrants = nir_fmul_imm(b, nir_ffract(b, turns), 4.0);
return nir_fsin_agx(b, quadrants);
}
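
A quick numeric check of the range reduction above (values illustrative): for x = pi, turns = pi * M_1_PI * 0.5 = 0.5, so quadrants = fract(0.5) * 4 = 2 — the half turn, where sin is zero, as expected. For fcos, adding 0.25 turns first matches the cos(x) = sin(x + tau/4) identity from the comment.
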
|
||||
|
||||
|
|
@ -2126,11 +2126,11 @@ agx_lower_front_face(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
|
|||
if (intr->intrinsic != nir_intrinsic_load_front_face)
|
||||
return false;
|
||||
|
||||
nir_ssa_def *def = &intr->dest.ssa;
|
||||
nir_def *def = &intr->dest.ssa;
|
||||
assert(def->bit_size == 1);
|
||||
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
nir_ssa_def_rewrite_uses(def, nir_inot(b, nir_load_back_face_agx(b, 1)));
|
||||
nir_def_rewrite_uses(def, nir_inot(b, nir_load_back_face_agx(b, 1)));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -2347,8 +2347,8 @@ agx_gather_texcoords(nir_builder *b, nir_instr *instr, void *data)
|
|||
return false;
|
||||
|
||||
nir_src src = tex->src[coord_idx].src;
|
||||
nir_ssa_scalar x = nir_ssa_scalar_resolved(src.ssa, 0);
|
||||
nir_ssa_scalar y = nir_ssa_scalar_resolved(src.ssa, 1);
|
||||
nir_scalar x = nir_scalar_resolved(src.ssa, 0);
|
||||
nir_scalar y = nir_scalar_resolved(src.ssa, 1);
|
||||
|
||||
if (x.def != y.def)
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -454,7 +454,7 @@ agx_size_for_bits(unsigned bits)
|
|||
}
|
||||
|
||||
static inline agx_index
|
||||
agx_nir_ssa_index(nir_ssa_def *ssa)
|
||||
agx_nir_ssa_index(nir_def *ssa)
|
||||
{
|
||||
return agx_get_index(ssa->index, agx_size_for_bits(ssa->bit_size));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
|
||||
/* Results of pattern matching */
|
||||
struct match {
|
||||
nir_ssa_scalar base, offset;
|
||||
nir_scalar base, offset;
|
||||
bool has_offset;
|
||||
bool sign_extend;
|
||||
|
||||
|
|
@ -25,18 +25,18 @@ struct match {
|
|||
* variables. Otherwise, returns false.
|
||||
*/
|
||||
static bool
|
||||
match_imul_imm(nir_ssa_scalar scalar, nir_ssa_scalar *variable, uint32_t *imm)
|
||||
match_imul_imm(nir_scalar scalar, nir_scalar *variable, uint32_t *imm)
|
||||
{
|
||||
if (!nir_ssa_scalar_is_alu(scalar))
|
||||
if (!nir_scalar_is_alu(scalar))
|
||||
return false;
|
||||
|
||||
nir_op op = nir_ssa_scalar_alu_op(scalar);
|
||||
nir_op op = nir_scalar_alu_op(scalar);
|
||||
if (op != nir_op_imul && op != nir_op_ishl)
|
||||
return false;
|
||||
|
||||
nir_ssa_scalar inputs[] = {
|
||||
nir_ssa_scalar_chase_alu_src(scalar, 0),
|
||||
nir_ssa_scalar_chase_alu_src(scalar, 1),
|
||||
nir_scalar inputs[] = {
|
||||
nir_scalar_chase_alu_src(scalar, 0),
|
||||
nir_scalar_chase_alu_src(scalar, 1),
|
||||
};
|
||||
|
||||
/* For imul check both operands for an immediate, since imul is commutative.
|
||||
|
|
@ -45,12 +45,12 @@ match_imul_imm(nir_ssa_scalar scalar, nir_ssa_scalar *variable, uint32_t *imm)
|
|||
bool commutes = (op == nir_op_imul);
|
||||
|
||||
for (unsigned i = commutes ? 0 : 1; i < ARRAY_SIZE(inputs); ++i) {
|
||||
if (!nir_ssa_scalar_is_const(inputs[i]))
|
||||
if (!nir_scalar_is_const(inputs[i]))
|
||||
continue;
|
||||
|
||||
*variable = inputs[1 - i];
|
||||
|
||||
uint32_t value = nir_ssa_scalar_as_uint(inputs[i]);
|
||||
uint32_t value = nir_scalar_as_uint(inputs[i]);
|
||||
|
||||
if (op == nir_op_imul)
|
||||
*imm = value;
|
||||
|
|
@ -75,17 +75,17 @@ match_imul_imm(nir_ssa_scalar scalar, nir_ssa_scalar *variable, uint32_t *imm)
|
|||
static bool
|
||||
match_soa(nir_builder *b, struct match *match, unsigned format_shift)
|
||||
{
|
||||
if (!nir_ssa_scalar_is_alu(match->offset) ||
|
||||
nir_ssa_scalar_alu_op(match->offset) != nir_op_iadd)
|
||||
if (!nir_scalar_is_alu(match->offset) ||
|
||||
nir_scalar_alu_op(match->offset) != nir_op_iadd)
|
||||
return false;
|
||||
|
||||
nir_ssa_scalar summands[] = {
|
||||
nir_ssa_scalar_chase_alu_src(match->offset, 0),
|
||||
nir_ssa_scalar_chase_alu_src(match->offset, 1),
|
||||
nir_scalar summands[] = {
|
||||
nir_scalar_chase_alu_src(match->offset, 0),
|
||||
nir_scalar_chase_alu_src(match->offset, 1),
|
||||
};
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(summands); ++i) {
|
||||
if (!nir_ssa_scalar_is_const(summands[i]))
|
||||
if (!nir_scalar_is_const(summands[i]))
|
||||
continue;
|
||||
|
||||
/* Note: This is treated as signed regardless of the sign of the match.
|
||||
|
|
@ -104,8 +104,8 @@ match_soa(nir_builder *b, struct match *match, unsigned format_shift)
|
|||
* TODO: We need to confirm how the hardware handles 32-bit overflow when
|
||||
* applying the format shift, which might need rework here again.
|
||||
*/
|
||||
int offset = nir_ssa_scalar_as_int(summands[i]);
|
||||
nir_ssa_scalar variable;
|
||||
int offset = nir_scalar_as_int(summands[i]);
|
||||
nir_scalar variable;
|
||||
uint32_t multiplier;
|
||||
|
||||
/* The other operand must multiply */
|
||||
|
|
@ -123,9 +123,9 @@ match_soa(nir_builder *b, struct match *match, unsigned format_shift)
|
|||
return false;
|
||||
|
||||
/* Otherwise, rewrite! */
|
||||
nir_ssa_def *unmultiplied = nir_vec_scalars(b, &variable, 1);
|
||||
nir_def *unmultiplied = nir_vec_scalars(b, &variable, 1);
|
||||
|
||||
nir_ssa_def *rewrite = nir_iadd_imm(
|
||||
nir_def *rewrite = nir_iadd_imm(
|
||||
b, nir_imul_imm(b, unmultiplied, multiplier_shifted), offset_shifted);
|
||||
|
||||
match->offset = nir_get_ssa_scalar(rewrite, 0);
|
||||
|
|
@ -138,27 +138,26 @@ match_soa(nir_builder *b, struct match *match, unsigned format_shift)
|
|||
|
||||
/* Try to pattern match address calculation */
|
||||
static struct match
|
||||
match_address(nir_builder *b, nir_ssa_scalar base, int8_t format_shift)
|
||||
match_address(nir_builder *b, nir_scalar base, int8_t format_shift)
|
||||
{
|
||||
struct match match = {.base = base};
|
||||
|
||||
/* All address calculations are iadd at the root */
|
||||
if (!nir_ssa_scalar_is_alu(base) ||
|
||||
nir_ssa_scalar_alu_op(base) != nir_op_iadd)
|
||||
if (!nir_scalar_is_alu(base) || nir_scalar_alu_op(base) != nir_op_iadd)
|
||||
return match;
|
||||
|
||||
/* Only 64+32 addition is supported, look for an extension */
|
||||
nir_ssa_scalar summands[] = {
|
||||
nir_ssa_scalar_chase_alu_src(base, 0),
|
||||
nir_ssa_scalar_chase_alu_src(base, 1),
|
||||
nir_scalar summands[] = {
|
||||
nir_scalar_chase_alu_src(base, 0),
|
||||
nir_scalar_chase_alu_src(base, 1),
|
||||
};
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(summands); ++i) {
|
||||
/* We can add a small constant to the 64-bit base for free */
|
||||
if (nir_ssa_scalar_is_const(summands[i]) &&
|
||||
nir_ssa_scalar_as_uint(summands[i]) < (1ull << 32)) {
|
||||
if (nir_scalar_is_const(summands[i]) &&
|
||||
nir_scalar_as_uint(summands[i]) < (1ull << 32)) {
|
||||
|
||||
uint32_t value = nir_ssa_scalar_as_uint(summands[i]);
|
||||
uint32_t value = nir_scalar_as_uint(summands[i]);
|
||||
|
||||
return (struct match){
|
||||
.base = summands[1 - i],
|
||||
|
|
@ -169,17 +168,17 @@ match_address(nir_builder *b, nir_ssa_scalar base, int8_t format_shift)
|
|||
}
|
||||
|
||||
/* Otherwise, we can only add an offset extended from 32-bits */
|
||||
if (!nir_ssa_scalar_is_alu(summands[i]))
|
||||
if (!nir_scalar_is_alu(summands[i]))
|
||||
continue;
|
||||
|
||||
nir_op op = nir_ssa_scalar_alu_op(summands[i]);
|
||||
nir_op op = nir_scalar_alu_op(summands[i]);
|
||||
|
||||
if (op != nir_op_u2u64 && op != nir_op_i2i64)
|
||||
continue;
|
||||
|
||||
/* We've found a summand, commit to it */
|
||||
match.base = summands[1 - i];
|
||||
match.offset = nir_ssa_scalar_chase_alu_src(summands[i], 0);
|
||||
match.offset = nir_scalar_chase_alu_src(summands[i], 0);
|
||||
match.sign_extend = (op == nir_op_i2i64);
|
||||
|
||||
/* Undo the implicit shift from using as offset */
|
||||
|
|
@ -192,7 +191,7 @@ match_address(nir_builder *b, nir_ssa_scalar base, int8_t format_shift)
|
|||
return match;
|
||||
|
||||
/* But if we did, we can try to fold in in a multiply */
|
||||
nir_ssa_scalar multiplied;
|
||||
nir_scalar multiplied;
|
||||
uint32_t multiplier;
|
||||
|
||||
if (match_imul_imm(match.offset, &multiplied, &multiplier)) {
|
||||
|
|
@ -211,7 +210,7 @@ match_address(nir_builder *b, nir_ssa_scalar base, int8_t format_shift)
|
|||
return match;
|
||||
}
|
||||
|
||||
nir_ssa_def *multiplied_ssa = nir_vec_scalars(b, &multiplied, 1);
|
||||
nir_def *multiplied_ssa = nir_vec_scalars(b, &multiplied, 1);
|
||||
|
||||
/* Only fold in if we wouldn't overflow the lsl field */
|
||||
if (new_shift <= 2) {
|
||||
|
|
@ -224,7 +223,7 @@ match_address(nir_builder *b, nir_ssa_scalar base, int8_t format_shift)
|
|||
*/
|
||||
assert(new_shift >= 3);
|
||||
|
||||
nir_ssa_def *rewrite =
|
||||
nir_def *rewrite =
|
||||
nir_imul_imm(b, multiplied_ssa, multiplier << new_shift);
|
||||
|
||||
match.offset = nir_get_ssa_scalar(rewrite, 0);
|
||||
|
|
@ -276,11 +275,10 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
|
|||
unsigned format_shift = util_logbase2(util_format_get_blocksize(format));
|
||||
|
||||
nir_src *orig_offset = nir_get_io_offset_src(intr);
|
||||
nir_ssa_scalar base = nir_ssa_scalar_resolved(orig_offset->ssa, 0);
|
||||
nir_scalar base = nir_scalar_resolved(orig_offset->ssa, 0);
|
||||
struct match match = match_address(b, base, format_shift);
|
||||
|
||||
nir_ssa_def *offset =
|
||||
match.offset.def != NULL
|
||||
nir_def *offset = match.offset.def != NULL
|
||||
? nir_channel(b, match.offset.def, match.offset.comp)
|
||||
: nir_imm_int(b, 0);
|
||||
|
||||
|
|
@ -309,9 +307,9 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
|
|||
}
|
||||
|
||||
assert(match.shift >= 0);
|
||||
nir_ssa_def *new_base = nir_channel(b, match.base.def, match.base.comp);
|
||||
nir_def *new_base = nir_channel(b, match.base.def, match.base.comp);
|
||||
|
||||
nir_ssa_def *repl = NULL;
|
||||
nir_def *repl = NULL;
|
||||
bool has_dest = (intr->intrinsic != nir_intrinsic_store_global);
|
||||
unsigned num_components = has_dest ? nir_dest_num_components(intr->dest) : 0;
|
||||
unsigned bit_size = has_dest ? nir_dest_bit_size(intr->dest) : 0;
|
||||
|
|
@ -346,7 +344,7 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
|
|||
}
|
||||
|
||||
if (repl)
|
||||
nir_ssa_def_rewrite_uses(&intr->dest.ssa, repl);
|
||||
nir_def_rewrite_uses(&intr->dest.ssa, repl);
|
||||
|
||||
nir_instr_remove(instr);
|
||||
return true;
|
||||
|
|
|
|||
|
|
@@ -33,7 +33,7 @@ lower_zs_emit(nir_block *block)

      nir_builder b = nir_builder_at(nir_before_instr(instr));

-     nir_ssa_def *value = intr->src[0].ssa;
+     nir_def *value = intr->src[0].ssa;
      bool z = (sem.location == FRAG_RESULT_DEPTH);

      unsigned src_idx = z ? 1 : 2;

@@ -51,10 +51,10 @@ lower_zs_emit(nir_block *block)
        /* Multisampling will get lowered later if needed, default to
         * broadcast
         */
-       nir_ssa_def *sample_mask = nir_imm_intN_t(&b, ALL_SAMPLES, 16);
-       zs_emit = nir_store_zs_agx(&b, sample_mask,
-                                  nir_ssa_undef(&b, 1, 32) /* depth */,
-                                  nir_ssa_undef(&b, 1, 16) /* stencil */);
+       nir_def *sample_mask = nir_imm_intN_t(&b, ALL_SAMPLES, 16);
+       zs_emit =
+          nir_store_zs_agx(&b, sample_mask, nir_undef(&b, 1, 32) /* depth */,
+                           nir_undef(&b, 1, 16) /* stencil */);
      }

      assert((nir_intrinsic_base(zs_emit) & base) == 0 &&

@@ -83,9 +83,9 @@ lower_discard(nir_builder *b, nir_instr *instr, UNUSED void *data)

   b->cursor = nir_before_instr(instr);

-  nir_ssa_def *all_samples = nir_imm_intN_t(b, ALL_SAMPLES, 16);
-  nir_ssa_def *no_samples = nir_imm_intN_t(b, 0, 16);
-  nir_ssa_def *killed_samples = all_samples;
+  nir_def *all_samples = nir_imm_intN_t(b, ALL_SAMPLES, 16);
+  nir_def *no_samples = nir_imm_intN_t(b, 0, 16);
+  nir_def *killed_samples = all_samples;

   if (intr->intrinsic == nir_intrinsic_discard_if)
      killed_samples = nir_bcsel(b, intr->src[0].ssa, all_samples, no_samples);

@@ -22,7 +22,7 @@
 static void
 insert_z_write(nir_builder *b)
 {
-   nir_ssa_def *z = nir_load_frag_coord_zw(b, .component = 2);
+   nir_def *z = nir_load_frag_coord_zw(b, .component = 2);

    nir_store_output(b, z, nir_imm_int(b, 0),
                     .io_semantics.location = FRAG_RESULT_DEPTH,

@@ -24,49 +24,48 @@
  */

 /* XXX: It's not clear what this is for, but seems necessary */
-static nir_ssa_def *
-cf_valid(nir_builder *b, nir_ssa_def *cf)
+static nir_def *
+cf_valid(nir_builder *b, nir_def *cf)
 {
-   nir_ssa_def *bit =
-      nir_ieq_imm(b, nir_iand_imm(b, nir_channel(b, cf, 0), 1), 0);
+   nir_def *bit = nir_ieq_imm(b, nir_iand_imm(b, nir_channel(b, cf, 0), 1), 0);

    /* XXX: Apple's compiler actually checks that the significand is nonzero and
     * the exponent is 0 or 1. This is probably a typo -- it doesn't make any
     * logical sense. Presumably they just meant to check for denorms, so let's
     * do that. Either way the tests pass.
     */
-   nir_ssa_def *cf01 = nir_trim_vector(b, cf, 2);
+   nir_def *cf01 = nir_trim_vector(b, cf, 2);
    return nir_ior(b, bit, nir_fisnormal(b, cf01));
 }

-static nir_ssa_def *
-interpolate_at_offset(nir_builder *b, nir_ssa_def *cf, nir_ssa_def *offset,
+static nir_def *
+interpolate_at_offset(nir_builder *b, nir_def *cf, nir_def *offset,
                       bool perspective)
 {
    /* Get the coordinate of the pixel within the tile */
-   nir_ssa_def *pixel_coords = nir_load_pixel_coord(b);
-   nir_ssa_def *tile_offs = nir_umod_imm(b, pixel_coords, 32);
+   nir_def *pixel_coords = nir_load_pixel_coord(b);
+   nir_def *tile_offs = nir_umod_imm(b, pixel_coords, 32);

    /* Convert to float, getting the center of the pixel */
-   nir_ssa_def *center = nir_fadd_imm(b, nir_u2f32(b, tile_offs), 0.5);
+   nir_def *center = nir_fadd_imm(b, nir_u2f32(b, tile_offs), 0.5);

    /* Calculate the location to interpolate. offset is defined relative to the
     * center of the pixel and is a float.
     */
-   nir_ssa_def *pos = nir_fadd(b, center, nir_f2f32(b, offset));
+   nir_def *pos = nir_fadd(b, center, nir_f2f32(b, offset));

    /* Interpolate with the given coefficients */
-   nir_ssa_def *interp = nir_ffma(b, nir_channel(b, pos, 1),
-                                  nir_channel(b, cf, 1), nir_channel(b, cf, 2));
+   nir_def *interp = nir_ffma(b, nir_channel(b, pos, 1), nir_channel(b, cf, 1),
+                              nir_channel(b, cf, 2));

    interp = nir_ffma(b, nir_channel(b, pos, 0), nir_channel(b, cf, 0), interp);

    /* Divide by RHW. This load will be lowered recursively. */
    if (perspective) {
-      nir_ssa_def *bary = nir_load_barycentric_at_offset(
+      nir_def *bary = nir_load_barycentric_at_offset(
         b, 32, offset, .interp_mode = INTERP_MODE_NOPERSPECTIVE);

-      nir_ssa_def *rhw = nir_load_interpolated_input(
+      nir_def *rhw = nir_load_interpolated_input(
         b, 1, 32, bary, nir_imm_int(b, 0), .component = 3,
         .io_semantics = {
            .location = VARYING_SLOT_POS,

@@ -80,8 +79,8 @@ interpolate_at_offset(nir_builder *b, nir_ssa_def *cf, nir_ssa_def *offset,
    return nir_bcsel(b, cf_valid(b, cf), interp, nir_channel(b, cf, 2));
 }

-static nir_ssa_def *
-interpolate_flat(nir_builder *b, nir_ssa_def *coefficients)
+static nir_def *
+interpolate_flat(nir_builder *b, nir_def *coefficients)
 {
    /* Same value anywhere, so just take the constant (affine) component */
    return nir_channel(b, coefficients, 2);

@@ -114,7 +113,7 @@ needs_lower(const nir_instr *instr, UNUSED const void *_)
    return (load->intrinsic == nir_intrinsic_load_input);
 }

-static nir_ssa_def *
+static nir_def *
 interpolate_channel(nir_builder *b, nir_intrinsic_instr *load, unsigned channel)
 {
    nir_io_semantics sem = nir_intrinsic_io_semantics(load);

@@ -123,7 +122,7 @@ interpolate_channel(nir_builder *b, nir_intrinsic_instr *load, unsigned channel)
    sem.location += nir_src_as_uint(*nir_get_io_offset_src(load));
    sem.num_slots = 1;

-   nir_ssa_def *coefficients = nir_load_coefficients_agx(
+   nir_def *coefficients = nir_load_coefficients_agx(
       b, .component = nir_intrinsic_component(load) + channel,
       .interp_mode = interp_mode_for_load(load), .io_semantics = sem);

@@ -133,7 +132,7 @@ interpolate_channel(nir_builder *b, nir_intrinsic_instr *load, unsigned channel)
    } else {
       nir_intrinsic_instr *bary = nir_src_as_intrinsic(load->src[0]);

-      nir_ssa_def *interp = interpolate_at_offset(
+      nir_def *interp = interpolate_at_offset(
         b, coefficients, bary->src[0].ssa,
         nir_intrinsic_interp_mode(bary) != INTERP_MODE_NOPERSPECTIVE);

@@ -141,13 +140,13 @@ interpolate_channel(nir_builder *b, nir_intrinsic_instr *load, unsigned channel)
    }
 }

-static nir_ssa_def *
+static nir_def *
 lower(nir_builder *b, nir_instr *instr, void *data)
 {
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

    /* Each component is loaded separated */
-   nir_ssa_def *values[NIR_MAX_VEC_COMPONENTS] = {NULL};
+   nir_def *values[NIR_MAX_VEC_COMPONENTS] = {NULL};
    for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
       values[i] = interpolate_channel(b, intr, i);
    }

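Stripped of the builder API, the interpolation in this file is just the evaluation of a per-varying plane equation at a pixel position. A minimal sketch of the same arithmetic, mirroring the nir_ffma chain above (coefficient layout assumed from the code, not from hardware docs):

#include <math.h>

/* Evaluate a varying's plane equation at position (x, y):
 * value = cf[0] * x + cf[1] * y + cf[2], using fused multiply-adds.
 */
static float
interpolate(const float cf[3], float x, float y)
{
   float v = fmaf(y, cf[1], cf[2]);
   return fmaf(x, cf[0], v);
}
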
@@ -21,13 +21,13 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
    if (intr->intrinsic != nir_intrinsic_load_interpolated_input)
       return false;

-   unsigned mask = nir_ssa_def_components_read(&intr->dest.ssa);
+   unsigned mask = nir_def_components_read(&intr->dest.ssa);
    if (mask == 0 || mask == nir_component_mask(intr->num_components))
       return false;

    b->cursor = nir_before_instr(instr);
    unsigned bit_size = nir_dest_bit_size(intr->dest);
-   nir_ssa_def *comps[4] = {NULL};
+   nir_def *comps[4] = {NULL};

    for (unsigned c = 0; c < intr->num_components; ++c) {
       if (mask & BITFIELD_BIT(c)) {

@@ -44,7 +44,7 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)

          /* Shrink the load to count contiguous components */
          nir_ssa_dest_init(clone, &clone_intr->dest, count, bit_size);
-         nir_ssa_def *clone_vec = &clone_intr->dest.ssa;
+         nir_def *clone_vec = &clone_intr->dest.ssa;
          clone_intr->num_components = count;

          /* The load starts from component c relative to the original load */

@@ -64,11 +64,11 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)
          /* The value of unused components is irrelevant, but use an undef for
           * semantics. It will be eliminated by DCE after copyprop.
           */
-         comps[c] = nir_ssa_undef(b, 1, bit_size);
+         comps[c] = nir_undef(b, 1, bit_size);
       }
    }

-   nir_ssa_def_rewrite_uses(&intr->dest.ssa,
+   nir_def_rewrite_uses(&intr->dest.ssa,
                             nir_vec(b, comps, intr->num_components));
    return true;
 }

@@ -84,7 +84,7 @@ lower_sample_mask_to_zs(nir_builder *b, nir_instr *instr, UNUSED void *data)
     */
    if (intr->intrinsic == nir_intrinsic_store_zs_agx && !depth_written) {
       /* Load the current depth at this pixel */
-      nir_ssa_def *z = nir_load_frag_coord_zw(b, .component = 2);
+      nir_def *z = nir_load_frag_coord_zw(b, .component = 2);

       /* Write it out from this store_zs */
       nir_intrinsic_set_base(intr, nir_intrinsic_base(intr) | BASE_Z);

@@ -103,7 +103,7 @@ lower_sample_mask_to_zs(nir_builder *b, nir_instr *instr, UNUSED void *data)
    /* Write a NaN depth value for discarded samples */
    nir_store_zs_agx(b, intr->src[0].ssa, nir_imm_float(b, NAN),
                     stencil_written ? nir_imm_intN_t(b, 0, 16)
-                                    : nir_ssa_undef(b, 1, 16) /* stencil */,
+                                    : nir_undef(b, 1, 16) /* stencil */,
                     .base = BASE_Z | (stencil_written ? BASE_S : 0));

    nir_instr_remove(instr);

@@ -196,9 +196,9 @@ agx_nir_lower_sample_mask(nir_shader *shader, unsigned nr_samples)
    /* Last discard is executed unconditionally, so fuse tests. */
    b.cursor = nir_before_instr(&intr->instr);

-   nir_ssa_def *all_samples = nir_imm_intN_t(&b, ALL_SAMPLES, 16);
-   nir_ssa_def *killed = intr->src[0].ssa;
-   nir_ssa_def *live = nir_ixor(&b, killed, all_samples);
+   nir_def *all_samples = nir_imm_intN_t(&b, ALL_SAMPLES, 16);
+   nir_def *killed = intr->src[0].ssa;
+   nir_def *live = nir_ixor(&b, killed, all_samples);

    nir_sample_mask_agx(&b, all_samples, live);
    nir_instr_remove(&intr->instr);

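With ALL_SAMPLES as an all-ones mask over the sample count, the set of live samples is the complement of the killed set, which a single XOR computes, exactly as the nir_ixor above does. A scalar sketch (4-sample mask shown for illustration):

#include <stdint.h>

#define ALL_SAMPLES 0xF /* illustrative 4-sample all-ones mask */

static uint16_t
live_samples(uint16_t killed)
{
   /* killed ^ all-ones == ~killed restricted to the sample bits */
   return killed ^ ALL_SAMPLES;
}
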
@@ -19,30 +19,30 @@
 #define AGX_FORMAT_RGB32_EMULATED 0x36
 #define AGX_LAYOUT_LINEAR         0x0

-static nir_ssa_def *
-texture_descriptor_ptr_for_handle(nir_builder *b, nir_ssa_def *handle)
+static nir_def *
+texture_descriptor_ptr_for_handle(nir_builder *b, nir_def *handle)
 {
    /* Bindless handles are a vec2, where the first source is the (constant)
     * uniform register number and the second source is the byte offset.
     */
-   nir_ssa_scalar uniform = nir_ssa_scalar_resolved(handle, 0);
-   unsigned uniform_idx = nir_ssa_scalar_as_uint(uniform);
+   nir_scalar uniform = nir_scalar_resolved(handle, 0);
+   unsigned uniform_idx = nir_scalar_as_uint(uniform);

-   nir_ssa_def *base = nir_load_preamble(b, 1, 64, uniform_idx);
-   nir_ssa_def *offset = nir_u2u64(b, nir_channel(b, handle, 1));
+   nir_def *base = nir_load_preamble(b, 1, 64, uniform_idx);
+   nir_def *offset = nir_u2u64(b, nir_channel(b, handle, 1));

    return nir_iadd(b, base, offset);
 }

-static nir_ssa_def *
-texture_descriptor_ptr_for_index(nir_builder *b, nir_ssa_def *index)
+static nir_def *
+texture_descriptor_ptr_for_index(nir_builder *b, nir_def *index)
 {
    return nir_iadd(
       b, nir_load_texture_base_agx(b),
       nir_u2u64(b, nir_imul_imm(b, index, AGX_TEXTURE_DESC_STRIDE)));
 }

-static nir_ssa_def *
+static nir_def *
 texture_descriptor_ptr(nir_builder *b, nir_tex_instr *tex)
 {
    int handle_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);

@@ -50,7 +50,7 @@ texture_descriptor_ptr(nir_builder *b, nir_tex_instr *tex)
       return texture_descriptor_ptr_for_handle(b, tex->src[handle_idx].src.ssa);

    /* For non-bindless, compute from the texture index */
-   nir_ssa_def *index;
+   nir_def *index;

    int offs_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_offset);
    if (offs_idx >= 0)

@@ -66,40 +66,40 @@ texture_descriptor_ptr(nir_builder *b, nir_tex_instr *tex)
  * original size is irrecoverable. Instead, we stash it in the "Acceleration
  * buffer" field, which is unused for linear images. Fetch just that.
  */
-static nir_ssa_def *
-agx_txs_buffer(nir_builder *b, nir_ssa_def *descriptor)
+static nir_def *
+agx_txs_buffer(nir_builder *b, nir_def *descriptor)
 {
-   nir_ssa_def *size_ptr = nir_iadd_imm(b, descriptor, 16);
+   nir_def *size_ptr = nir_iadd_imm(b, descriptor, 16);

    return nir_load_global_constant(b, size_ptr, 8, 1, 32);
 }

-static nir_ssa_def *
+static nir_def *
 agx_txs(nir_builder *b, nir_tex_instr *tex)
 {
-   nir_ssa_def *ptr = texture_descriptor_ptr(b, tex);
-   nir_ssa_def *comp[4] = {NULL};
+   nir_def *ptr = texture_descriptor_ptr(b, tex);
+   nir_def *comp[4] = {NULL};

    if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
       return agx_txs_buffer(b, ptr);

-   nir_ssa_def *desc = nir_load_global_constant(b, ptr, 8, 4, 32);
-   nir_ssa_def *w0 = nir_channel(b, desc, 0);
-   nir_ssa_def *w1 = nir_channel(b, desc, 1);
-   nir_ssa_def *w3 = nir_channel(b, desc, 3);
+   nir_def *desc = nir_load_global_constant(b, ptr, 8, 4, 32);
+   nir_def *w0 = nir_channel(b, desc, 0);
+   nir_def *w1 = nir_channel(b, desc, 1);
+   nir_def *w3 = nir_channel(b, desc, 3);

    /* Width minus 1: bits [28, 42) */
-   nir_ssa_def *width_m1 =
+   nir_def *width_m1 =
       nir_extr_agx(b, w0, w1, nir_imm_int(b, 28), nir_imm_int(b, 14));

    /* Height minus 1: bits [42, 56) */
-   nir_ssa_def *height_m1 = nir_ubitfield_extract_imm(b, w1, 42 - 32, 14);
+   nir_def *height_m1 = nir_ubitfield_extract_imm(b, w1, 42 - 32, 14);

    /* Depth minus 1: bits [110, 124) */
-   nir_ssa_def *depth_m1 = nir_ubitfield_extract_imm(b, w3, 110 - 96, 14);
+   nir_def *depth_m1 = nir_ubitfield_extract_imm(b, w3, 110 - 96, 14);

    /* First level: bits [56, 60) */
-   nir_ssa_def *lod = nir_ubitfield_extract_imm(b, w1, 56 - 32, 4);
+   nir_def *lod = nir_ubitfield_extract_imm(b, w1, 56 - 32, 4);

    /* Add LOD offset to first level to get the interesting LOD */
    int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);

@@ -116,14 +116,13 @@ agx_txs(nir_builder *b, nir_tex_instr *tex)
     * TODO: Optimize this, since linear 2D arrays aren't needed for APIs and
     * this just gets used internally for blits.
     */
-   nir_ssa_def *layout = nir_ubitfield_extract_imm(b, w0, 4, 2);
+   nir_def *layout = nir_ubitfield_extract_imm(b, w0, 4, 2);

    /* Get the 2 bytes after the first 128-bit descriptor */
-   nir_ssa_def *extension =
+   nir_def *extension =
       nir_load_global_constant(b, nir_iadd_imm(b, ptr, 16), 8, 1, 16);

-   nir_ssa_def *depth_linear_m1 =
-      nir_iand_imm(b, extension, BITFIELD_MASK(11));
+   nir_def *depth_linear_m1 = nir_iand_imm(b, extension, BITFIELD_MASK(11));

    depth_linear_m1 = nir_u2uN(b, depth_linear_m1, depth_m1->bit_size);

@@ -132,9 +131,9 @@ agx_txs(nir_builder *b, nir_tex_instr *tex)
    }

    /* Add 1 to width-1, height-1 to get base dimensions */
-   nir_ssa_def *width = nir_iadd_imm(b, width_m1, 1);
-   nir_ssa_def *height = nir_iadd_imm(b, height_m1, 1);
-   nir_ssa_def *depth = nir_iadd_imm(b, depth_m1, 1);
+   nir_def *width = nir_iadd_imm(b, width_m1, 1);
+   nir_def *height = nir_iadd_imm(b, height_m1, 1);
+   nir_def *depth = nir_iadd_imm(b, depth_m1, 1);

    /* 1D Arrays have their second component as the layer count */
    if (tex->sampler_dim == GLSL_SAMPLER_DIM_1D && tex->is_array)

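The width field above straddles the boundary between two 32-bit descriptor words, which is why a cross-word extract (nir_extr_agx) is used instead of a plain bitfield extract. A standalone sketch of extracting such a field, with the bit positions as illustration:

#include <stdint.h>

/* Extract a `bits`-wide field starting at bit `lo` from the 64-bit value
 * formed by two consecutive 32-bit descriptor words (w1:w0).
 */
static uint32_t
extract_across_words(uint32_t w0, uint32_t w1, unsigned lo, unsigned bits)
{
   uint64_t both = ((uint64_t)w1 << 32) | w0;
   return (uint32_t)((both >> lo) & ((1ull << bits) - 1));
}

/* e.g. width-minus-1 at bits [28, 42): extract_across_words(w0, w1, 28, 14) */
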
@@ -179,42 +178,42 @@ lower_txs(nir_builder *b, nir_instr *instr, UNUSED void *data)
    if (tex->op != nir_texop_txs)
       return false;

-   nir_ssa_def *res = agx_txs(b, tex);
-   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, res, instr);
+   nir_def *res = agx_txs(b, tex);
+   nir_def_rewrite_uses_after(&tex->dest.ssa, res, instr);
    nir_instr_remove(instr);
    return true;
 }

-static nir_ssa_def *
+static nir_def *
 format_is_rgb32(nir_builder *b, nir_tex_instr *tex)
 {
-   nir_ssa_def *ptr = texture_descriptor_ptr(b, tex);
-   nir_ssa_def *desc = nir_load_global_constant(b, ptr, 8, 1, 32);
-   nir_ssa_def *channels = nir_ubitfield_extract_imm(b, desc, 6, 7);
+   nir_def *ptr = texture_descriptor_ptr(b, tex);
+   nir_def *desc = nir_load_global_constant(b, ptr, 8, 1, 32);
+   nir_def *channels = nir_ubitfield_extract_imm(b, desc, 6, 7);

    return nir_ieq_imm(b, channels, AGX_FORMAT_RGB32_EMULATED);
 }

 /* Load from an RGB32 buffer texture */
-static nir_ssa_def *
-load_rgb32(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *coordinate)
+static nir_def *
+load_rgb32(nir_builder *b, nir_tex_instr *tex, nir_def *coordinate)
 {
    /* Base address right-shifted 4: bits [66, 102) */
-   nir_ssa_def *ptr_hi = nir_iadd_imm(b, texture_descriptor_ptr(b, tex), 8);
-   nir_ssa_def *desc_hi_words = nir_load_global_constant(b, ptr_hi, 8, 2, 32);
-   nir_ssa_def *desc_hi = nir_pack_64_2x32(b, desc_hi_words);
-   nir_ssa_def *base_shr4 =
+   nir_def *ptr_hi = nir_iadd_imm(b, texture_descriptor_ptr(b, tex), 8);
+   nir_def *desc_hi_words = nir_load_global_constant(b, ptr_hi, 8, 2, 32);
+   nir_def *desc_hi = nir_pack_64_2x32(b, desc_hi_words);
+   nir_def *base_shr4 =
       nir_iand_imm(b, nir_ushr_imm(b, desc_hi, 2), BITFIELD64_MASK(36));
-   nir_ssa_def *base = nir_ishl_imm(b, base_shr4, 4);
+   nir_def *base = nir_ishl_imm(b, base_shr4, 4);

-   nir_ssa_def *raw = nir_load_constant_agx(
-      b, 3, nir_dest_bit_size(tex->dest), base, nir_imul_imm(b, coordinate, 3),
+   nir_def *raw = nir_load_constant_agx(b, 3, nir_dest_bit_size(tex->dest),
+                                        base, nir_imul_imm(b, coordinate, 3),
                                         .format = AGX_INTERNAL_FORMAT_I32);

    /* Set alpha to 1 (in the appropriate format) */
    bool is_float = nir_alu_type_get_base_type(tex->dest_type) == nir_type_float;

-   nir_ssa_def *swizzled[4] = {
+   nir_def *swizzled[4] = {
      nir_channel(b, raw, 0), nir_channel(b, raw, 1), nir_channel(b, raw, 2),
      is_float ? nir_imm_float(b, 1.0) : nir_imm_int(b, 1)};

@@ -225,8 +224,8 @@ load_rgb32(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *coordinate)
  * Given a 1D buffer texture coordinate, calculate the 2D coordinate vector that
  * will be used to access the linear 2D texture bound to the buffer.
  */
-static nir_ssa_def *
-coords_for_buffer_texture(nir_builder *b, nir_ssa_def *coord)
+static nir_def *
+coords_for_buffer_texture(nir_builder *b, nir_def *coord)
 {
    return nir_vec2(b, nir_iand_imm(b, coord, BITFIELD_MASK(10)),
                    nir_ushr_imm(b, coord, 10));

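The 1D-to-2D split above is a fixed-radix decomposition: the low 10 bits index within a 1024-texel row of the linear 2D texture the driver binds, and the remaining bits pick the row. The same mapping in plain C, as a sketch:

#include <stdint.h>

/* Map a 1D buffer-texture index onto a 1024-texel-wide linear 2D texture,
 * mirroring coords_for_buffer_texture above.
 */
static void
buffer_coord_to_2d(uint32_t coord, uint32_t *x, uint32_t *y)
{
   *x = coord & 0x3FF; /* low 10 bits: column */
   *y = coord >> 10;   /* remaining bits: row */
}
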
@@ -247,7 +246,7 @@ coords_for_buffer_texture(nir_builder *b, nir_ssa_def *coord)
 static bool
 lower_buffer_texture(nir_builder *b, nir_tex_instr *tex)
 {
-   nir_ssa_def *coord = nir_steal_tex_src(tex, nir_tex_src_coord);
+   nir_def *coord = nir_steal_tex_src(tex, nir_tex_src_coord);

    /* The OpenGL ES 3.2 specification says on page 187:
     *

@@ -258,19 +257,19 @@ lower_buffer_texture(nir_builder *b, nir_tex_instr *tex)
     *
     * However, faulting would be undesirable for robustness, so clamp.
     */
-   nir_ssa_def *size = nir_get_texture_size(b, tex);
+   nir_def *size = nir_get_texture_size(b, tex);
    coord = nir_umin(b, coord, nir_iadd_imm(b, size, -1));

    /* Lower RGB32 reads if the format requires */
    nir_if *nif = nir_push_if(b, format_is_rgb32(b, tex));
-   nir_ssa_def *rgb32 = load_rgb32(b, tex, coord);
+   nir_def *rgb32 = load_rgb32(b, tex, coord);
    nir_push_else(b, nif);

    /* Otherwise, lower the texture instruction to read from 2D */
    assert(coord->num_components == 1 && "buffer textures are 1D");
    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;

-   nir_ssa_def *coord2d = coords_for_buffer_texture(b, coord);
+   nir_def *coord2d = coords_for_buffer_texture(b, coord);
    nir_instr_remove(&tex->instr);
    nir_builder_instr_insert(b, &tex->instr);
    nir_tex_instr_add_src(tex, nir_tex_src_backend1, nir_src_for_ssa(coord2d));

@@ -278,8 +277,8 @@ lower_buffer_texture(nir_builder *b, nir_tex_instr *tex)
    nir_pop_if(b, nif);

    /* Put it together with a phi */
-   nir_ssa_def *phi = nir_if_phi(b, rgb32, &tex->dest.ssa);
-   nir_ssa_def_rewrite_uses(&tex->dest.ssa, phi);
+   nir_def *phi = nir_if_phi(b, rgb32, &tex->dest.ssa);
+   nir_def_rewrite_uses(&tex->dest.ssa, phi);
    nir_phi_instr *phi_instr = nir_instr_as_phi(phi->parent_instr);
    nir_phi_src *else_src = nir_phi_get_src_from_block(phi_instr, else_block);
    nir_instr_rewrite_src_ssa(phi->parent_instr, &else_src->src, &tex->dest.ssa);

@@ -307,8 +306,8 @@ lower_regular_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
       return lower_buffer_texture(b, tex);

    /* Get the coordinates */
-   nir_ssa_def *coord = nir_steal_tex_src(tex, nir_tex_src_coord);
-   nir_ssa_def *ms_idx = nir_steal_tex_src(tex, nir_tex_src_ms_index);
+   nir_def *coord = nir_steal_tex_src(tex, nir_tex_src_coord);
+   nir_def *ms_idx = nir_steal_tex_src(tex, nir_tex_src_ms_index);

    /* It's unclear if mipmapped 1D textures work in the hardware. For now, we
     * always lower to 2D.

@@ -333,7 +332,7 @@ lower_regular_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
    };

    for (unsigned i = 0; i < ARRAY_SIZE(other_srcs); ++i) {
-      nir_ssa_def *src = nir_steal_tex_src(tex, other_srcs[i]);
+      nir_def *src = nir_steal_tex_src(tex, other_srcs[i]);

       if (!src)
          continue;

@@ -350,11 +349,11 @@ lower_regular_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
    /* The layer is always the last component of the NIR coordinate, split it off
     * because we'll need to swizzle.
     */
-   nir_ssa_def *layer = NULL;
+   nir_def *layer = NULL;

    if (tex->is_array) {
       unsigned lidx = coord->num_components - 1;
-      nir_ssa_def *unclamped_layer = nir_channel(b, coord, lidx);
+      nir_def *unclamped_layer = nir_channel(b, coord, lidx);
       coord = nir_trim_vector(b, coord, lidx);

       /* Round layer to nearest even */

@@ -364,9 +363,9 @@ lower_regular_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
       /* Clamp to max layer = (# of layers - 1) for out-of-bounds handling.
        * Layer must be 16-bits for the hardware, drop top bits after clamping.
        */
-      nir_ssa_def *txs = nir_get_texture_size(b, tex);
-      nir_ssa_def *nr_layers = nir_channel(b, txs, lidx);
-      nir_ssa_def *max_layer = nir_iadd_imm(b, nr_layers, -1);
+      nir_def *txs = nir_get_texture_size(b, tex);
+      nir_def *nr_layers = nir_channel(b, txs, lidx);
+      nir_def *max_layer = nir_iadd_imm(b, nr_layers, -1);
       layer = nir_u2u16(b, nir_umin(b, unclamped_layer, max_layer));
    }

@@ -374,7 +373,7 @@ lower_regular_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
     * vec6 16-bit coordinate tuple, which would be inconvenient in NIR for
     * little benefit (a minor optimization, I guess).
     */
-   nir_ssa_def *sample_array = (ms_idx && layer)
+   nir_def *sample_array = (ms_idx && layer)
                                  ? nir_pack_32_2x16_split(b, ms_idx, layer)
                                  : ms_idx ? nir_u2u32(b, ms_idx)
                                  : layer ? nir_u2u32(b, layer)

@@ -390,14 +389,14 @@ lower_regular_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
    nir_tex_instr_add_src(tex, nir_tex_src_backend1, nir_src_for_ssa(coord));

    /* Furthermore, if there is an offset vector, it must be packed */
-   nir_ssa_def *offset = nir_steal_tex_src(tex, nir_tex_src_offset);
+   nir_def *offset = nir_steal_tex_src(tex, nir_tex_src_offset);

    if (offset != NULL) {
-      nir_ssa_def *packed = NULL;
+      nir_def *packed = NULL;

       for (unsigned c = 0; c < offset->num_components; ++c) {
-         nir_ssa_def *nibble = nir_iand_imm(b, nir_channel(b, offset, c), 0xF);
-         nir_ssa_def *shifted = nir_ishl_imm(b, nibble, 4 * c);
+         nir_def *nibble = nir_iand_imm(b, nir_channel(b, offset, c), 0xF);
+         nir_def *shifted = nir_ishl_imm(b, nibble, 4 * c);

          if (packed != NULL)
             packed = nir_ior(b, packed, shifted);

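The loop above packs each texel offset component into a 4-bit nibble of one word, low component first. The same packing as a standalone sketch:

#include <stdint.h>

/* Pack up to 3 texel offsets into 4-bit nibbles of one word, mirroring
 * how the loop above builds `packed`.
 */
static uint32_t
pack_offsets(const int32_t *offset, unsigned num_components)
{
   uint32_t packed = 0;

   for (unsigned c = 0; c < num_components; ++c)
      packed |= ((uint32_t)offset[c] & 0xF) << (4 * c);

   return packed;
}
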
@@ -411,7 +410,7 @@ lower_regular_texture(nir_builder *b, nir_instr *instr, UNUSED void *data)
    return true;
 }

-static nir_ssa_def *
+static nir_def *
 bias_for_tex(nir_builder *b, nir_tex_instr *tex)
 {
    nir_instr *instr = nir_get_texture_size(b, tex)->parent_instr;

@@ -446,7 +445,7 @@ lower_sampler_bias(nir_builder *b, nir_instr *instr, UNUSED void *data)
       nir_tex_src_type src =
          tex->op == nir_texop_txl ? nir_tex_src_lod : nir_tex_src_bias;

-      nir_ssa_def *orig = nir_steal_tex_src(tex, src);
+      nir_def *orig = nir_steal_tex_src(tex, src);
       assert(orig != NULL && "invalid NIR");

       if (orig->bit_size != 16)

@@ -463,14 +462,14 @@ lower_sampler_bias(nir_builder *b, nir_instr *instr, UNUSED void *data)
        * derivatives. So scale derivatives by exp2(bias) to
        * get level-of-detail log2(exp2(bias) * rho) = bias + log2(rho).
        */
-      nir_ssa_def *scale = nir_fexp2(b, nir_f2f32(b, bias_for_tex(b, tex)));
+      nir_def *scale = nir_fexp2(b, nir_f2f32(b, bias_for_tex(b, tex)));
       nir_tex_src_type src[] = {nir_tex_src_ddx, nir_tex_src_ddy};

       for (unsigned s = 0; s < ARRAY_SIZE(src); ++s) {
-         nir_ssa_def *orig = nir_steal_tex_src(tex, src[s]);
+         nir_def *orig = nir_steal_tex_src(tex, src[s]);
          assert(orig != NULL && "invalid");

-         nir_ssa_def *scaled = nir_fmul(b, nir_f2f32(b, orig), scale);
+         nir_def *scaled = nir_fmul(b, nir_f2f32(b, orig), scale);
          nir_tex_instr_add_src(tex, src[s], nir_src_for_ssa(scaled));
       }

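The identity the comment relies on, log2(exp2(bias) * rho) = bias + log2(rho), is what lets a LOD bias be folded into pre-scaled derivatives. It can be checked numerically in isolation:

#include <math.h>
#include <stdio.h>

int
main(void)
{
   float bias = 1.5f, rho = 3.0f;

   /* Scaling the derivative magnitude by exp2(bias) shifts the computed
    * level-of-detail by exactly `bias`. */
   float lod_scaled = log2f(exp2f(bias) * rho);
   float lod_biased = bias + log2f(rho);

   printf("%f == %f\n", lod_scaled, lod_biased);
   return 0;
}
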
@@ -520,11 +519,11 @@ legalize_image_lod(nir_builder *b, nir_instr *instr, UNUSED void *data)
       return false;

    b->cursor = nir_before_instr(instr);
-   nir_src_rewrite_ssa(src, nir_i2i16(b, src->ssa));
+   nir_src_rewrite(src, nir_i2i16(b, src->ssa));
    return true;
 }

-static nir_ssa_def *
+static nir_def *
 txs_for_image(nir_builder *b, nir_intrinsic_instr *intr,
               unsigned num_components, unsigned bit_size)
 {

@@ -554,44 +553,40 @@ txs_for_image(nir_builder *b, nir_intrinsic_instr *intr,
    return &tex->dest.ssa;
 }

-static nir_ssa_def *
-nir_bitfield_mask(nir_builder *b, nir_ssa_def *x)
+static nir_def *
+nir_bitfield_mask(nir_builder *b, nir_def *x)
 {
-   nir_ssa_def *one = nir_imm_intN_t(b, 1, x->bit_size);
+   nir_def *one = nir_imm_intN_t(b, 1, x->bit_size);
    return nir_iadd_imm(b, nir_ishl(b, one, nir_u2u32(b, x)), -1);
 }

-static nir_ssa_def *
-calculate_twiddled_coordinates(nir_builder *b, nir_ssa_def *coord,
-                               nir_ssa_def *tile_w_px_log2,
-                               nir_ssa_def *tile_h_px_log2,
-                               nir_ssa_def *width_tl,
-                               nir_ssa_def *layer_stride_el)
+static nir_def *
+calculate_twiddled_coordinates(nir_builder *b, nir_def *coord,
+                               nir_def *tile_w_px_log2, nir_def *tile_h_px_log2,
+                               nir_def *width_tl, nir_def *layer_stride_el)
 {
    /* SIMD-within-a-register */
-   nir_ssa_def *coord_px = nir_pack_32_2x16(b, nir_u2u16(b, coord));
-   nir_ssa_def *tile_mask =
+   nir_def *coord_px = nir_pack_32_2x16(b, nir_u2u16(b, coord));
+   nir_def *tile_mask =
       nir_pack_32_2x16_split(b, nir_bitfield_mask(b, tile_w_px_log2),
                              nir_bitfield_mask(b, tile_h_px_log2));

    /* Modulo by the tile width/height to get the offsets within the tile */
-   nir_ssa_def *offs_xy_px = nir_iand(b, coord_px, tile_mask);
+   nir_def *offs_xy_px = nir_iand(b, coord_px, tile_mask);

    /* Get the coordinates of the corner of the tile */
-   nir_ssa_def *tile_xy_px = nir_isub(b, coord_px, offs_xy_px);
+   nir_def *tile_xy_px = nir_isub(b, coord_px, offs_xy_px);

    /* Unpack SIMD-within-a-register */
-   nir_ssa_def *offs_x_px = nir_unpack_32_2x16_split_x(b, offs_xy_px);
-   nir_ssa_def *offs_y_px = nir_unpack_32_2x16_split_y(b, offs_xy_px);
-   nir_ssa_def *tile_x_px =
-      nir_u2u32(b, nir_unpack_32_2x16_split_x(b, tile_xy_px));
-   nir_ssa_def *tile_y_px =
-      nir_u2u32(b, nir_unpack_32_2x16_split_y(b, tile_xy_px));
+   nir_def *offs_x_px = nir_unpack_32_2x16_split_x(b, offs_xy_px);
+   nir_def *offs_y_px = nir_unpack_32_2x16_split_y(b, offs_xy_px);
+   nir_def *tile_x_px = nir_u2u32(b, nir_unpack_32_2x16_split_x(b, tile_xy_px));
+   nir_def *tile_y_px = nir_u2u32(b, nir_unpack_32_2x16_split_y(b, tile_xy_px));

    /* Get the tile size */
-   nir_ssa_def *one_32 = nir_imm_int(b, 1);
-   nir_ssa_def *tile_w_px = nir_ishl(b, one_32, nir_u2u32(b, tile_w_px_log2));
-   nir_ssa_def *tile_h_px = nir_ishl(b, one_32, nir_u2u32(b, tile_h_px_log2));
+   nir_def *one_32 = nir_imm_int(b, 1);
+   nir_def *tile_w_px = nir_ishl(b, one_32, nir_u2u32(b, tile_w_px_log2));
+   nir_def *tile_h_px = nir_ishl(b, one_32, nir_u2u32(b, tile_h_px_log2));

    /* tile row start (px) =
     *  (y // tile height) * (# of tiles/row) * (# of pix/tile) =

@@ -599,7 +594,7 @@ calculate_twiddled_coordinates(nir_builder *b, nir_ssa_def *coord,
     *  tile height =
     *  align_down(y, tile height) * width_tl * tile width
     */
-   nir_ssa_def *tile_row_start_px =
+   nir_def *tile_row_start_px =
       nir_imul(b, nir_u2u32(b, tile_y_px), nir_imul(b, width_tl, tile_w_px));

    /* tile column start (px) =

@@ -607,38 +602,37 @@ calculate_twiddled_coordinates(nir_builder *b, nir_ssa_def *coord,
     *  align(x, tile width) / tile width * tile width * tile height =
     *  align(x, tile width) * tile height
     */
-   nir_ssa_def *tile_col_start_px = nir_imul(b, tile_x_px, tile_h_px);
+   nir_def *tile_col_start_px = nir_imul(b, tile_x_px, tile_h_px);

    /* The pixel at which the tile starts is thus... */
-   nir_ssa_def *tile_offset_px =
-      nir_iadd(b, tile_row_start_px, tile_col_start_px);
+   nir_def *tile_offset_px = nir_iadd(b, tile_row_start_px, tile_col_start_px);

    /* Get the total offset */
-   nir_ssa_def *offs_px = nir_interleave_agx(b, offs_x_px, offs_y_px);
-   nir_ssa_def *total_px = nir_iadd(b, tile_offset_px, nir_u2u32(b, offs_px));
+   nir_def *offs_px = nir_interleave_agx(b, offs_x_px, offs_y_px);
+   nir_def *total_px = nir_iadd(b, tile_offset_px, nir_u2u32(b, offs_px));

    if (layer_stride_el) {
-      nir_ssa_def *layer = nir_channel(b, coord, 2);
-      nir_ssa_def *layer_offset_px = nir_imul(b, layer, layer_stride_el);
+      nir_def *layer = nir_channel(b, coord, 2);
+      nir_def *layer_offset_px = nir_imul(b, layer, layer_stride_el);
       total_px = nir_iadd(b, total_px, layer_offset_px);
    }

    return total_px;
 }

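For intuition, here is the same address computation as plain scalar C: tiles are laid out row-major at tile granularity, and pixels are bit-interleaved inside each tile. This is a sketch that assumes nir_interleave_agx produces the standard Morton interleave; it is not the driver's code.

#include <stdint.h>

/* Interleave the low 16 bits of x and y (Morton order). */
static uint32_t
interleave16(uint32_t x, uint32_t y)
{
   uint32_t out = 0;
   for (unsigned i = 0; i < 16; ++i)
      out |= ((x >> i) & 1) << (2 * i) | ((y >> i) & 1) << (2 * i + 1);
   return out;
}

/* Twiddled pixel index, mirroring the math in the comments above. */
static uint32_t
twiddled_index(uint32_t x, uint32_t y, unsigned tile_w_log2,
               unsigned tile_h_log2, uint32_t width_tl)
{
   uint32_t tile_w = 1u << tile_w_log2, tile_h = 1u << tile_h_log2;
   uint32_t offs_x = x & (tile_w - 1), offs_y = y & (tile_h - 1);
   uint32_t tile_x = x - offs_x, tile_y = y - offs_y;

   uint32_t tile_row_start = tile_y * width_tl * tile_w;
   uint32_t tile_col_start = tile_x * tile_h;

   return tile_row_start + tile_col_start + interleave16(offs_x, offs_y);
}
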
-static nir_ssa_def *
+static nir_def *
 image_texel_address(nir_builder *b, nir_intrinsic_instr *intr,
                     bool return_index)
 {
    /* First, calculate the address of the PBE descriptor */
-   nir_ssa_def *desc_address;
+   nir_def *desc_address;
    if (intr->intrinsic == nir_intrinsic_bindless_image_texel_address ||
        intr->intrinsic == nir_intrinsic_bindless_image_store)
       desc_address = texture_descriptor_ptr_for_handle(b, intr->src[0].ssa);
    else
       desc_address = texture_descriptor_ptr_for_index(b, intr->src[0].ssa);

-   nir_ssa_def *coord = intr->src[1].ssa;
+   nir_def *coord = intr->src[1].ssa;

    enum glsl_sampler_dim dim = nir_intrinsic_image_dim(intr);
    bool layered = nir_intrinsic_image_array(intr) ||

@@ -649,36 +643,36 @@ image_texel_address(nir_builder *b, nir_intrinsic_instr *intr,
     * software-defined atomic descriptor, or (if array image) a pointer to the
     * descriptor. Grab it.
     */
-   nir_ssa_def *meta_ptr = nir_iadd_imm(b, desc_address, 16);
-   nir_ssa_def *meta = nir_load_global_constant(b, meta_ptr, 8, 1, 64);
-   nir_ssa_def *layer_stride_el = NULL;
+   nir_def *meta_ptr = nir_iadd_imm(b, desc_address, 16);
+   nir_def *meta = nir_load_global_constant(b, meta_ptr, 8, 1, 64);
+   nir_def *layer_stride_el = NULL;

    if (layered) {
-      nir_ssa_def *desc = nir_load_global_constant(b, meta, 8, 3, 32);
+      nir_def *desc = nir_load_global_constant(b, meta, 8, 3, 32);
       meta = nir_pack_64_2x32(b, nir_trim_vector(b, desc, 2));
       layer_stride_el = nir_channel(b, desc, 2);
    }

-   nir_ssa_def *meta_hi = nir_unpack_64_2x32_split_y(b, meta);
+   nir_def *meta_hi = nir_unpack_64_2x32_split_y(b, meta);

    /* See the GenXML definitions of the software-defined atomic descriptors */
-   nir_ssa_def *base;
+   nir_def *base;

    if (dim == GLSL_SAMPLER_DIM_BUF)
       base = meta;
    else
       base = nir_ishl_imm(b, nir_iand_imm(b, meta, BITFIELD64_MASK(33)), 7);

-   nir_ssa_def *tile_w_px_log2 =
+   nir_def *tile_w_px_log2 =
       nir_u2u16(b, nir_ubitfield_extract_imm(b, meta_hi, 33 - 32, 3));
-   nir_ssa_def *tile_h_px_log2 =
+   nir_def *tile_h_px_log2 =
       nir_u2u16(b, nir_ubitfield_extract_imm(b, meta_hi, 36 - 32, 3));
-   nir_ssa_def *width_tl = nir_ubitfield_extract_imm(b, meta_hi, 39 - 32, 14);
+   nir_def *width_tl = nir_ubitfield_extract_imm(b, meta_hi, 39 - 32, 14);

    /* We do not allow atomics on linear 2D or linear 2D arrays, as there are no
     * known use cases. So, we're linear if buffer or 1D, and twiddled otherwise.
     */
-   nir_ssa_def *total_px;
+   nir_def *total_px;
    if (dim == GLSL_SAMPLER_DIM_BUF || dim == GLSL_SAMPLER_DIM_1D) {
       /* 1D linear is indexed directly */
       total_px = nir_channel(b, coord, 0);

@@ -687,12 +681,11 @@ image_texel_address(nir_builder *b, nir_intrinsic_instr *intr,
          b, coord, tile_w_px_log2, tile_h_px_log2, width_tl, layer_stride_el);
    }

-   nir_ssa_def *total_sa;
+   nir_def *total_sa;

    if (dim == GLSL_SAMPLER_DIM_MS) {
-      nir_ssa_def *sample_idx = intr->src[2].ssa;
-      nir_ssa_def *samples_log2 =
-         nir_ubitfield_extract_imm(b, meta_hi, 54 - 32, 2);
+      nir_def *sample_idx = intr->src[2].ssa;
+      nir_def *samples_log2 = nir_ubitfield_extract_imm(b, meta_hi, 54 - 32, 2);

       total_sa = nir_iadd(b, nir_ishl(b, total_px, samples_log2), sample_idx);
    } else {

@@ -709,7 +702,7 @@ image_texel_address(nir_builder *b, nir_intrinsic_instr *intr,
    enum pipe_format format = nir_intrinsic_format(intr);
    unsigned bytes_per_sample_B = util_format_get_blocksize(format);

-   nir_ssa_def *total_B = nir_imul_imm(b, total_sa, bytes_per_sample_B);
+   nir_def *total_B = nir_imul_imm(b, total_sa, bytes_per_sample_B);
    return nir_iadd(b, base, nir_u2u64(b, total_B));
 }

@@ -719,14 +712,14 @@ lower_buffer_image(nir_builder *b, nir_intrinsic_instr *intr)
    if (nir_intrinsic_image_dim(intr) != GLSL_SAMPLER_DIM_BUF)
       return false;

-   nir_ssa_def *coord_vector = intr->src[1].ssa;
-   nir_ssa_def *coord = nir_channel(b, coord_vector, 0);
+   nir_def *coord_vector = intr->src[1].ssa;
+   nir_def *coord = nir_channel(b, coord_vector, 0);

    /* Lower the buffer load/store to a 2D image load/store, matching the 2D
     * texture/PBE descriptor the driver supplies for buffer images.
     */
-   nir_ssa_def *coord2d = coords_for_buffer_texture(b, coord);
-   nir_src_rewrite_ssa(&intr->src[1], nir_pad_vector(b, coord2d, 4));
+   nir_def *coord2d = coords_for_buffer_texture(b, coord);
+   nir_src_rewrite(&intr->src[1], nir_pad_vector(b, coord2d, 4));
    nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_2D);
    return true;
 }

@@ -749,7 +742,7 @@ lower_images(nir_builder *b, nir_instr *instr, UNUSED void *data)

    case nir_intrinsic_image_size:
    case nir_intrinsic_bindless_image_size:
-      nir_ssa_def_rewrite_uses(
+      nir_def_rewrite_uses(
         &intr->dest.ssa,
         txs_for_image(b, intr, nir_dest_num_components(intr->dest),
                       nir_dest_bit_size(intr->dest)));

@@ -757,7 +750,7 @@ lower_images(nir_builder *b, nir_instr *instr, UNUSED void *data)

    case nir_intrinsic_image_texel_address:
    case nir_intrinsic_bindless_image_texel_address:
-      nir_ssa_def_rewrite_uses(&intr->dest.ssa,
+      nir_def_rewrite_uses(&intr->dest.ssa,
                                image_texel_address(b, intr, false));
       return true;

@@ -842,10 +835,10 @@ lower_multisampled_store(nir_builder *b, nir_instr *instr, UNUSED void *data)
    if (nir_intrinsic_image_dim(intr) != GLSL_SAMPLER_DIM_MS)
       return false;

-   nir_ssa_def *index_px = image_texel_address(b, intr, true);
-   nir_ssa_def *coord2d = coords_for_buffer_texture(b, index_px);
+   nir_def *index_px = image_texel_address(b, intr, true);
+   nir_def *coord2d = coords_for_buffer_texture(b, index_px);

-   nir_src_rewrite_ssa(&intr->src[1], nir_pad_vector(b, coord2d, 4));
+   nir_src_rewrite(&intr->src[1], nir_pad_vector(b, coord2d, 4));
    nir_intrinsic_set_image_dim(intr, GLSL_SAMPLER_DIM_2D);
    return true;
 }

@@ -20,15 +20,15 @@ pass(struct nir_builder *b, nir_instr *instr, UNUSED void *data)

    b->cursor = nir_before_instr(instr);

-   nir_ssa_def *ubo_index = nir_ssa_for_src(b, intr->src[0], 1);
-   nir_ssa_def *offset = nir_ssa_for_src(b, *nir_get_io_offset_src(intr), 1);
-   nir_ssa_def *address =
+   nir_def *ubo_index = nir_ssa_for_src(b, intr->src[0], 1);
+   nir_def *offset = nir_ssa_for_src(b, *nir_get_io_offset_src(intr), 1);
+   nir_def *address =
       nir_iadd(b, nir_load_ubo_base_agx(b, ubo_index), nir_u2u64(b, offset));
-   nir_ssa_def *value = nir_load_global_constant(
+   nir_def *value = nir_load_global_constant(
       b, address, nir_intrinsic_align(intr), intr->num_components,
       nir_dest_bit_size(intr->dest));

-   nir_ssa_def_rewrite_uses(&intr->dest.ssa, value);
+   nir_def_rewrite_uses(&intr->dest.ssa, value);
    return true;
 }

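The lowering above reduces a UBO load to pointer arithmetic over a table of buffer base addresses supplied by the driver. A hedged scalar sketch, with a hypothetical table that stands in for the load_ubo_base_agx sysval:

#include <stdint.h>
#include <string.h>

/* Hypothetical per-shader table of UBO base addresses. */
static uint64_t ubo_base[16];

static uint32_t
load_ubo_word(unsigned ubo_index, uint32_t byte_offset)
{
   const char *address = (const char *)(uintptr_t)ubo_base[ubo_index];
   uint32_t value;

   /* Equivalent of the global-constant load at base + offset above. */
   memcpy(&value, address + byte_offset, sizeof(value));
   return value;
}
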
@@ -8,7 +8,7 @@
 #include "agx_compiler.h"

 static void
-def_size(nir_ssa_def *def, unsigned *size, unsigned *align)
+def_size(nir_def *def, unsigned *size, unsigned *align)
 {
    unsigned bit_size = MAX2(def->bit_size, 16);

@@ -50,7 +50,7 @@ instr_cost(nir_instr *instr, const void *data)
 }

 static float
-rewrite_cost(nir_ssa_def *def, const void *data)
+rewrite_cost(nir_def *def, const void *data)
 {
    bool mov_needed = false;
    nir_foreach_use(use, def) {

@@ -76,7 +76,7 @@ rewrite_cost(nir_ssa_def *def, const void *data)
 static bool
 avoid_instr(const nir_instr *instr, const void *data)
 {
-   const nir_ssa_def *def = nir_instr_ssa_def((nir_instr *)instr);
+   const nir_def *def = nir_instr_ssa_def((nir_instr *)instr);

    /* Do not move bindless handles, since we need those to retain their constant
     * base index.

@@ -36,7 +36,7 @@ agx_compile_meta_shader(struct agx_meta_cache *cache, nir_shader *shader,
    return res;
 }

-static nir_ssa_def *
+static nir_def *
 build_background_op(nir_builder *b, enum agx_meta_op op, unsigned rt,
                     unsigned nr, bool msaa)
 {

@@ -10,8 +10,8 @@
 #include "nir_builder.h"
 #include "nir_format_convert.h"

-static inline nir_ssa_def *
-nir_sign_extend_if_sint(nir_builder *b, nir_ssa_def *x, enum pipe_format format)
+static inline nir_def *
+nir_sign_extend_if_sint(nir_builder *b, nir_def *x, enum pipe_format format)
 {
    if (!util_format_is_pure_sint(format))
       return x;

@@ -46,7 +46,7 @@ agx_nir_lower_alpha_to_coverage(nir_shader *shader, uint8_t nr_samples)
       return;

    /* Similarly, if there are less than 4 components, alpha is undefined */
-   nir_ssa_def *rgba = store->src[0].ssa;
+   nir_def *rgba = store->src[0].ssa;
    if (rgba->num_components < 4)
       return;

@@ -59,9 +59,9 @@ agx_nir_lower_alpha_to_coverage(nir_shader *shader, uint8_t nr_samples)
     *    # of bits = (unsigned int) (alpha * nr_samples)
     *    mask = (1 << (# of bits)) - 1
     */
-   nir_ssa_def *alpha = nir_channel(b, rgba, 3);
-   nir_ssa_def *bits = nir_f2u32(b, nir_fmul_imm(b, alpha, nr_samples));
-   nir_ssa_def *mask =
+   nir_def *alpha = nir_channel(b, rgba, 3);
+   nir_def *bits = nir_f2u32(b, nir_fmul_imm(b, alpha, nr_samples));
+   nir_def *mask =
       nir_iadd_imm(b, nir_ishl(b, nir_imm_intN_t(b, 1, 16), bits), -1);

    /* Discard samples that aren't covered */

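The formula in the comment maps alpha to a coverage mask whose popcount is proportional to alpha; with 4 samples, alpha = 0.5 covers two samples (mask 0b0011). The same computation as a scalar sketch:

#include <stdint.h>

static uint16_t
alpha_to_coverage(float alpha, unsigned nr_samples)
{
   /* # of bits = (unsigned int)(alpha * nr_samples);
    * mask = (1 << # of bits) - 1, exactly as the comment above states. */
   unsigned bits = (unsigned)(alpha * nr_samples);
   return (uint16_t)((1u << bits) - 1);
}
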
@@ -100,12 +100,12 @@ agx_nir_lower_alpha_to_one(nir_shader *shader)
    if (sem.location < FRAG_RESULT_DATA0)
       continue;

-   nir_ssa_def *rgba = intr->src[0].ssa;
+   nir_def *rgba = intr->src[0].ssa;
    if (rgba->num_components < 4)
       continue;

    nir_builder b = nir_builder_at(nir_before_instr(instr));
-   nir_ssa_def *rgb1 = nir_vector_insert_imm(
+   nir_def *rgb1 = nir_vector_insert_imm(
       &b, rgba, nir_imm_floatN_t(&b, 1.0, rgba->bit_size), 3);

    nir_instr_rewrite_src_ssa(instr, &intr->src[0], rgb1);

@@ -11,7 +11,7 @@
 static bool
 lower_wrapped(nir_builder *b, nir_instr *instr, void *data)
 {
-   nir_ssa_def *sample_id = data;
+   nir_def *sample_id = data;
    if (instr->type != nir_instr_type_intrinsic)
       return false;

@@ -21,7 +21,7 @@ lower_wrapped(nir_builder *b, nir_instr *instr, void *data)
    switch (intr->intrinsic) {
    case nir_intrinsic_load_sample_id: {
       unsigned size = nir_dest_bit_size(intr->dest);
-      nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_u2uN(b, sample_id, size));
+      nir_def_rewrite_uses(&intr->dest.ssa, nir_u2uN(b, sample_id, size));
       nir_instr_remove(instr);
       return true;
    }

@@ -34,10 +34,10 @@ lower_wrapped(nir_builder *b, nir_instr *instr, void *data)
       unsigned mask_index =
         (intr->intrinsic == nir_intrinsic_store_local_pixel_agx) ? 1 : 0;

-      nir_ssa_def *mask = intr->src[mask_index].ssa;
-      nir_ssa_def *id_mask = nir_ishl(b, nir_imm_intN_t(b, 1, mask->bit_size),
+      nir_def *mask = intr->src[mask_index].ssa;
+      nir_def *id_mask = nir_ishl(b, nir_imm_intN_t(b, 1, mask->bit_size),
                                   nir_u2u32(b, sample_id));
-      nir_src_rewrite_ssa(&intr->src[mask_index], nir_iand(b, mask, id_mask));
+      nir_src_rewrite(&intr->src[mask_index], nir_iand(b, mask, id_mask));
      return true;
    }

@@ -70,7 +70,7 @@ agx_nir_wrap_per_sample_loop(nir_shader *shader, uint8_t nr_samples)
    nir_variable *i =
       nir_local_variable_create(impl, glsl_uintN_t_type(16), NULL);
    nir_store_var(&b, i, nir_imm_intN_t(&b, 0, 16), ~0);
-   nir_ssa_def *index = NULL;
+   nir_def *index = NULL;

    /* Create a loop in the wrapped function */
    nir_loop *loop = nir_push_loop(&b);

@@ -151,11 +151,11 @@ lower_sample_mask_read(nir_builder *b, nir_instr *instr, UNUSED void *_)
    if (intr->intrinsic != nir_intrinsic_load_sample_mask_in)
       return false;

-   nir_ssa_def *old = &intr->dest.ssa;
-   nir_ssa_def *lowered = nir_iand(
+   nir_def *old = &intr->dest.ssa;
+   nir_def *lowered = nir_iand(
       b, old, nir_u2uN(b, nir_load_api_sample_mask_agx(b), old->bit_size));

-   nir_ssa_def_rewrite_uses_after(old, lowered, lowered->parent_instr);
+   nir_def_rewrite_uses_after(old, lowered, lowered->parent_instr);
    return true;
 }

@@ -7,10 +7,10 @@
 #include "agx_tilebuffer.h"
 #include "nir_builder.h"

-static nir_ssa_def *
-mask_by_sample_id(nir_builder *b, nir_ssa_def *mask)
+static nir_def *
+mask_by_sample_id(nir_builder *b, nir_def *mask)
 {
-   nir_ssa_def *id_mask =
+   nir_def *id_mask =
       nir_ishl(b, nir_imm_intN_t(b, 1, mask->bit_size), nir_load_sample_id(b));
    return nir_iand(b, mask, id_mask);
 }

@@ -36,16 +36,16 @@ lower_to_sample(nir_builder *b, nir_instr *instr, void *_)
     *    xy[component] = ((float)nibble) / 16.0;
     * }
     */
-   nir_ssa_def *packed = nir_load_sample_positions_agx(b);
+   nir_def *packed = nir_load_sample_positions_agx(b);

    /* The n'th sample is the in the n'th byte of the register */
-   nir_ssa_def *shifted = nir_ushr(
+   nir_def *shifted = nir_ushr(
       b, packed, nir_u2u32(b, nir_imul_imm(b, nir_load_sample_id(b), 8)));

-   nir_ssa_def *xy[2];
+   nir_def *xy[2];
    for (unsigned i = 0; i < 2; ++i) {
       /* Get the appropriate nibble */
-      nir_ssa_def *nibble =
+      nir_def *nibble =
         nir_iand_imm(b, nir_ushr_imm(b, shifted, i * 4), 0xF);

       /* Convert it from fixed point to float */

@@ -56,7 +56,7 @@ lower_to_sample(nir_builder *b, nir_instr *instr, void *_)
    }

    /* Collect and rewrite */
-   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_vec2(b, xy[0], xy[1]));
+   nir_def_rewrite_uses(&intr->dest.ssa, nir_vec2(b, xy[0], xy[1]));
    nir_instr_remove(instr);
    return true;
 }

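The pseudocode in the comment above decodes one sample's position from the packed register: one byte per sample, one 4-bit fixed-point coordinate per nibble. As a standalone check, the same decode in plain C:

#include <stdint.h>

static void
decode_sample_position(uint32_t packed, unsigned sample_id, float xy[2])
{
   /* The n'th sample lives in the n'th byte of the register. */
   uint32_t byte = (packed >> (sample_id * 8)) & 0xFF;

   for (unsigned i = 0; i < 2; ++i) {
      /* Each nibble is a 0.4 fixed-point coordinate in [0, 1). */
      uint32_t nibble = (byte >> (i * 4)) & 0xF;
      xy[i] = (float)nibble / 16.0f;
   }
}
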
@@ -67,9 +67,9 @@ lower_to_sample(nir_builder *b, nir_instr *instr, void *_)
     * by the sample ID to make that happen.
     */
    b->cursor = nir_after_instr(instr);
-   nir_ssa_def *old = &intr->dest.ssa;
-   nir_ssa_def *lowered = mask_by_sample_id(b, old);
-   nir_ssa_def_rewrite_uses_after(old, lowered, lowered->parent_instr);
+   nir_def *old = &intr->dest.ssa;
+   nir_def *lowered = mask_by_sample_id(b, old);
+   nir_def_rewrite_uses_after(old, lowered, lowered->parent_instr);
    return true;
 }

@@ -78,13 +78,13 @@ lower_to_sample(nir_builder *b, nir_instr *instr, void *_)
     * interpolateAtSample() with the sample ID
     */
    b->cursor = nir_after_instr(instr);
-   nir_ssa_def *old = &intr->dest.ssa;
+   nir_def *old = &intr->dest.ssa;

-   nir_ssa_def *lowered = nir_load_barycentric_at_sample(
+   nir_def *lowered = nir_load_barycentric_at_sample(
       b, nir_dest_bit_size(intr->dest), nir_load_sample_id(b),
       .interp_mode = nir_intrinsic_interp_mode(intr));

-   nir_ssa_def_rewrite_uses_after(old, lowered, lowered->parent_instr);
+   nir_def_rewrite_uses_after(old, lowered, lowered->parent_instr);
    return true;
 }

@@ -43,7 +43,7 @@ tib_filter(const nir_instr *instr, UNUSED const void *_)
 static void
 store_tilebuffer(nir_builder *b, struct agx_tilebuffer_layout *tib,
                  enum pipe_format format, enum pipe_format logical_format,
-                 unsigned rt, nir_ssa_def *value, unsigned write_mask)
+                 unsigned rt, nir_def *value, unsigned write_mask)
 {
    /* The hardware cannot extend for a 32-bit format. Extend ourselves. */
    if (format == PIPE_FORMAT_R32_UINT && value->bit_size == 16) {

@@ -61,7 +61,7 @@ store_tilebuffer(nir_builder *b, struct agx_tilebuffer_layout *tib,
                     .format = format);
 }

-static nir_ssa_def *
+static nir_def *
 load_tilebuffer(nir_builder *b, struct agx_tilebuffer_layout *tib,
                 uint8_t load_comps, uint8_t bit_size, unsigned rt,
                 enum pipe_format format, enum pipe_format logical_format)

@@ -74,7 +74,7 @@ load_tilebuffer(nir_builder *b, struct agx_tilebuffer_layout *tib,
       format = PIPE_FORMAT_R16_UINT;

    uint8_t offset_B = agx_tilebuffer_offset_B(tib, rt);
-   nir_ssa_def *res = nir_load_local_pixel_agx(
+   nir_def *res = nir_load_local_pixel_agx(
       b, MIN2(load_comps, comps), f16 ? 16 : bit_size,
       nir_imm_intN_t(b, ALL_SAMPLES, 16), .base = offset_B, .format = format);

@@ -100,7 +100,7 @@ load_tilebuffer(nir_builder *b, struct agx_tilebuffer_layout *tib,
  * texture/PBE descriptors are alternated for each render target. This is
  * ABI. If we need to make this more flexible for Vulkan later, we can.
  */
-static nir_ssa_def *
+static nir_def *
 handle_for_rt(nir_builder *b, unsigned base, unsigned rt, bool pbe,
               bool *bindless)
 {

@@ -117,7 +117,7 @@ handle_for_rt(nir_builder *b, unsigned base, unsigned rt, bool pbe,
 }

 static enum glsl_sampler_dim
-dim_for_rt(nir_builder *b, unsigned nr_samples, nir_ssa_def **sample)
+dim_for_rt(nir_builder *b, unsigned nr_samples, nir_def **sample)
 {
    if (nr_samples == 1) {
       *sample = nir_imm_intN_t(b, 0, 16);

@@ -129,7 +129,7 @@ dim_for_rt(nir_builder *b, unsigned nr_samples, nir_ssa_def **sample)
    }
 }

-static nir_ssa_def *
+static nir_def *
 image_coords(nir_builder *b)
 {
    return nir_pad_vector(b, nir_u2u32(b, nir_load_pixel_coord(b)), 4);

@@ -137,25 +137,25 @@ image_coords(nir_builder *b)

 static void
 store_memory(nir_builder *b, unsigned bindless_base, unsigned nr_samples,
-             enum pipe_format format, unsigned rt, nir_ssa_def *value)
+             enum pipe_format format, unsigned rt, nir_def *value)
 {
    /* Force bindless for multisampled image writes. It avoids the late lowering
     * needing a texture_base_agx sysval.
     */
    bool bindless = (nr_samples > 1);
-   nir_ssa_def *image = handle_for_rt(b, bindless_base, rt, true, &bindless);
-   nir_ssa_def *zero = nir_imm_intN_t(b, 0, 16);
-   nir_ssa_def *lod = zero;
+   nir_def *image = handle_for_rt(b, bindless_base, rt, true, &bindless);
+   nir_def *zero = nir_imm_intN_t(b, 0, 16);
+   nir_def *lod = zero;

-   nir_ssa_def *sample;
+   nir_def *sample;
    enum glsl_sampler_dim dim = dim_for_rt(b, nr_samples, &sample);
-   nir_ssa_def *coords = image_coords(b);
+   nir_def *coords = image_coords(b);

    nir_begin_invocation_interlock(b);

    if (nr_samples > 1) {
-      nir_ssa_def *coverage = nir_load_sample_mask(b);
-      nir_ssa_def *covered = nir_ubitfield_extract(
+      nir_def *coverage = nir_load_sample_mask(b);
+      nir_def *covered = nir_ubitfield_extract(
         b, coverage, nir_u2u32(b, sample), nir_imm_int(b, 1));

       nir_push_if(b, nir_ine_imm(b, covered, 0));

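The guarded store above only executes when the current sample's bit is set in the coverage mask; the single-bit nir_ubitfield_extract is just a bit test. A scalar sketch of the same predicate:

#include <stdbool.h>
#include <stdint.h>

/* Is sample `s` covered? Extract one bit from the coverage mask, as the
 * one-bit ubitfield_extract does before the conditional store above.
 */
static bool
sample_covered(uint32_t coverage, unsigned s)
{
   return (coverage >> s) & 1;
}
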
@@ -176,19 +176,19 @@ store_memory(nir_builder *b, unsigned bindless_base, unsigned nr_samples,
    b->shader->info.writes_memory = true;
 }

-static nir_ssa_def *
+static nir_def *
 load_memory(nir_builder *b, unsigned bindless_base, unsigned nr_samples,
             uint8_t comps, uint8_t bit_size, unsigned rt,
             enum pipe_format format)
 {
    bool bindless = false;
-   nir_ssa_def *image = handle_for_rt(b, bindless_base, rt, false, &bindless);
-   nir_ssa_def *zero = nir_imm_intN_t(b, 0, 16);
-   nir_ssa_def *lod = zero;
+   nir_def *image = handle_for_rt(b, bindless_base, rt, false, &bindless);
+   nir_def *zero = nir_imm_intN_t(b, 0, 16);
+   nir_def *lod = zero;

-   nir_ssa_def *sample;
+   nir_def *sample;
    enum glsl_sampler_dim dim = dim_for_rt(b, nr_samples, &sample);
-   nir_ssa_def *coords = image_coords(b);
+   nir_def *coords = image_coords(b);

    /* Ensure pixels below this one have written out their results */
    nir_begin_invocation_interlock(b);

@@ -204,7 +204,7 @@ load_memory(nir_builder *b, unsigned bindless_base, unsigned nr_samples,
    }
 }

-static nir_ssa_def *
+static nir_def *
 tib_impl(nir_builder *b, nir_instr *instr, void *data)
 {
    struct ctx *ctx = data;

@@ -250,7 +250,7 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data)
      if (!write_mask)
         return NIR_LOWER_INSTR_PROGRESS_REPLACE;

-     nir_ssa_def *value = intr->src[0].ssa;
+     nir_def *value = intr->src[0].ssa;

      /* Trim to format as required by hardware */
      value = nir_trim_vector(b, intr->src[0].ssa, comps);

@@ -272,7 +272,7 @@ tib_impl(nir_builder *b, nir_instr *instr, void *data)
       * possible to encode in the hardware, delete them.
       */
      if (logical_format == PIPE_FORMAT_NONE) {
-        return nir_ssa_undef(b, intr->num_components, bit_size);
+        return nir_undef(b, intr->num_components, bit_size);
      } else if (tib->spilled[rt]) {
         *(ctx->translucent) = true;

@@ -78,8 +78,8 @@ agx_vbo_supports_format(enum pipe_format format)
    return agx_vbo_internal_format(format) != PIPE_FORMAT_NONE;
 }

-static nir_ssa_def *
-apply_swizzle_channel(nir_builder *b, nir_ssa_def *vec, unsigned swizzle,
+static nir_def *
+apply_swizzle_channel(nir_builder *b, nir_def *vec, unsigned swizzle,
                       bool is_int)
 {
    switch (swizzle) {

@@ -158,12 +158,11 @@ pass(struct nir_builder *b, nir_instr *instr, void *data)
    /* Calculate the element to fetch the vertex for. Divide the instance ID by
     * the divisor for per-instance data. Divisor=0 specifies per-vertex data.
     */
-   nir_ssa_def *el =
-      (attrib.divisor == 0)
+   nir_def *el = (attrib.divisor == 0)
                     ? nir_load_vertex_id(b)
                     : nir_udiv_imm(b, nir_load_instance_id(b), attrib.divisor);

-   nir_ssa_def *base = nir_load_vbo_base_agx(b, nir_imm_int(b, attrib.buf));
+   nir_def *base = nir_load_vbo_base_agx(b, nir_imm_int(b, attrib.buf));

    assert((stride % interchange_align) == 0 && "must be aligned");
    assert((offset % interchange_align) == 0 && "must be aligned");

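Instanced vertex fetch picks its element index exactly as the comment above describes. A minimal sketch of the selection rule:

#include <stdint.h>

/* Element to fetch for an attribute: per-vertex data uses the vertex ID;
 * per-instance data advances once every `divisor` instances. */
static uint32_t
fetch_element(uint32_t vertex_id, uint32_t instance_id, uint32_t divisor)
{
   return (divisor == 0) ? vertex_id : instance_id / divisor;
}
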
@@ -183,11 +182,11 @@ pass(struct nir_builder *b, nir_instr *instr, void *data)
      stride_el = 1;
    }

-   nir_ssa_def *stride_offset_el =
+   nir_def *stride_offset_el =
      nir_iadd_imm(b, nir_imul_imm(b, el, stride_el), offset_el);

    /* Load the raw vector */
-   nir_ssa_def *memory = nir_load_constant_agx(
+   nir_def *memory = nir_load_constant_agx(
      b, interchange_comps, interchange_register_size, base, stride_offset_el,
      .format = interchange_format, .base = shift);

@@ -240,14 +239,14 @@ pass(struct nir_builder *b, nir_instr *instr, void *data)
    /* We now have a properly formatted vector of the components in memory. Apply
     * the format swizzle forwards to trim/pad/reorder as needed.
     */
-   nir_ssa_def *channels[4] = {NULL};
+   nir_def *channels[4] = {NULL};
    assert(nir_intrinsic_component(intr) == 0 && "unimplemented");

    for (unsigned i = 0; i < intr->num_components; ++i)
      channels[i] = apply_swizzle_channel(b, memory, desc->swizzle[i], is_int);

-   nir_ssa_def *logical = nir_vec(b, channels, intr->num_components);
-   nir_ssa_def_rewrite_uses(&intr->dest.ssa, logical);
+   nir_def *logical = nir_vec(b, channels, intr->num_components);
+   nir_def_rewrite_uses(&intr->dest.ssa, logical);
    return true;
 }

@@ -310,7 +310,7 @@ ntq_add_pending_tmu_flush(struct v3d_compile *c,

         nir_intrinsic_instr *store = nir_store_reg_for_def(&dest->ssa);
         if (store != NULL) {
-                nir_ssa_def *reg = store->src[1].ssa;
+                nir_def *reg = store->src[1].ssa;
                 _mesa_set_add(c->tmu.outstanding_regs, reg);
         }
 }

@@ -716,7 +716,7 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
 }

 static struct qreg *
-ntq_init_ssa_def(struct v3d_compile *c, nir_ssa_def *def)
+ntq_init_ssa_def(struct v3d_compile *c, nir_def *def)
 {
         struct qreg *qregs = ralloc_array(c->def_ht, struct qreg,
                                           def->num_components);

@@ -789,7 +789,7 @@ ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan,

                 qregs[chan] = result;
         } else {
-                nir_ssa_def *reg = store->src[1].ssa;
+                nir_def *reg = store->src[1].ssa;
                 ASSERTED nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
                 assert(nir_intrinsic_base(store) == 0);
                 assert(nir_intrinsic_num_array_elems(decl) == 0);

@@ -858,7 +858,7 @@ ntq_get_src(struct v3d_compile *c, nir_src src, int i)
                         entry = _mesa_hash_table_search(c->def_ht, src.ssa);
                 }
         } else {
-                nir_ssa_def *reg = load->src[0].ssa;
+                nir_def *reg = load->src[0].ssa;
                 ASSERTED nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
                 assert(nir_intrinsic_base(load) == 0);
                 assert(nir_intrinsic_num_array_elems(decl) == 0);

@@ -2471,7 +2471,7 @@ ntq_setup_registers(struct v3d_compile *c, nir_function_impl *impl)
                 struct qreg *qregs = ralloc_array(c->def_ht, struct qreg,
                                                   array_len * num_components);

-                nir_ssa_def *nir_reg = &decl->dest.ssa;
+                nir_def *nir_reg = &decl->dest.ssa;
                 _mesa_hash_table_insert(c->def_ht, nir_reg, qregs);

                 for (int i = 0; i < array_len * num_components; i++)

@@ -135,7 +135,7 @@ v3d33_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
          * instruction writes and how many the instruction could produce.
          */
         p1_unpacked.return_words_of_texture_data =
-                nir_ssa_def_components_read(&instr->dest.ssa);
+                nir_def_components_read(&instr->dest.ssa);
 
         uint32_t p0_packed;
         V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_pack(NULL,
 
@@ -253,15 +253,15 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
         nir_intrinsic_instr *store = nir_store_reg_for_def(&instr->dest.ssa);
         if (store == NULL) {
                 p0_unpacked.return_words_of_texture_data =
-                        nir_ssa_def_components_read(&instr->dest.ssa);
+                        nir_def_components_read(&instr->dest.ssa);
         } else {
-                nir_ssa_def *reg = store->src[1].ssa;
+                nir_def *reg = store->src[1].ssa;
                 nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
                 unsigned reg_num_components =
                         nir_intrinsic_num_components(decl);
 
                 /* For the non-ssa case we don't have a full equivalent to
-                 * nir_ssa_def_components_read. This is a problem for the 16
+                 * nir_def_components_read. This is a problem for the 16
                  * bit case. nir_lower_tex will not change the destination as
                  * nir_tex_instr_dest_size will still return 4. The driver is
                  * just expected to not store on other channels, so we
 
@@ -622,7 +622,7 @@ struct v3d_compile {
         void *debug_output_data;
 
         /**
-         * Mapping from nir_register * or nir_ssa_def * to array of struct
+         * Mapping from nir_register * or nir_def * to array of struct
          * qreg for the values.
          */
         struct hash_table *def_ht;
 
@@ -60,14 +60,14 @@ v3d_gl_format_is_return_32(enum pipe_format format)
 /* Packs a 32-bit vector of colors in the range [0, (1 << bits[i]) - 1] to a
  * 32-bit SSA value, with as many channels as necessary to store all the bits
  */
-static nir_ssa_def *
-pack_bits(nir_builder *b, nir_ssa_def *color, const unsigned *bits,
+static nir_def *
+pack_bits(nir_builder *b, nir_def *color, const unsigned *bits,
           int num_components, bool mask)
 {
-        nir_ssa_def *results[4];
+        nir_def *results[4];
         int offset = 0;
         for (int i = 0; i < num_components; i++) {
-                nir_ssa_def *chan = nir_channel(b, color, i);
+                nir_def *chan = nir_channel(b, color, i);
 
                 /* Channels being stored shouldn't cross a 32-bit boundary. */
                 assert((offset & ~31) == ((offset + bits[i] - 1) & ~31));
@@ -103,10 +103,10 @@ v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
 
         b->cursor = nir_before_instr(&instr->instr);
 
-        nir_ssa_def *color = nir_trim_vector(b,
+        nir_def *color = nir_trim_vector(b,
                                              nir_ssa_for_src(b, instr->src[3], 4),
                                              num_components);
-        nir_ssa_def *formatted = NULL;
+        nir_def *formatted = NULL;
 
         if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
                 formatted = nir_format_pack_11f11f10f(b, color);
@@ -182,14 +182,14 @@ v3d_nir_lower_image_load(nir_builder *b, nir_intrinsic_instr *instr)
 
         b->cursor = nir_after_instr(&instr->instr);
 
-        nir_ssa_def *result = &instr->dest.ssa;
+        nir_def *result = &instr->dest.ssa;
         if (util_format_is_pure_uint(format)) {
                 result = nir_format_unpack_uint(b, result, bits16, 4);
         } else if (util_format_is_pure_sint(format)) {
                 result = nir_format_unpack_sint(b, result, bits16, 4);
         } else {
-                nir_ssa_def *rg = nir_channel(b, result, 0);
-                nir_ssa_def *ba = nir_channel(b, result, 1);
+                nir_def *rg = nir_channel(b, result, 0);
+                nir_def *ba = nir_channel(b, result, 1);
                 result = nir_vec4(b,
                                   nir_unpack_half_2x16_split_x(b, rg),
                                   nir_unpack_half_2x16_split_y(b, rg),
@@ -197,7 +197,7 @@ v3d_nir_lower_image_load(nir_builder *b, nir_intrinsic_instr *instr)
                                   nir_unpack_half_2x16_split_y(b, ba));
         }
 
-        nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, result,
+        nir_def_rewrite_uses_after(&instr->dest.ssa, result,
                                        result->parent_instr);
 
         return true;
 
@@ -62,7 +62,7 @@ struct v3d_nir_lower_io_state {
 
         BITSET_WORD varyings_stored[BITSET_WORDS(V3D_MAX_ANY_STAGE_INPUTS)];
 
-        nir_ssa_def *pos[4];
+        nir_def *pos[4];
 };
 
 static void
@@ -70,8 +70,8 @@ v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
                             struct v3d_nir_lower_io_state *state);
 
 static void
-v3d_nir_store_output(nir_builder *b, int base, nir_ssa_def *offset,
-                     nir_ssa_def *chan)
+v3d_nir_store_output(nir_builder *b, int base, nir_def *offset,
+                     nir_def *chan)
 {
         if (offset) {
                 /* When generating the VIR instruction, the base and the offset
@@ -134,13 +134,13 @@ v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
         /* If this is a geometry shader we need to emit our outputs
          * to the current vertex offset in the VPM.
          */
-        nir_ssa_def *offset_reg =
+        nir_def *offset_reg =
                 c->s->info.stage == MESA_SHADER_GEOMETRY ?
                         nir_load_var(b, state->gs.output_offset_var) : NULL;
 
         int start_comp = nir_intrinsic_component(intr);
         unsigned location = nir_intrinsic_io_semantics(intr).location;
-        nir_ssa_def *src = nir_ssa_for_src(b, intr->src[0],
+        nir_def *src = nir_ssa_for_src(b, intr->src[0],
                                            intr->num_components);
         /* Save off the components of the position for the setup of VPM inputs
          * read by fixed function HW.
@@ -159,7 +159,7 @@ v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
 
         if (location == VARYING_SLOT_LAYER) {
                 assert(c->s->info.stage == MESA_SHADER_GEOMETRY);
-                nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
+                nir_def *header = nir_load_var(b, state->gs.header_var);
                 header = nir_iand_imm(b, header, 0xff00ffff);
 
                 /* From the GLES 3.2 spec:
@@ -180,9 +180,9 @@ v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
                  * to 0 in that case (we always allocate tile state for at
                  * least one layer).
                  */
-                nir_ssa_def *fb_layers = nir_load_fb_layers_v3d(b, 32);
-                nir_ssa_def *cond = nir_ige(b, src, fb_layers);
-                nir_ssa_def *layer_id =
+                nir_def *fb_layers = nir_load_fb_layers_v3d(b, 32);
+                nir_def *cond = nir_ige(b, src, fb_layers);
+                nir_def *layer_id =
                         nir_bcsel(b, cond,
                                   nir_imm_int(b, 0),
                                   nir_ishl_imm(b, src, 16));
@@ -238,9 +238,9 @@ v3d_nir_lower_emit_vertex(struct v3d_compile *c, nir_builder *b,
 {
         b->cursor = nir_before_instr(&instr->instr);
 
-        nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
-        nir_ssa_def *header_offset = nir_load_var(b, state->gs.header_offset_var);
-        nir_ssa_def *output_offset = nir_load_var(b, state->gs.output_offset_var);
+        nir_def *header = nir_load_var(b, state->gs.header_var);
+        nir_def *header_offset = nir_load_var(b, state->gs.header_offset_var);
+        nir_def *output_offset = nir_load_var(b, state->gs.output_offset_var);
 
         /* Emit fixed function outputs */
         v3d_nir_emit_ff_vpm_outputs(c, b, state);
@@ -476,16 +476,16 @@ v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
         /* If this is a geometry shader we need to emit our fixed function
          * outputs to the current vertex offset in the VPM.
          */
-        nir_ssa_def *offset_reg =
+        nir_def *offset_reg =
                 c->s->info.stage == MESA_SHADER_GEOMETRY ?
                         nir_load_var(b, state->gs.output_offset_var) : NULL;
 
         for (int i = 0; i < 4; i++) {
                 if (!state->pos[i])
-                        state->pos[i] = nir_ssa_undef(b, 1, 32);
+                        state->pos[i] = nir_undef(b, 1, 32);
         }
 
-        nir_ssa_def *rcp_wc = nir_frcp(b, state->pos[3]);
+        nir_def *rcp_wc = nir_frcp(b, state->pos[3]);
 
         if (state->pos_vpm_offset != -1) {
                 for (int i = 0; i < 4; i++) {
@@ -496,8 +496,8 @@ v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
 
         if (state->vp_vpm_offset != -1) {
                 for (int i = 0; i < 2; i++) {
-                        nir_ssa_def *pos;
-                        nir_ssa_def *scale;
+                        nir_def *pos;
+                        nir_def *scale;
                         pos = state->pos[i];
                         if (i == 0)
                                 scale = nir_load_viewport_x_scale(b);
@@ -523,7 +523,7 @@ v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
         }
 
         if (state->zs_vpm_offset != -1) {
-                nir_ssa_def *z = state->pos[2];
+                nir_def *z = state->pos[2];
                 z = nir_fmul(b, z, nir_load_viewport_z_scale(b));
                 z = nir_fmul(b, z, rcp_wc);
                 z = nir_fadd(b, z, nir_load_viewport_z_offset(b));
@@ -599,11 +599,11 @@ emit_gs_vpm_output_header_prolog(struct v3d_compile *c, nir_builder *b,
          * offset variable by removing the one generic header slot that always
          * goes at the beginning of out header.
          */
-        nir_ssa_def *header_offset =
+        nir_def *header_offset =
                 nir_load_var(b, state->gs.header_offset_var);
-        nir_ssa_def *vertex_count =
+        nir_def *vertex_count =
                 nir_iadd_imm(b, header_offset, -1);
-        nir_ssa_def *header =
+        nir_def *header =
                 nir_ior_imm(b,
                             nir_ishl_imm(b, vertex_count,
                                          VERTEX_COUNT_OFFSET),
 
@@ -42,11 +42,11 @@ lower_line_smooth_intrinsic(struct lower_line_smooth_state *state,
 {
         b->cursor = nir_before_instr(&intr->instr);
 
-        nir_ssa_def *one = nir_imm_float(b, 1.0f);
+        nir_def *one = nir_imm_float(b, 1.0f);
 
-        nir_ssa_def *coverage = nir_load_var(b, state->coverage);
+        nir_def *coverage = nir_load_var(b, state->coverage);
 
-        nir_ssa_def *new_val = nir_fmul(b, nir_vec4(b, one, one, one, coverage),
+        nir_def *new_val = nir_fmul(b, nir_vec4(b, one, one, one, coverage),
                                         intr->src[0].ssa);
 
         nir_instr_rewrite_src(&intr->instr,
@@ -89,21 +89,21 @@ initialise_coverage_var(struct lower_line_smooth_state *state,
 {
         nir_builder b = nir_builder_at(nir_before_block(nir_start_block(impl)));
 
-        nir_ssa_def *line_width = nir_load_line_width(&b);
+        nir_def *line_width = nir_load_line_width(&b);
 
-        nir_ssa_def *real_line_width = nir_load_aa_line_width(&b);
+        nir_def *real_line_width = nir_load_aa_line_width(&b);
 
         /* The line coord varies from 0.0 to 1.0 across the width of the line */
-        nir_ssa_def *line_coord = nir_load_line_coord(&b);
+        nir_def *line_coord = nir_load_line_coord(&b);
 
         /* fabs(line_coord - 0.5) * real_line_width */
-        nir_ssa_def *pixels_from_center =
+        nir_def *pixels_from_center =
                 nir_fmul(&b, real_line_width,
                          nir_fabs(&b, nir_fsub(&b, line_coord,
                                                nir_imm_float(&b, 0.5f))));
 
         /* 0.5 - 1/√2 * (pixels_from_center - line_width * 0.5) */
-        nir_ssa_def *coverage =
+        nir_def *coverage =
                 nir_fsub(&b,
                          nir_imm_float(&b, 0.5f),
                          nir_fmul(&b,
@@ -114,14 +114,14 @@ initialise_coverage_var(struct lower_line_smooth_state *state,
                                   0.5f))));
 
         /* Discard fragments that aren’t covered at all by the line */
-        nir_ssa_def *outside = nir_fle_imm(&b, coverage, 0.0f);
+        nir_def *outside = nir_fle_imm(&b, coverage, 0.0f);
 
         nir_discard_if(&b, outside);
 
         /* Clamp to at most 1.0. If it was less than 0.0 then the fragment will
          * be discarded so we don’t need to handle that.
          */
-        nir_ssa_def *clamped = nir_fmin(&b, coverage, nir_imm_float(&b, 1.0f));
+        nir_def *clamped = nir_fmin(&b, coverage, nir_imm_float(&b, 1.0f));
 
         nir_store_var(&b, state->coverage, clamped, 0x1 /* writemask */);
 }
 
@@ -71,9 +71,9 @@ static nir_intrinsic_instr *
 init_scalar_intrinsic(nir_builder *b,
                       nir_intrinsic_instr *intr,
                       uint32_t component,
-                      nir_ssa_def *offset,
+                      nir_def *offset,
                       uint32_t bit_size,
-                      nir_ssa_def **scalar_offset)
+                      nir_def **scalar_offset)
 {
 
         nir_intrinsic_instr *new_intr =
@@ -129,20 +129,20 @@ lower_load_bitsize(nir_builder *b,
 
         /* For global 2x32 we ignore Y component because it must be zero */
         unsigned offset_idx = offset_src(intr->intrinsic);
-        nir_ssa_def *offset = nir_ssa_for_src(b, intr->src[offset_idx], 1);
+        nir_def *offset = nir_ssa_for_src(b, intr->src[offset_idx], 1);
 
         /* Split vector store to multiple scalar loads */
-        nir_ssa_def *dest_components[4] = { NULL };
+        nir_def *dest_components[4] = { NULL };
         const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
         for (int component = 0; component < num_comp; component++) {
-                nir_ssa_def *scalar_offset;
+                nir_def *scalar_offset;
                 nir_intrinsic_instr *new_intr =
                         init_scalar_intrinsic(b, intr, component, offset,
                                               bit_size, &scalar_offset);
 
                 for (unsigned i = 0; i < info->num_srcs; i++) {
                         if (i == offset_idx) {
-                                nir_ssa_def *final_offset;
+                                nir_def *final_offset;
                                 final_offset = intr->intrinsic != nir_intrinsic_load_global_2x32 ?
                                         scalar_offset :
                                         nir_vec2(b, scalar_offset,
@@ -160,8 +160,8 @@ lower_load_bitsize(nir_builder *b,
                 nir_builder_instr_insert(b, &new_intr->instr);
         }
 
-        nir_ssa_def *new_dst = nir_vec(b, dest_components, num_comp);
-        nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_dst);
+        nir_def *new_dst = nir_vec(b, dest_components, num_comp);
+        nir_def_rewrite_uses(&intr->dest.ssa, new_dst);
 
         nir_instr_remove(&intr->instr);
         return true;
@@ -181,13 +181,13 @@ lower_store_bitsize(nir_builder *b,
         if (nir_src_bit_size(intr->src[value_idx]) == 32)
                 return false;
 
-        nir_ssa_def *value = nir_ssa_for_src(b, intr->src[value_idx], num_comp);
+        nir_def *value = nir_ssa_for_src(b, intr->src[value_idx], num_comp);
 
         b->cursor = nir_before_instr(&intr->instr);
 
         /* For global 2x32 we ignore Y component because it must be zero */
         unsigned offset_idx = offset_src(intr->intrinsic);
-        nir_ssa_def *offset = nir_ssa_for_src(b, intr->src[offset_idx], 1);
+        nir_def *offset = nir_ssa_for_src(b, intr->src[offset_idx], 1);
 
         /* Split vector store to multiple scalar stores */
         const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
@@ -195,7 +195,7 @@ lower_store_bitsize(nir_builder *b,
         while (wrmask) {
                 unsigned component = ffs(wrmask) - 1;
 
-                nir_ssa_def *scalar_offset;
+                nir_def *scalar_offset;
                 nir_intrinsic_instr *new_intr =
                         init_scalar_intrinsic(b, intr, component, offset,
                                               value->bit_size, &scalar_offset);
@@ -204,11 +204,11 @@ lower_store_bitsize(nir_builder *b,
 
                 for (unsigned i = 0; i < info->num_srcs; i++) {
                         if (i == value_idx) {
-                                nir_ssa_def *scalar_value =
+                                nir_def *scalar_value =
                                         nir_channels(b, value, 1 << component);
                                 new_intr->src[i] = nir_src_for_ssa(scalar_value);
                         } else if (i == offset_idx) {
-                                nir_ssa_def *final_offset;
+                                nir_def *final_offset;
                                 final_offset = intr->intrinsic != nir_intrinsic_store_global_2x32 ?
                                         scalar_offset :
                                         nir_vec2(b, scalar_offset,
 
@@ -36,8 +36,8 @@
 #include "v3d_compiler.h"
 
 
-typedef nir_ssa_def *(*nir_pack_func)(nir_builder *b, nir_ssa_def *c);
-typedef nir_ssa_def *(*nir_unpack_func)(nir_builder *b, nir_ssa_def *c);
+typedef nir_def *(*nir_pack_func)(nir_builder *b, nir_def *c);
+typedef nir_def *(*nir_unpack_func)(nir_builder *b, nir_def *c);
 
 static bool
 logicop_depends_on_dst_color(int logicop_func)
@@ -53,9 +53,9 @@ logicop_depends_on_dst_color(int logicop_func)
         }
 }
 
-static nir_ssa_def *
+static nir_def *
 v3d_logicop(nir_builder *b, int logicop_func,
-            nir_ssa_def *src, nir_ssa_def *dst)
+            nir_def *src, nir_def *dst)
 {
         switch (logicop_func) {
         case PIPE_LOGICOP_CLEAR:
@@ -96,8 +96,8 @@ v3d_logicop(nir_builder *b, int logicop_func,
         }
 }
 
-static nir_ssa_def *
-v3d_nir_get_swizzled_channel(nir_builder *b, nir_ssa_def **srcs, int swiz)
+static nir_def *
+v3d_nir_get_swizzled_channel(nir_builder *b, nir_def **srcs, int swiz)
 {
         switch (swiz) {
         default:
@@ -116,48 +116,48 @@ v3d_nir_get_swizzled_channel(nir_builder *b, nir_ssa_def **srcs, int swiz)
         }
 }
 
-static nir_ssa_def *
-v3d_nir_swizzle_and_pack(nir_builder *b, nir_ssa_def **chans,
+static nir_def *
+v3d_nir_swizzle_and_pack(nir_builder *b, nir_def **chans,
                          const uint8_t *swiz, nir_pack_func pack_func)
 {
-        nir_ssa_def *c[4];
+        nir_def *c[4];
         for (int i = 0; i < 4; i++)
                 c[i] = v3d_nir_get_swizzled_channel(b, chans, swiz[i]);
 
         return pack_func(b, nir_vec4(b, c[0], c[1], c[2], c[3]));
 }
 
-static nir_ssa_def *
-v3d_nir_unpack_and_swizzle(nir_builder *b, nir_ssa_def *packed,
+static nir_def *
+v3d_nir_unpack_and_swizzle(nir_builder *b, nir_def *packed,
                            const uint8_t *swiz, nir_unpack_func unpack_func)
 {
-        nir_ssa_def *unpacked = unpack_func(b, packed);
+        nir_def *unpacked = unpack_func(b, packed);
 
-        nir_ssa_def *unpacked_chans[4];
+        nir_def *unpacked_chans[4];
         for (int i = 0; i < 4; i++)
                 unpacked_chans[i] = nir_channel(b, unpacked, i);
 
-        nir_ssa_def *c[4];
+        nir_def *c[4];
         for (int i = 0; i < 4; i++)
                 c[i] = v3d_nir_get_swizzled_channel(b, unpacked_chans, swiz[i]);
 
         return nir_vec4(b, c[0], c[1], c[2], c[3]);
 }
 
-static nir_ssa_def *
-pack_unorm_rgb10a2(nir_builder *b, nir_ssa_def *c)
+static nir_def *
+pack_unorm_rgb10a2(nir_builder *b, nir_def *c)
 {
         static const unsigned bits[4] = { 10, 10, 10, 2 };
-        nir_ssa_def *unorm = nir_format_float_to_unorm(b, c, bits);
+        nir_def *unorm = nir_format_float_to_unorm(b, c, bits);
 
-        nir_ssa_def *chans[4];
+        nir_def *chans[4];
         for (int i = 0; i < 4; i++)
                 chans[i] = nir_channel(b, unorm, i);
 
-        nir_ssa_def *result = nir_mov(b, chans[0]);
+        nir_def *result = nir_mov(b, chans[0]);
         int offset = bits[0];
         for (int i = 1; i < 4; i++) {
-                nir_ssa_def *shifted_chan =
+                nir_def *shifted_chan =
                         nir_ishl_imm(b, chans[i], offset);
                 result = nir_ior(b, result, shifted_chan);
                 offset += bits[i];
@@ -165,8 +165,8 @@ pack_unorm_rgb10a2(nir_builder *b, nir_ssa_def *c)
         return result;
 }
 
-static nir_ssa_def *
-unpack_unorm_rgb10a2(nir_builder *b, nir_ssa_def *c)
+static nir_def *
+unpack_unorm_rgb10a2(nir_builder *b, nir_def *c)
 {
         static const unsigned bits[4] = { 10, 10, 10, 2 };
         const unsigned masks[4] = { BITFIELD_MASK(bits[0]),
@@ -174,9 +174,9 @@ unpack_unorm_rgb10a2(nir_builder *b, nir_ssa_def *c)
                                     BITFIELD_MASK(bits[2]),
                                     BITFIELD_MASK(bits[3]) };
 
-        nir_ssa_def *chans[4];
+        nir_def *chans[4];
         for (int i = 0; i < 4; i++) {
-                nir_ssa_def *unorm = nir_iand_imm(b, c, masks[i]);
+                nir_def *unorm = nir_iand_imm(b, c, masks[i]);
                 chans[i] = nir_format_unorm_to_float(b, unorm, &bits[i]);
                 c = nir_ushr_imm(b, c, bits[i]);
         }
@@ -201,13 +201,13 @@ v3d_get_format_swizzle_for_rt(struct v3d_compile *c, int rt)
         }
 }
 
-static nir_ssa_def *
+static nir_def *
 v3d_nir_get_tlb_color(nir_builder *b, struct v3d_compile *c, int rt, int sample)
 {
         uint32_t num_components =
                 util_format_get_nr_components(c->fs_key->color_fmt[rt].format);
 
-        nir_ssa_def *color[4];
+        nir_def *color[4];
         for (int i = 0; i < 4; i++) {
                 if (i < num_components) {
                         color[i] =
@@ -222,17 +222,17 @@ v3d_nir_get_tlb_color(nir_builder *b, struct v3d_compile *c, int rt, int sample)
         return nir_vec4(b, color[0], color[1], color[2], color[3]);
 }
 
-static nir_ssa_def *
+static nir_def *
 v3d_emit_logic_op_raw(struct v3d_compile *c, nir_builder *b,
-                      nir_ssa_def **src_chans, nir_ssa_def **dst_chans,
+                      nir_def **src_chans, nir_def **dst_chans,
                       int rt, int sample)
 {
         const uint8_t *fmt_swz = v3d_get_format_swizzle_for_rt(c, rt);
 
-        nir_ssa_def *op_res[4];
+        nir_def *op_res[4];
         for (int i = 0; i < 4; i++) {
-                nir_ssa_def *src = src_chans[i];
-                nir_ssa_def *dst =
+                nir_def *src = src_chans[i];
+                nir_def *dst =
                         v3d_nir_get_swizzled_channel(b, dst_chans, fmt_swz[i]);
                 op_res[i] = v3d_logicop(b, c->fs_key->logicop_func, src, dst);
 
@@ -250,40 +250,40 @@ v3d_emit_logic_op_raw(struct v3d_compile *c, nir_builder *b,
                 }
         }
 
-        nir_ssa_def *r[4];
+        nir_def *r[4];
         for (int i = 0; i < 4; i++)
                 r[i] = v3d_nir_get_swizzled_channel(b, op_res, fmt_swz[i]);
 
         return nir_vec4(b, r[0], r[1], r[2], r[3]);
 }
 
-static nir_ssa_def *
+static nir_def *
 v3d_emit_logic_op_unorm(struct v3d_compile *c, nir_builder *b,
-                        nir_ssa_def **src_chans, nir_ssa_def **dst_chans,
+                        nir_def **src_chans, nir_def **dst_chans,
                         int rt, int sample,
                         nir_pack_func pack_func, nir_unpack_func unpack_func)
 {
         static const uint8_t src_swz[4] = { 0, 1, 2, 3 };
-        nir_ssa_def *packed_src =
+        nir_def *packed_src =
                 v3d_nir_swizzle_and_pack(b, src_chans, src_swz, pack_func);
 
         const uint8_t *fmt_swz = v3d_get_format_swizzle_for_rt(c, rt);
-        nir_ssa_def *packed_dst =
+        nir_def *packed_dst =
                 v3d_nir_swizzle_and_pack(b, dst_chans, fmt_swz, pack_func);
 
-        nir_ssa_def *packed_result =
+        nir_def *packed_result =
                 v3d_logicop(b, c->fs_key->logicop_func, packed_src, packed_dst);
 
         return v3d_nir_unpack_and_swizzle(b, packed_result, fmt_swz, unpack_func);
 }
 
-static nir_ssa_def *
+static nir_def *
 v3d_nir_emit_logic_op(struct v3d_compile *c, nir_builder *b,
-                      nir_ssa_def *src, int rt, int sample)
+                      nir_def *src, int rt, int sample)
 {
-        nir_ssa_def *dst = v3d_nir_get_tlb_color(b, c, rt, sample);
+        nir_def *dst = v3d_nir_get_tlb_color(b, c, rt, sample);
 
-        nir_ssa_def *src_chans[4], *dst_chans[4];
+        nir_def *src_chans[4], *dst_chans[4];
         for (unsigned i = 0; i < 4; i++) {
                 src_chans[i] = nir_channel(b, src, i);
                 dst_chans[i] = nir_channel(b, dst, i);
@@ -306,7 +306,7 @@ v3d_nir_emit_logic_op(struct v3d_compile *c, nir_builder *b,
 
 static void
 v3d_emit_ms_output(nir_builder *b,
-                   nir_ssa_def *color, nir_src *offset,
+                   nir_def *color, nir_src *offset,
                    nir_alu_type type, int rt, int sample)
 {
         nir_store_tlb_sample_color_v3d(b, color, nir_imm_int(b, rt), .base = sample, .component = 0, .src_type = type);
@@ -318,7 +318,7 @@ v3d_nir_lower_logic_op_instr(struct v3d_compile *c,
                              nir_intrinsic_instr *intr,
                              int rt)
 {
-        nir_ssa_def *frag_color = intr->src[0].ssa;
+        nir_def *frag_color = intr->src[0].ssa;
 
 
         const int logic_op = c->fs_key->logicop_func;
@@ -328,7 +328,7 @@ v3d_nir_lower_logic_op_instr(struct v3d_compile *c,
                 nir_src *offset = &intr->src[1];
                 nir_alu_type type = nir_intrinsic_src_type(intr);
                 for (int i = 0; i < V3D_MAX_SAMPLES; i++) {
-                        nir_ssa_def *sample =
+                        nir_def *sample =
                                 v3d_nir_emit_logic_op(c, b, frag_color, rt, i);
 
                         v3d_emit_ms_output(b, sample, offset, type, rt, i);
@@ -336,7 +336,7 @@ v3d_nir_lower_logic_op_instr(struct v3d_compile *c,
 
                 nir_instr_remove(&intr->instr);
         } else {
-                nir_ssa_def *result =
+                nir_def *result =
                         v3d_nir_emit_logic_op(c, b, frag_color, rt, 0);
 
                 nir_instr_rewrite_src(&intr->instr, &intr->src[0],
 
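pack_unorm_rgb10a2() above accumulates a running bit offset over the { 10, 10, 10, 2 } channel widths, so R lands at bit 0, G at bit 10, B at bit 20 and A at bit 30. The same layout written as plain integer arithmetic, assuming the channels were already converted to unorm (a sketch, not part of the commit):

   /* Sketch: assemble an RGB10A2 word from pre-converted unorm channels. */
   static inline uint32_t
   pack_rgb10a2(uint32_t r, uint32_t g, uint32_t b, uint32_t a)
   {
      return r | (g << 10) | (b << 20) | (a << 30);
   }
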
@@ -34,11 +34,11 @@
  * writemasks in the process.
  */
 
-static nir_ssa_def *
+static nir_def *
 v3d_nir_scratch_offset(nir_builder *b, nir_intrinsic_instr *instr)
 {
         bool is_store = instr->intrinsic == nir_intrinsic_store_scratch;
-        nir_ssa_def *offset = nir_ssa_for_src(b, instr->src[is_store ? 1 : 0], 1);
+        nir_def *offset = nir_ssa_for_src(b, instr->src[is_store ? 1 : 0], 1);
 
         assert(nir_intrinsic_align_mul(instr) >= 4);
         assert(nir_intrinsic_align_offset(instr) == 0);
@@ -55,11 +55,11 @@ v3d_nir_lower_load_scratch(nir_builder *b, nir_intrinsic_instr *instr)
 {
         b->cursor = nir_before_instr(&instr->instr);
 
-        nir_ssa_def *offset = v3d_nir_scratch_offset(b,instr);
+        nir_def *offset = v3d_nir_scratch_offset(b,instr);
 
-        nir_ssa_def *chans[NIR_MAX_VEC_COMPONENTS];
+        nir_def *chans[NIR_MAX_VEC_COMPONENTS];
         for (int i = 0; i < instr->num_components; i++) {
-                nir_ssa_def *chan_offset =
+                nir_def *chan_offset =
                         nir_iadd_imm(b, offset, V3D_CHANNELS * i * 4);
 
                 nir_intrinsic_instr *chan_instr =
@@ -77,8 +77,8 @@ v3d_nir_lower_load_scratch(nir_builder *b, nir_intrinsic_instr *instr)
                 chans[i] = &chan_instr->dest.ssa;
         }
 
-        nir_ssa_def *result = nir_vec(b, chans, instr->num_components);
-        nir_ssa_def_rewrite_uses(&instr->dest.ssa, result);
+        nir_def *result = nir_vec(b, chans, instr->num_components);
+        nir_def_rewrite_uses(&instr->dest.ssa, result);
         nir_instr_remove(&instr->instr);
 }
 
@@ -87,15 +87,15 @@ v3d_nir_lower_store_scratch(nir_builder *b, nir_intrinsic_instr *instr)
 {
         b->cursor = nir_before_instr(&instr->instr);
 
-        nir_ssa_def *offset = v3d_nir_scratch_offset(b, instr);
-        nir_ssa_def *value = nir_ssa_for_src(b, instr->src[0],
+        nir_def *offset = v3d_nir_scratch_offset(b, instr);
+        nir_def *value = nir_ssa_for_src(b, instr->src[0],
                                              instr->num_components);
 
         for (int i = 0; i < instr->num_components; i++) {
                 if (!(nir_intrinsic_write_mask(instr) & (1 << i)))
                         continue;
 
-                nir_ssa_def *chan_offset =
+                nir_def *chan_offset =
                         nir_iadd_imm(b, offset, V3D_CHANNELS * i * 4);
 
                 nir_intrinsic_instr *chan_instr =
 
@@ -32,21 +32,21 @@
  * 2x2 quad.
  */
 
-static nir_ssa_def *
+static nir_def *
 v3d_nir_lower_txf_ms_instr(nir_builder *b, nir_instr *in_instr, void *data)
 {
         nir_tex_instr *instr = nir_instr_as_tex(in_instr);
 
         b->cursor = nir_before_instr(&instr->instr);
 
-        nir_ssa_def *coord = nir_steal_tex_src(instr, nir_tex_src_coord);
-        nir_ssa_def *sample = nir_steal_tex_src(instr, nir_tex_src_ms_index);
+        nir_def *coord = nir_steal_tex_src(instr, nir_tex_src_coord);
+        nir_def *sample = nir_steal_tex_src(instr, nir_tex_src_ms_index);
 
-        nir_ssa_def *one = nir_imm_int(b, 1);
-        nir_ssa_def *x = nir_iadd(b,
+        nir_def *one = nir_imm_int(b, 1);
+        nir_def *x = nir_iadd(b,
                                   nir_ishl(b, nir_channel(b, coord, 0), one),
                                   nir_iand(b, sample, one));
-        nir_ssa_def *y = nir_iadd(b,
+        nir_def *y = nir_iadd(b,
                                   nir_ishl(b, nir_channel(b, coord, 1), one),
                                   nir_iand(b, nir_ushr(b, sample, one), one));
         if (instr->is_array)
 
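The coordinate math in the txf_ms hunk above maps a pixel plus its sample index into the larger storage that holds each pixel's 2x2 quad of samples: the low sample bit selects the column, the next bit selects the row. In scalar form (a sketch under that reading of the pass; msaa_texel_pos is a hypothetical helper, not part of the commit):

   /* Sketch: position of sample s of pixel (px, py) in the supersampled image,
    * mirroring x = (px << 1) + (s & 1) and y = (py << 1) + ((s >> 1) & 1). */
   static inline void
   msaa_texel_pos(unsigned px, unsigned py, unsigned s, unsigned *x, unsigned *y)
   {
      *x = (px << 1) + (s & 1);
      *y = (py << 1) + ((s >> 1) & 1);
   }
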
@@ -1481,8 +1481,8 @@ lower_load_num_subgroups(struct v3d_compile *c,
                 DIV_ROUND_UP(c->s->info.workgroup_size[0] *
                              c->s->info.workgroup_size[1] *
                              c->s->info.workgroup_size[2], V3D_CHANNELS);
-        nir_ssa_def *result = nir_imm_int(b, num_subgroups);
-        nir_ssa_def_rewrite_uses(&intr->dest.ssa, result);
+        nir_def *result = nir_imm_int(b, num_subgroups);
+        nir_def_rewrite_uses(&intr->dest.ssa, result);
         nir_instr_remove(&intr->instr);
 }
 
@@ -33,16 +33,16 @@ get_set_event_cs()
    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
                                                   "set event cs");
 
-   nir_ssa_def *buf =
+   nir_def *buf =
       nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
                                 .desc_set = 0,
                                 .binding = 0,
                                 .desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
 
-   nir_ssa_def *offset =
+   nir_def *offset =
       nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
 
-   nir_ssa_def *value =
+   nir_def *value =
       nir_load_push_constant(&b, 1, 8, nir_imm_int(&b, 0), .base = 4, .range = 4);
 
    nir_store_ssbo(&b, value, buf, offset,
@@ -58,19 +58,19 @@ get_wait_event_cs()
    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
                                                   "wait event cs");
 
-   nir_ssa_def *buf =
+   nir_def *buf =
       nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
                                 .desc_set = 0,
                                 .binding = 0,
                                 .desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
 
-   nir_ssa_def *offset =
+   nir_def *offset =
       nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
 
    nir_loop *loop = nir_push_loop(&b);
-   nir_ssa_def *load =
+   nir_def *load =
       nir_load_ssbo(&b, 1, 8, buf, offset, .access = 0, .align_mul = 4);
-   nir_ssa_def *value = nir_i2i32(&b, load);
+   nir_def *value = nir_i2i32(&b, load);
 
    nir_if *if_stmt = nir_push_if(&b, nir_ieq_imm(&b, value, 1));
    nir_jump(&b, nir_jump_break);
 
@@ -329,7 +329,7 @@ get_clear_rect_vs()
       nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
    vs_out_pos->data.location = VARYING_SLOT_POS;
 
-   nir_ssa_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
+   nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
    nir_store_var(&b, vs_out_pos, pos, 0xf);
 
    return b.shader;
@@ -386,7 +386,7 @@ get_clear_rect_gs(uint32_t push_constant_layer_base)
       nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
 
       /* gl_Layer from push constants */
-      nir_ssa_def *layer =
+      nir_def *layer =
         nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
                                .base = push_constant_layer_base, .range = 4);
       nir_store_var(&b, gs_out_layer, layer, 0x1);
@@ -414,7 +414,7 @@ get_color_clear_rect_fs(uint32_t rt_idx, VkFormat format)
      nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
    fs_out_color->data.location = FRAG_RESULT_DATA0 + rt_idx;
 
-   nir_ssa_def *color_load = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
+   nir_def *color_load = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
    nir_store_var(&b, fs_out_color, color_load, 0xf);
 
    return b.shader;
@@ -432,7 +432,7 @@ get_depth_clear_rect_fs()
                           "out_depth");
    fs_out_depth->data.location = FRAG_RESULT_DEPTH;
 
-   nir_ssa_def *depth_load =
+   nir_def *depth_load =
      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
 
    nir_store_var(&b, fs_out_depth, depth_load, 0x1);
 
@@ -2151,7 +2151,7 @@ get_texel_buffer_copy_vs()
                            glsl_vec4_type(), "gl_Position");
    vs_out_pos->data.location = VARYING_SLOT_POS;
 
-   nir_ssa_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
+   nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
    nir_store_var(&b, vs_out_pos, pos, 0xf);
 
    return b.shader;
@@ -2208,7 +2208,7 @@ get_texel_buffer_copy_gs()
       nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
 
      /* gl_Layer from push constants */
-     nir_ssa_def *layer =
+     nir_def *layer =
        nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
                               .base = TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET,
                               .range = 4);
@@ -2222,7 +2222,7 @@ get_texel_buffer_copy_gs()
    return nir;
 }
 
-static nir_ssa_def *
+static nir_def *
 load_frag_coord(nir_builder *b)
 {
    nir_foreach_shader_in_variable(var, b->shader) {
@@ -2286,24 +2286,24 @@ get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format,
    /* Load the box describing the pixel region we want to copy from the
    * texel buffer.
    */
-   nir_ssa_def *box =
+   nir_def *box =
      nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0),
                             .base = TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET,
                             .range = 16);
 
    /* Load the buffer stride (this comes in texel units) */
-   nir_ssa_def *stride =
+   nir_def *stride =
      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
                             .base = TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET,
                             .range = 4);
 
    /* Load the buffer offset (this comes in texel units) */
-   nir_ssa_def *offset =
+   nir_def *offset =
      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
                             .base = TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET,
                             .range = 4);
 
-   nir_ssa_def *coord = nir_f2i32(&b, load_frag_coord(&b));
+   nir_def *coord = nir_f2i32(&b, load_frag_coord(&b));
 
    /* Load pixel data from texel buffer based on the x,y offset of the pixel
    * within the box. Texel buffers are 1D arrays of texels.
@@ -2313,17 +2313,17 @@ get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format,
    * texel buffer should always be within its bounds and we we don't need
    * to add a check for that here.
    */
-   nir_ssa_def *x_offset =
+   nir_def *x_offset =
      nir_isub(&b, nir_channel(&b, coord, 0),
               nir_channel(&b, box, 0));
-   nir_ssa_def *y_offset =
+   nir_def *y_offset =
      nir_isub(&b, nir_channel(&b, coord, 1),
               nir_channel(&b, box, 1));
-   nir_ssa_def *texel_offset =
+   nir_def *texel_offset =
      nir_iadd(&b, nir_iadd(&b, offset, x_offset),
               nir_imul(&b, y_offset, stride));
 
-   nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
+   nir_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
    nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
    tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
    tex->op = nir_texop_txf;
@@ -2344,7 +2344,7 @@ get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format,
       component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_B, cswizzle->b);
    swiz[3] =
       component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_A, cswizzle->a);
-   nir_ssa_def *s = nir_swizzle(&b, &tex->dest.ssa, swiz, 4);
+   nir_def *s = nir_swizzle(&b, &tex->dest.ssa, swiz, 4);
    nir_store_var(&b, fs_out_color, s, 0xf);
 
    return b.shader;
@@ -3543,16 +3543,16 @@ create_blit_render_pass(struct v3dv_device *device,
    return result == VK_SUCCESS;
 }
 
-static nir_ssa_def *
+static nir_def *
 gen_tex_coords(nir_builder *b)
 {
-   nir_ssa_def *tex_box =
+   nir_def *tex_box =
      nir_load_push_constant(b, 4, 32, nir_imm_int(b, 0), .base = 0, .range = 16);
 
-   nir_ssa_def *tex_z =
+   nir_def *tex_z =
      nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
 
-   nir_ssa_def *vertex_id = nir_load_vertex_id(b);
+   nir_def *vertex_id = nir_load_vertex_id(b);
 
    /* vertex 0: src0_x, src0_y
    * vertex 1: src0_x, src1_y
@@ -3565,11 +3565,11 @@ gen_tex_coords(nir_builder *b)
    * channel 1 is vertex id & 1 ? src1_y : src0_y
    */
 
-   nir_ssa_def *one = nir_imm_int(b, 1);
-   nir_ssa_def *c0cmp = nir_ilt_imm(b, vertex_id, 2);
-   nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
+   nir_def *one = nir_imm_int(b, 1);
+   nir_def *c0cmp = nir_ilt_imm(b, vertex_id, 2);
+   nir_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
 
-   nir_ssa_def *comp[4];
+   nir_def *comp[4];
    comp[0] = nir_bcsel(b, c0cmp,
                        nir_channel(b, tex_box, 0),
                        nir_channel(b, tex_box, 2));
@@ -3582,9 +3582,9 @@ gen_tex_coords(nir_builder *b)
    return nir_vec(b, comp, 4);
 }
 
-static nir_ssa_def *
+static nir_def *
 build_nir_tex_op_read(struct nir_builder *b,
-                      nir_ssa_def *tex_pos,
+                      nir_def *tex_pos,
                       enum glsl_base_type tex_type,
                       enum glsl_sampler_dim dim)
 {
@@ -3597,7 +3597,7 @@ build_nir_tex_op_read(struct nir_builder *b,
    sampler->data.descriptor_set = 0;
    sampler->data.binding = 0;
 
-   nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
+   nir_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
    nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
    tex->sampler_dim = dim;
    tex->op = nir_texop_tex;
@@ -3613,13 +3613,13 @@ build_nir_tex_op_read(struct nir_builder *b,
    return &tex->dest.ssa;
 }
 
-static nir_ssa_def *
+static nir_def *
 build_nir_tex_op_ms_fetch_sample(struct nir_builder *b,
                                  nir_variable *sampler,
-                                 nir_ssa_def *tex_deref,
+                                 nir_def *tex_deref,
                                  enum glsl_base_type tex_type,
-                                 nir_ssa_def *tex_pos,
-                                 nir_ssa_def *sample_idx)
+                                 nir_def *tex_pos,
+                                 nir_def *sample_idx)
 {
    nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
    tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
@@ -3637,9 +3637,9 @@ build_nir_tex_op_ms_fetch_sample(struct nir_builder *b,
 }
 
 /* Fetches all samples at the given position and averages them */
-static nir_ssa_def *
+static nir_def *
 build_nir_tex_op_ms_resolve(struct nir_builder *b,
-                            nir_ssa_def *tex_pos,
+                            nir_def *tex_pos,
                             enum glsl_base_type tex_type,
                             VkSampleCountFlagBits src_samples)
 {
@@ -3653,10 +3653,10 @@ build_nir_tex_op_ms_resolve(struct nir_builder *b,
 
    const bool is_int = glsl_base_type_is_integer(tex_type);
 
-   nir_ssa_def *tmp = NULL;
-   nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
+   nir_def *tmp = NULL;
+   nir_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
    for (uint32_t i = 0; i < src_samples; i++) {
-      nir_ssa_def *s =
+      nir_def *s =
         build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
                                          tex_type, tex_pos,
                                          nir_imm_int(b, i));
@@ -3675,9 +3675,9 @@ build_nir_tex_op_ms_resolve(struct nir_builder *b,
 }
 
 /* Fetches the current sample (gl_SampleID) at the given position */
-static nir_ssa_def *
+static nir_def *
 build_nir_tex_op_ms_read(struct nir_builder *b,
-                         nir_ssa_def *tex_pos,
+                         nir_def *tex_pos,
                          enum glsl_base_type tex_type)
 {
    const struct glsl_type *sampler_type =
@@ -3687,17 +3687,17 @@ build_nir_tex_op_ms_read(struct nir_builder *b,
    sampler->data.descriptor_set = 0;
    sampler->data.binding = 0;
 
-   nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
+   nir_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
 
    return build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
                                            tex_type, tex_pos,
                                            nir_load_sample_id(b));
 }
 
-static nir_ssa_def *
+static nir_def *
 build_nir_tex_op(struct nir_builder *b,
                  struct v3dv_device *device,
-                 nir_ssa_def *tex_pos,
+                 nir_def *tex_pos,
                  enum glsl_base_type tex_type,
                  VkSampleCountFlagBits dst_samples,
                  VkSampleCountFlagBits src_samples,
@@ -3741,10 +3741,10 @@ get_blit_vs()
    vs_out_tex_coord->data.location = VARYING_SLOT_VAR0;
    vs_out_tex_coord->data.interpolation = INTERP_MODE_SMOOTH;
 
-   nir_ssa_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
+   nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
    nir_store_var(&b, vs_out_pos, pos, 0xf);
 
-   nir_ssa_def *tex_coord = gen_tex_coords(&b);
+   nir_def *tex_coord = gen_tex_coords(&b);
    nir_store_var(&b, vs_out_tex_coord, tex_coord, 0xf);
 
    return b.shader;
@@ -3795,11 +3795,11 @@ get_color_blit_fs(struct v3dv_device *device,
      nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
    fs_out_color->data.location = FRAG_RESULT_DATA0;
 
-   nir_ssa_def *tex_coord = nir_load_var(&b, fs_in_tex_coord);
+   nir_def *tex_coord = nir_load_var(&b, fs_in_tex_coord);
    const uint32_t channel_mask = get_channel_mask_for_sampler_dim(sampler_dim);
    tex_coord = nir_channels(&b, tex_coord, channel_mask);
 
-   nir_ssa_def *color = build_nir_tex_op(&b, device, tex_coord, src_base_type,
+   nir_def *color = build_nir_tex_op(&b, device, tex_coord, src_base_type,
                                          dst_samples, src_samples, sampler_dim);
 
    /* For integer textures, if the bit-size of the destination is too small to
@@ -3814,7 +3814,7 @@ get_color_blit_fs(struct v3dv_device *device,
    enum pipe_format src_pformat = vk_format_to_pipe_format(src_format);
    enum pipe_format dst_pformat = vk_format_to_pipe_format(dst_format);
 
-   nir_ssa_def *c[4];
+   nir_def *c[4];
    for (uint32_t i = 0; i < 4; i++) {
      c[i] = nir_channel(&b, color, i);
 
@@ -3832,11 +3832,11 @@ get_color_blit_fs(struct v3dv_device *device,
 
      assert(dst_bit_size > 0);
      if (util_format_is_pure_uint(dst_pformat)) {
-        nir_ssa_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);
+        nir_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);
        c[i] = nir_umin(&b, c[i], max);
      } else {
-        nir_ssa_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1);
-        nir_ssa_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1)));
+        nir_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1);
+        nir_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1)));
        c[i] = nir_imax(&b, nir_imin(&b, c[i], max), min);
      }
    }
 
@@ -568,7 +568,7 @@ lower_vulkan_resource_index(nir_builder *b,
    * vulkan_load_descriptor return a vec2 providing an index and
    * offset. Our backend compiler only cares about the index part.
    */
-   nir_ssa_def_rewrite_uses(&instr->dest.ssa,
+   nir_def_rewrite_uses(&instr->dest.ssa,
                             nir_imm_ivec2(b, index, 0));
    nir_instr_remove(&instr->instr);
 }
@@ -594,7 +594,7 @@ lower_tex_src(nir_builder *b,
               unsigned src_idx,
               struct lower_pipeline_layout_state *state)
 {
-   nir_ssa_def *index = NULL;
+   nir_def *index = NULL;
    unsigned base_index = 0;
    unsigned array_elements = 1;
    nir_tex_src *src = &instr->src[src_idx];
@@ -739,7 +739,7 @@ lower_image_deref(nir_builder *b,
                   struct lower_pipeline_layout_state *state)
 {
    nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
-   nir_ssa_def *index = NULL;
+   nir_def *index = NULL;
    unsigned array_elements = 1;
    unsigned base_index = 0;
 
@@ -826,7 +826,7 @@ lower_intrinsic(nir_builder *b,
      /* Loading the descriptor happens as part of load/store instructions,
      * so for us this is a no-op.
      */
-     nir_ssa_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa);
+     nir_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa);
      nir_instr_remove(&instr->instr);
      return true;
    }
@@ -907,11 +907,11 @@ lower_point_coord_cb(nir_builder *b, nir_instr *instr, void *_state)
      return false;
 
    b->cursor = nir_after_instr(&intr->instr);
-   nir_ssa_def *result = &intr->dest.ssa;
+   nir_def *result = &intr->dest.ssa;
    result =
      nir_vector_insert_imm(b, result,
                            nir_fsub_imm(b, 1.0, nir_channel(b, result, 1)), 1);
-   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
+   nir_def_rewrite_uses_after(&intr->dest.ssa,
                                   result, result->parent_instr);
    return true;
 }
@@ -2257,7 +2257,7 @@ pipeline_add_multiview_gs(struct v3dv_pipeline *pipeline,
    out_layer->data.location = VARYING_SLOT_LAYER;
 
    /* Get the view index value that we will write to gl_Layer */
-   nir_ssa_def *layer =
+   nir_def *layer =
      nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32);
 
    /* Emit all output vertices */
 
@@ -1345,23 +1345,23 @@ v3dv_ReleaseProfilingLockKHR(VkDevice device)
 
 static inline void
 nir_set_query_availability(nir_builder *b,
-                           nir_ssa_def *buf,
-                           nir_ssa_def *offset,
-                           nir_ssa_def *query_idx,
-                           nir_ssa_def *avail)
+                           nir_def *buf,
+                           nir_def *offset,
+                           nir_def *query_idx,
+                           nir_def *avail)
 {
    offset = nir_iadd(b, offset, query_idx); /* we use 1B per query */
    nir_store_ssbo(b, avail, buf, offset, .write_mask = 0x1, .align_mul = 1);
 }
 
-static inline nir_ssa_def *
+static inline nir_def *
 nir_get_query_availability(nir_builder *b,
-                           nir_ssa_def *buf,
-                           nir_ssa_def *offset,
-                           nir_ssa_def *query_idx)
+                           nir_def *buf,
+                           nir_def *offset,
+                           nir_def *query_idx)
 {
    offset = nir_iadd(b, offset, query_idx); /* we use 1B per query */
-   nir_ssa_def *avail = nir_load_ssbo(b, 1, 8, buf, offset, .align_mul = 1);
+   nir_def *avail = nir_load_ssbo(b, 1, 8, buf, offset, .align_mul = 1);
    return nir_i2i32(b, avail);
 }
 
@@ -1372,7 +1372,7 @@ get_set_query_availability_cs()
    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
                                                   "set query availability cs");
 
-   nir_ssa_def *buf =
+   nir_def *buf =
      nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
                                .desc_set = 0,
                                .binding = 0,
@@ -1382,15 +1382,15 @@ get_set_query_availability_cs()
    * ever change any of these parameters we need to update how we compute the
    * query index here.
    */
-   nir_ssa_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b, 32), 0);
+   nir_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b, 32), 0);
 
-   nir_ssa_def *offset =
+   nir_def *offset =
      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
 
-   nir_ssa_def *query_idx =
+   nir_def *query_idx =
      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 4, .range = 4);
 
-   nir_ssa_def *avail =
+   nir_def *avail =
      nir_load_push_constant(&b, 1, 8, nir_imm_int(&b, 0), .base = 8, .range = 1);
 
    query_idx = nir_iadd(&b, query_idx, wg_id);
@@ -1399,12 +1399,12 @@ get_set_query_availability_cs()
    return b.shader;
 }
 
-static inline nir_ssa_def *
-nir_get_occlusion_counter_offset(nir_builder *b, nir_ssa_def *query_idx)
+static inline nir_def *
+nir_get_occlusion_counter_offset(nir_builder *b, nir_def *query_idx)
 {
-   nir_ssa_def *query_group = nir_udiv_imm(b, query_idx, 16);
-   nir_ssa_def *query_group_offset = nir_umod_imm(b, query_idx, 16);
-   nir_ssa_def *offset =
+   nir_def *query_group = nir_udiv_imm(b, query_idx, 16);
+   nir_def *query_group_offset = nir_umod_imm(b, query_idx, 16);
+   nir_def *offset =
      nir_iadd(b, nir_imul_imm(b, query_group, 1024),
               nir_imul_imm(b, query_group_offset, 4));
    return offset;
@@ -1412,20 +1412,20 @@ nir_get_occlusion_counter_offset(nir_builder *b, nir_ssa_def *query_idx)
 
 static inline void
 nir_reset_occlusion_counter(nir_builder *b,
-                            nir_ssa_def *buf,
-                            nir_ssa_def *query_idx)
+                            nir_def *buf,
+                            nir_def *query_idx)
 {
-   nir_ssa_def *offset = nir_get_occlusion_counter_offset(b, query_idx);
-   nir_ssa_def *zero = nir_imm_int(b, 0);
+   nir_def *offset = nir_get_occlusion_counter_offset(b, query_idx);
+   nir_def *zero = nir_imm_int(b, 0);
    nir_store_ssbo(b, zero, buf, offset, .write_mask = 0x1, .align_mul = 4);
 }
 
-static inline nir_ssa_def *
+static inline nir_def *
 nir_read_occlusion_counter(nir_builder *b,
-                           nir_ssa_def *buf,
-                           nir_ssa_def *query_idx)
+                           nir_def *buf,
+                           nir_def *query_idx)
 {
-   nir_ssa_def *offset = nir_get_occlusion_counter_offset(b, query_idx);
+   nir_def *offset = nir_get_occlusion_counter_offset(b, query_idx);
    return nir_load_ssbo(b, 1, 32, buf, offset, .access = 0, .align_mul = 4);
 }
 
@@ -1436,7 +1436,7 @@ get_reset_occlusion_query_cs()
    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
                                                   "reset occlusion query cs");
 
-   nir_ssa_def *buf =
+   nir_def *buf =
      nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
                                .desc_set = 0,
                                .binding = 0,
@@ -1446,15 +1446,15 @@ get_reset_occlusion_query_cs()
    * ever change any of these parameters we need to update how we compute the
    * query index here.
    */
-   nir_ssa_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b, 32), 0);
+   nir_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b, 32), 0);
 
-   nir_ssa_def *avail_offset =
+   nir_def *avail_offset =
     nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
 
-   nir_ssa_def *base_query_idx =
+   nir_def *base_query_idx =
     nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 4, .range = 4);
 
-   nir_ssa_def *query_idx = nir_iadd(&b, base_query_idx, wg_id);
+   nir_def *query_idx = nir_iadd(&b, base_query_idx, wg_id);
 
    nir_set_query_availability(&b, buf, avail_offset, query_idx,
                               nir_imm_intN_t(&b, 0, 8));
@@ -1465,16 +1465,16 @@ get_reset_occlusion_query_cs()
 
 static void
 write_query_buffer(nir_builder *b,
-                   nir_ssa_def *buf,
-                   nir_ssa_def **offset,
-                   nir_ssa_def *value,
+                   nir_def *buf,
+                   nir_def **offset,
+                   nir_def *value,
                    bool flag_64bit)
 {
    if (flag_64bit) {
      /* Create a 64-bit value using a vec2 with the .Y component set to 0
      * so we can write a 64-bit value in a single store.
      */
-     nir_ssa_def *value64 = nir_vec2(b, value, nir_imm_int(b, 0));
+     nir_def *value64 = nir_vec2(b, value, nir_imm_int(b, 0));
      nir_store_ssbo(b, value64, buf, *offset, .write_mask = 0x3, .align_mul = 8);
      *offset = nir_iadd_imm(b, *offset, 8);
    } else {
@@ -1494,55 +1494,55 @@ get_copy_query_results_cs(VkQueryResultFlags flags)
    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
                                                   "copy query results cs");
 
-   nir_ssa_def *buf =
+   nir_def *buf =
      nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
                                .desc_set = 0,
                                .binding = 0,
                                .desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
 
-   nir_ssa_def *buf_out =
+   nir_def *buf_out =
     nir_vulkan_resource_index(&b, 2, 32, nir_imm_int(&b, 0),
                               .desc_set = 1,
                               .binding = 0,
                               .desc_type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
 
    /* Read push constants */
-   nir_ssa_def *avail_offset =
+   nir_def *avail_offset =
     nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
 
-   nir_ssa_def *base_query_idx =
+   nir_def *base_query_idx =
    nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 4, .range = 4);
 
-   nir_ssa_def *base_offset_out =
+   nir_def *base_offset_out =
    nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 8, .range = 4);
 
-   nir_ssa_def *stride =
+   nir_def *stride =
    nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 12, .range = 4);
 
    /* This assumes a local size of 1 and a horizontal-only dispatch. If we
    * ever change any of these parameters we need to update how we compute the
    * query index here.
    */
-   nir_ssa_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b, 32), 0);
-   nir_ssa_def *query_idx = nir_iadd(&b, base_query_idx, wg_id);
+   nir_def *wg_id = nir_channel(&b, nir_load_workgroup_id(&b, 32), 0);
+   nir_def *query_idx = nir_iadd(&b, base_query_idx, wg_id);
 
    /* Read query availability if needed */
-   nir_ssa_def *avail = NULL;
+   nir_def *avail = NULL;
    if (flag_avail || !flag_partial)
      avail = nir_get_query_availability(&b, buf, avail_offset, query_idx);
 
   /* Write occusion query result... */
-   nir_ssa_def *offset =
+   nir_def *offset =
     nir_iadd(&b, base_offset_out, nir_imul(&b, wg_id, stride));
 
   /* ...if partial is requested, we always write */
   if(flag_partial) {
-     nir_ssa_def *query_res = nir_read_occlusion_counter(&b, buf, query_idx);
+     nir_def *query_res = nir_read_occlusion_counter(&b, buf, query_idx);
      write_query_buffer(&b, buf_out, &offset, query_res, flag_64bit);
   } else {
      /*...otherwise, we only write if the query is available */
      nir_if *if_stmt = nir_push_if(&b, nir_ine_imm(&b, avail, 0));
-     nir_ssa_def *query_res = nir_read_occlusion_counter(&b, buf, query_idx);
+     nir_def *query_res = nir_read_occlusion_counter(&b, buf, query_idx);
      write_query_buffer(&b, buf_out, &offset, query_res, flag_64bit);
      nir_pop_if(&b, if_stmt);
   }
 
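nir_get_occlusion_counter_offset() above encodes the buffer layout these query shaders share: counters are grouped 16 to a 1024-byte block, 4 bytes each, so query 37 falls in group 37 / 16 = 2 at slot 37 % 16 = 5, i.e. byte offset 2 * 1024 + 5 * 4 = 2068. The CPU-side equivalent would be (a sketch, not part of the commit):

   /* Sketch: byte offset of an occlusion counter, 16 queries per 1 KB group. */
   static inline uint32_t
   occlusion_counter_offset(uint32_t query_idx)
   {
      return (query_idx / 16) * 1024 + (query_idx % 16) * 4;
   }
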
@@ -53,7 +53,7 @@ lower_clc_call_instr(nir_instr *instr, nir_builder *b,
       return false;
    }
 
-   nir_ssa_def **params = rzalloc_array(b->shader, nir_ssa_def*, call->num_params);
+   nir_def **params = rzalloc_array(b->shader, nir_def*, call->num_params);
 
    for (unsigned i = 0; i < call->num_params; i++) {
      params[i] = nir_ssa_for_src(b, call->params[i],
 
@@ -2610,10 +2610,10 @@ replace_unused_interpolate_at_with_undef(nir_builder *b, nir_instr *instr,
    nir_variable *var = nir_intrinsic_get_var(intrin, 0);
    if (var->data.mode == nir_var_shader_temp) {
       /* Create undef and rewrite the interp uses */
-      nir_ssa_def *undef =
-         nir_ssa_undef(b, intrin->dest.ssa.num_components,
+      nir_def *undef =
+         nir_undef(b, intrin->dest.ssa.num_components,
                        intrin->dest.ssa.bit_size);
-      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, undef);
+      nir_def_rewrite_uses(&intrin->dest.ssa, undef);

       nir_instr_remove(&intrin->instr);
       return true;

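The hunk above is the canonical replace-with-undef idiom. As a standalone sketch of the same pattern after the rename, using only the calls that appear in this commit:

/* Replace an instruction's result with an undef of matching shape,
 * then drop the instruction itself.
 */
static void
replace_with_undef(nir_builder *b, nir_intrinsic_instr *intrin)
{
   b->cursor = nir_before_instr(&intrin->instr);
   nir_def *undef = nir_undef(b, intrin->dest.ssa.num_components,
                              intrin->dest.ssa.bit_size);
   nir_def_rewrite_uses(&intrin->dest.ssa, undef);
   nir_instr_remove(&intrin->instr);
}
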
@@ -936,7 +936,7 @@ static void
 zero_array_members(nir_builder *b, nir_variable *var)
 {
    nir_deref_instr *deref = nir_build_deref_var(b, var);
-   nir_ssa_def *zero = nir_imm_zero(b, 4, 32);
+   nir_def *zero = nir_imm_zero(b, 4, 32);
    for (int i = 0; i < glsl_array_size(var->type); i++) {
       nir_deref_instr *arr = nir_build_deref_array_imm(b, deref, i);
       uint32_t mask = BITFIELD_MASK(glsl_get_vector_elements(arr->type));

@@ -118,7 +118,7 @@ lower_deref_instr(nir_builder *b, nir_intrinsic_instr *instr,
    else
       range_base = var->data.offset;

-   nir_ssa_def *offset = nir_imm_int(b, offset_value);
+   nir_def *offset = nir_imm_int(b, offset_value);
    for (nir_deref_instr *d = deref; d->deref_type != nir_deref_type_var;
         d = nir_deref_instr_parent(d)) {
       assert(d->deref_type == nir_deref_type_array);

@@ -34,8 +34,8 @@
 #define imm1(b, x) nir_imm_float(b, x)
 #define imm3(b, x) nir_imm_vec3(b, x, x, x)

-static nir_ssa_def *
-swizzle(nir_builder *b, nir_ssa_def *src, int swizzle, int components)
+static nir_def *
+swizzle(nir_builder *b, nir_def *src, int swizzle, int components)
 {
    unsigned swizzle_arr[4];
    swizzle_arr[0] = GET_SWZ(swizzle, 0);

@@ -46,72 +46,72 @@ swizzle(nir_builder *b, nir_ssa_def *src, int swizzle, int components)
    return nir_swizzle(b, src, swizzle_arr, components);
 }

-static nir_ssa_def *
-swizzle_x(nir_builder *b, nir_ssa_def *src)
+static nir_def *
+swizzle_x(nir_builder *b, nir_def *src)
 {
    return nir_channel(b, src, 0);
 }

-static nir_ssa_def *
-swizzle_y(nir_builder *b, nir_ssa_def *src)
+static nir_def *
+swizzle_y(nir_builder *b, nir_def *src)
 {
    return nir_channel(b, src, 1);
 }

-static nir_ssa_def *
-swizzle_z(nir_builder *b, nir_ssa_def *src)
+static nir_def *
+swizzle_z(nir_builder *b, nir_def *src)
 {
    return nir_channel(b, src, 2);
 }

-static nir_ssa_def *
-swizzle_w(nir_builder *b, nir_ssa_def *src)
+static nir_def *
+swizzle_w(nir_builder *b, nir_def *src)
 {
    return nir_channel(b, src, 3);
 }

-static nir_ssa_def *
-blend_multiply(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
+static nir_def *
+blend_multiply(nir_builder *b, nir_def *src, nir_def *dst)
 {
    /* f(Cs,Cd) = Cs*Cd */
    return nir_fmul(b, src, dst);
 }

-static nir_ssa_def *
-blend_screen(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
+static nir_def *
+blend_screen(nir_builder *b, nir_def *src, nir_def *dst)
 {
    /* f(Cs,Cd) = Cs+Cd-Cs*Cd */
    return nir_fsub(b, nir_fadd(b, src, dst), nir_fmul(b, src, dst));
 }

-static nir_ssa_def *
-blend_overlay(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
+static nir_def *
+blend_overlay(nir_builder *b, nir_def *src, nir_def *dst)
 {
    /* f(Cs,Cd) = 2*Cs*Cd, if Cd <= 0.5
     *            1-2*(1-Cs)*(1-Cd), otherwise
     */
-   nir_ssa_def *rule_1 = nir_fmul(b, nir_fmul(b, src, dst), imm3(b, 2.0));
-   nir_ssa_def *rule_2 =
+   nir_def *rule_1 = nir_fmul(b, nir_fmul(b, src, dst), imm3(b, 2.0));
+   nir_def *rule_2 =
       nir_fsub(b, imm3(b, 1.0), nir_fmul(b, nir_fmul(b, nir_fsub(b, imm3(b, 1.0), src), nir_fsub(b, imm3(b, 1.0), dst)), imm3(b, 2.0)));
    return nir_bcsel(b, nir_fge(b, imm3(b, 0.5f), dst), rule_1, rule_2);
 }

-static nir_ssa_def *
-blend_darken(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
+static nir_def *
+blend_darken(nir_builder *b, nir_def *src, nir_def *dst)
 {
    /* f(Cs,Cd) = min(Cs,Cd) */
    return nir_fmin(b, src, dst);
 }

-static nir_ssa_def *
-blend_lighten(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
+static nir_def *
+blend_lighten(nir_builder *b, nir_def *src, nir_def *dst)
 {
    /* f(Cs,Cd) = max(Cs,Cd) */
    return nir_fmax(b, src, dst);
 }

-static nir_ssa_def *
-blend_colordodge(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
+static nir_def *
+blend_colordodge(nir_builder *b, nir_def *src, nir_def *dst)
 {
    /* f(Cs,Cd) =
     *   0, if Cd <= 0

@@ -123,8 +123,8 @@ blend_colordodge(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
       nir_fmin(b, imm3(b, 1.0), nir_fdiv(b, dst, nir_fsub(b, imm3(b, 1.0), src)))));
 }

-static nir_ssa_def *
-blend_colorburn(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
+static nir_def *
+blend_colorburn(nir_builder *b, nir_def *src, nir_def *dst)
 {
    /* f(Cs,Cd) =
     *   1, if Cd >= 1

@@ -136,20 +136,20 @@ blend_colorburn(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
       nir_fsub(b, imm3(b, 1.0), nir_fmin(b, imm3(b, 1.0), nir_fdiv(b, nir_fsub(b, imm3(b, 1.0), dst), src)))));
 }

-static nir_ssa_def *
-blend_hardlight(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
+static nir_def *
+blend_hardlight(nir_builder *b, nir_def *src, nir_def *dst)
 {
    /* f(Cs,Cd) = 2*Cs*Cd, if Cs <= 0.5
     *            1-2*(1-Cs)*(1-Cd), otherwise
     */
-   nir_ssa_def *rule_1 = nir_fmul(b, imm3(b, 2.0), nir_fmul(b, src, dst));
-   nir_ssa_def *rule_2 =
+   nir_def *rule_1 = nir_fmul(b, imm3(b, 2.0), nir_fmul(b, src, dst));
+   nir_def *rule_2 =
       nir_fsub(b, imm3(b, 1.0), nir_fmul(b, imm3(b, 2.0), nir_fmul(b, nir_fsub(b, imm3(b, 1.0), src), nir_fsub(b, imm3(b, 1.0), dst))));
    return nir_bcsel(b, nir_fge(b, imm3(b, 0.5), src), rule_1, rule_2);
 }

-static nir_ssa_def *
-blend_softlight(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
+static nir_def *
+blend_softlight(nir_builder *b, nir_def *src, nir_def *dst)
 {
    /* f(Cs,Cd) =
     *   Cd-(1-2*Cs)*Cd*(1-Cd),

@@ -166,49 +166,49 @@ blend_softlight(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
     *   Cd*((16*Cd-12)*Cd+3) if Cs > 0.5 and Cd <= 0.25
     *   sqrt(Cd)-Cd, otherwise
     */
-   nir_ssa_def *factor_1 = nir_fmul(b, dst, nir_fsub(b, imm3(b, 1.0), dst));
-   nir_ssa_def *factor_2 =
+   nir_def *factor_1 = nir_fmul(b, dst, nir_fsub(b, imm3(b, 1.0), dst));
+   nir_def *factor_2 =
       nir_fmul(b, dst, nir_fadd(b, nir_fmul(b, nir_fsub(b, nir_fmul(b, imm3(b, 16.0), dst), imm3(b, 12.0)), dst), imm3(b, 3.0)));
-   nir_ssa_def *factor_3 = nir_fsub(b, nir_fsqrt(b, dst), dst);
-   nir_ssa_def *factor = nir_bcsel(b, nir_fge(b, imm3(b, 0.5), src), factor_1,
+   nir_def *factor_3 = nir_fsub(b, nir_fsqrt(b, dst), dst);
+   nir_def *factor = nir_bcsel(b, nir_fge(b, imm3(b, 0.5), src), factor_1,
                                    nir_bcsel(b, nir_fge(b, imm3(b, 0.25), dst), factor_2, factor_3));
    return nir_fadd(b, dst, nir_fmul(b, nir_fsub(b, nir_fmul(b, imm3(b, 2.0), src), imm3(b, 1.0)), factor));
 }

-static nir_ssa_def *
-blend_difference(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
+static nir_def *
+blend_difference(nir_builder *b, nir_def *src, nir_def *dst)
 {
    return nir_fabs(b, nir_fsub(b, dst, src));
 }

-static nir_ssa_def *
-blend_exclusion(nir_builder *b, nir_ssa_def *src, nir_ssa_def *dst)
+static nir_def *
+blend_exclusion(nir_builder *b, nir_def *src, nir_def *dst)
 {
    return nir_fadd(b, src, nir_fsub(b, dst, nir_fmul(b, imm3(b, 2.0), nir_fmul(b, src, dst))));
 }

 /* Return the minimum of a vec3's components */
-static nir_ssa_def *
-minv3(nir_builder *b, nir_ssa_def *v)
+static nir_def *
+minv3(nir_builder *b, nir_def *v)
 {
    return nir_fmin(b, nir_fmin(b, swizzle_x(b, v), swizzle_y(b, v)), swizzle_z(b, v));
 }

 /* Return the maximum of a vec3's components */
-static nir_ssa_def *
-maxv3(nir_builder *b, nir_ssa_def *v)
+static nir_def *
+maxv3(nir_builder *b, nir_def *v)
 {
    return nir_fmax(b, nir_fmax(b, swizzle_x(b, v), swizzle_y(b, v)), swizzle_z(b, v));
 }

-static nir_ssa_def *
-lumv3(nir_builder *b, nir_ssa_def *c)
+static nir_def *
+lumv3(nir_builder *b, nir_def *c)
 {
    return nir_fdot(b, c, nir_imm_vec3(b, 0.30, 0.59, 0.11));
 }

-static nir_ssa_def *
-satv3(nir_builder *b, nir_ssa_def *c)
+static nir_def *
+satv3(nir_builder *b, nir_def *c)
 {
    return nir_fsub(b, maxv3(b, c), minv3(b, c));
 }

@@ -240,20 +240,20 @@ set_lum(nir_builder *b,
         nir_variable *cbase,
         nir_variable *clum)
 {
-   nir_ssa_def *cbase_def = nir_load_var(b, cbase);
+   nir_def *cbase_def = nir_load_var(b, cbase);
    nir_store_var(b, color, nir_fadd(b, cbase_def, nir_fsub(b, lumv3(b, nir_load_var(b, clum)), lumv3(b, cbase_def))), ~0);

    nir_variable *llum = add_temp_var(b, "__blend_lum", glsl_float_type());
    nir_variable *mincol = add_temp_var(b, "__blend_mincol", glsl_float_type());
    nir_variable *maxcol = add_temp_var(b, "__blend_maxcol", glsl_float_type());

-   nir_ssa_def *color_def = nir_load_var(b, color);
+   nir_def *color_def = nir_load_var(b, color);
    nir_store_var(b, llum, lumv3(b, color_def), ~0);
    nir_store_var(b, mincol, minv3(b, color_def), ~0);
    nir_store_var(b, maxcol, maxv3(b, color_def), ~0);

-   nir_ssa_def *mincol_def = nir_load_var(b, mincol);
-   nir_ssa_def *llum_def = nir_load_var(b, llum);
+   nir_def *mincol_def = nir_load_var(b, mincol);
+   nir_def *llum_def = nir_load_var(b, llum);
    nir_if *nif = nir_push_if(b, nir_flt(b, mincol_def, imm1(b, 0.0)));

    /* Add then block */

@@ -261,7 +261,7 @@ set_lum(nir_builder *b,

    /* Add else block */
    nir_push_else(b, nif);
-   nir_ssa_def *maxcol_def = nir_load_var(b, maxcol);
+   nir_def *maxcol_def = nir_load_var(b, maxcol);
    nir_if *nif2 = nir_push_if(b, nir_flt(b, imm1(b, 1.0), maxcol_def));
    nir_store_var(b, color, nir_fadd(b, llum_def, nir_fdiv(b, nir_fmul(b, nir_fsub(b, color_def, llum_def), nir_fsub(b, imm3(b, 1.0), llum_def)), nir_fsub(b, maxcol_def, llum_def))), ~0);
    nir_pop_if(b, nif2);

@@ -279,8 +279,8 @@ set_lum_sat(nir_builder *b,
             nir_variable *csat,
             nir_variable *clum)
 {
-   nir_ssa_def *cbase_def = nir_load_var(b, cbase);
-   nir_ssa_def *csat_def = nir_load_var(b, csat);
+   nir_def *cbase_def = nir_load_var(b, cbase);
+   nir_def *csat_def = nir_load_var(b, csat);

    nir_variable *sbase = add_temp_var(b, "__blend_sbase", glsl_float_type());
    nir_store_var(b, sbase, satv3(b, cbase_def), ~0);

@@ -290,10 +290,10 @@ set_lum_sat(nir_builder *b,
     * and interpolating the "middle" component based on its
     * original value relative to the smallest/largest.
     */
-   nir_ssa_def *sbase_def = nir_load_var(b, sbase);
+   nir_def *sbase_def = nir_load_var(b, sbase);
    nir_if *nif = nir_push_if(b, nir_flt(b, imm1(b, 0.0), sbase_def));
-   nir_ssa_def *ssat = satv3(b, csat_def);
-   nir_ssa_def *minbase = minv3(b, cbase_def);
+   nir_def *ssat = satv3(b, csat_def);
+   nir_def *minbase = minv3(b, cbase_def);
    nir_store_var(b, color, nir_fdiv(b, nir_fmul(b, nir_fsub(b, cbase_def, minbase), ssat), sbase_def), ~0);
    nir_push_else(b, nif);
    nir_store_var(b, color, imm3(b, 0.0), ~0);

@@ -302,7 +302,7 @@ set_lum_sat(nir_builder *b,
    set_lum(b, color, color, clum);
 }

-static nir_ssa_def *
+static nir_def *
 is_mode(nir_builder *b, nir_variable *mode, enum gl_advanced_blend_mode q)
 {
    return nir_ieq_imm(b, nir_load_var(b, mode), (unsigned) q);

@@ -312,7 +312,7 @@ static nir_variable *
 calc_blend_result(nir_builder *b,
                   nir_variable *mode,
                   nir_variable *fb,
-                  nir_ssa_def *blend_src,
+                  nir_def *blend_src,
                   GLbitfield blend_qualifiers)
 {
    nir_variable *result = add_temp_var(b, "__blend_result", glsl_vec4_type());

@@ -337,10 +337,10 @@ calc_blend_result(nir_builder *b,
    nir_variable *dst_rgb = add_temp_var(b, "__blend_dst_rgb", glsl_vec_type(3));
    nir_variable *dst_alpha = add_temp_var(b, "__blend_dst_a", glsl_float_type());

-   nir_ssa_def *fb_def = nir_load_var(b, fb);
+   nir_def *fb_def = nir_load_var(b, fb);
    nir_store_var(b, dst_alpha, swizzle_w(b, fb_def), ~0);

-   nir_ssa_def *dst_alpha_def = nir_load_var(b, dst_alpha);
+   nir_def *dst_alpha_def = nir_load_var(b, dst_alpha);
    nir_if *nif = nir_push_if(b, nir_feq(b, dst_alpha_def, imm1(b, 0.0)));
    nir_store_var(b, dst_rgb, imm3(b, 0.0), ~0);
    nir_push_else(b, nif);

@@ -348,7 +348,7 @@ calc_blend_result(nir_builder *b,
    nir_pop_if(b, nif);

    nir_store_var(b, src_alpha, swizzle_w(b, blend_src), ~0);
-   nir_ssa_def *src_alpha_def = nir_load_var(b, src_alpha);
+   nir_def *src_alpha_def = nir_load_var(b, src_alpha);
    nif = nir_push_if(b, nir_feq(b, src_alpha_def, imm1(b, 0.0)));
    nir_store_var(b, src_rgb, imm3(b, 0.0), ~0);
    nir_push_else(b, nif);

@@ -357,15 +357,15 @@ calc_blend_result(nir_builder *b,

    nir_variable *factor = add_temp_var(b, "__blend_factor", glsl_vec_type(3));

-   nir_ssa_def *src_rgb_def = nir_load_var(b, src_rgb);
-   nir_ssa_def *dst_rgb_def = nir_load_var(b, dst_rgb);
+   nir_def *src_rgb_def = nir_load_var(b, src_rgb);
+   nir_def *dst_rgb_def = nir_load_var(b, dst_rgb);

    unsigned choices = blend_qualifiers;
    while (choices) {
       enum gl_advanced_blend_mode choice = (enum gl_advanced_blend_mode)u_bit_scan(&choices);

       nir_if *iff = nir_push_if(b, is_mode(b, mode, choice));
-      nir_ssa_def *val = NULL;
+      nir_def *val = NULL;

       switch (choice) {
       case BLEND_MULTIPLY:

@@ -454,7 +454,7 @@ calc_blend_result(nir_builder *b,
    /* WRITEMASK_XYZ */
    nir_store_var(b, result, nir_pad_vec4(b, nir_fadd(b, nir_fadd(b, nir_fmul(b, nir_load_var(b, factor), nir_load_var(b, p0)), nir_fmul(b, src_rgb_def, nir_load_var(b, p1))), nir_fmul(b, dst_rgb_def, nir_load_var(b, p2)))), 0x7);
    /* WRITEMASK_W */
-   nir_ssa_def *val = nir_fadd(b, nir_fadd(b, nir_load_var(b, p0), nir_load_var(b, p1)), nir_load_var(b, p2));
+   nir_def *val = nir_fadd(b, nir_fadd(b, nir_load_var(b, p0), nir_load_var(b, p1)), nir_load_var(b, p2));
    nir_store_var(b, result, nir_vec4(b, val, val, val, val), 0x8);

    /* reset cursor to the end of the main function */

@@ -466,10 +466,10 @@ calc_blend_result(nir_builder *b,
 /**
  * Dereference var, or var[0] if it's an array.
  */
-static nir_ssa_def *
+static nir_def *
 load_output(nir_builder *b, nir_variable *var)
 {
-   nir_ssa_def *var_def;
+   nir_def *var_def;
    if (glsl_type_is_array(var->type)) {
       var_def = nir_load_array_var_imm(b, var, 0);
    } else {

@@ -539,12 +539,12 @@ gl_nir_lower_blend_equation_advanced(nir_shader *sh, bool coherent)
    /* Combine values written to outputs into a single RGBA blend source.
     * We assign <0, 0, 0, 1> to any components with no corresponding output.
     */
-   nir_ssa_def *blend_source;
+   nir_def *blend_source;
    if (outputs[0] &&
        glsl_get_vector_elements(glsl_without_array(outputs[0]->type)) == 4) {
       blend_source = load_output(&b, outputs[0]);
    } else {
-      nir_ssa_def *blend_comps[4];
+      nir_def *blend_comps[4];
       for (int i = 0; i < 4; i++) {
          nir_variable *var = outputs[i];
          if (var) {

@@ -570,7 +570,7 @@ gl_nir_lower_blend_equation_advanced(nir_shader *sh, bool coherent)
       if (glsl_type_is_array(outputs[i]->type)) {
          nir_store_array_var_imm(&b, outputs[i], 0, nir_load_var(&b, result_dest), 1 << i);
       } else {
-         nir_ssa_def *val = swizzle(&b, nir_load_var(&b, result_dest), i, 1);
+         nir_def *val = swizzle(&b, nir_load_var(&b, result_dest), i, 1);
          nir_store_var(&b, outputs[i], nir_vec4(&b, val, val, val, val), 1 << i);
       }
    }

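Each piecewise f(Cs,Cd) formula in the helpers above maps to branch-free builder code: both arms are computed and nir_bcsel selects per component. As a sketch of the pattern in the post-rename spelling, here is a hypothetical extra mode written in the same style; it is not part of the commit and exists only to illustrate the shape of these helpers:

/* Hypothetical blend mode in the style of the file above:
 * f(Cs,Cd) = (Cs+Cd)/2. Illustrative only; imm3() is the file's
 * vec3-immediate macro shown in the first hunk.
 */
static nir_def *
blend_average(nir_builder *b, nir_def *src, nir_def *dst)
{
   return nir_fmul(b, nir_fadd(b, src, dst), imm3(b, 0.5));
}
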
@@ -29,7 +29,7 @@
 #include "util/compiler.h"
 #include "main/shader_types.h"

-static nir_ssa_def *
+static nir_def *
 get_block_array_index(nir_builder *b, nir_deref_instr *deref,
                       const struct gl_shader_program *shader_program)
 {

@@ -41,7 +41,7 @@ get_block_array_index(nir_builder *b, nir_deref_instr *deref,
    */
   int const_array_offset = 0;
   const char *block_name = "";
-   nir_ssa_def *nonconst_index = NULL;
+   nir_def *nonconst_index = NULL;
    while (deref->deref_type == nir_deref_type_array) {
       nir_deref_instr *parent = nir_deref_instr_parent(deref);
       assert(parent && glsl_type_is_array(parent->type));

@@ -56,9 +56,9 @@ get_block_array_index(nir_builder *b, nir_deref_instr *deref,

          const_array_offset += arr_index * array_elements;
       } else {
-         nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
+         nir_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
          arr_index = nir_umin(b, arr_index, nir_imm_int(b, arr_size - 1));
-         nir_ssa_def *arr_offset = nir_amul_imm(b, arr_index, array_elements);
+         nir_def *arr_offset = nir_amul_imm(b, arr_index, array_elements);
          if (nonconst_index)
            nonconst_index = nir_iadd(b, nonconst_index, arr_offset);
         else

@@ -202,7 +202,7 @@ lower_buffer_interface_derefs_impl(nir_function_impl *impl,
       b.cursor = nir_before_instr(&deref->instr);

       unsigned offset = 0;
-      nir_ssa_def *ptr;
+      nir_def *ptr;
       if (deref->deref_type == nir_deref_type_var &&
           !glsl_type_is_interface(glsl_without_array(deref->var->type))) {
          /* This variable is contained in an interface block rather than

@@ -219,7 +219,7 @@ lower_buffer_interface_derefs_impl(nir_function_impl *impl,
           * Everything after this point is a byte offset and will be
           * handled by nir_lower_explicit_io().
           */
-         nir_ssa_def *index = get_block_array_index(&b, deref,
+         nir_def *index = get_block_array_index(&b, deref,
                                                     shader_program);
          ptr = nir_vec2(&b, index, nir_imm_int(&b, offset));
       } else {

@@ -237,7 +237,7 @@ lower_buffer_interface_derefs_impl(nir_function_impl *impl,
          cast->cast.align_mul = NIR_ALIGN_MUL_MAX;
          cast->cast.align_offset = offset % NIR_ALIGN_MUL_MAX;

-         nir_ssa_def_rewrite_uses(&deref->dest.ssa,
+         nir_def_rewrite_uses(&deref->dest.ssa,
                                   &cast->dest.ssa);
          nir_deref_instr_remove_if_unused(deref);
          break;

@@ -262,8 +262,8 @@ lower_buffer_interface_derefs_impl(nir_function_impl *impl,
         if (glsl_type_is_boolean(deref->type)) {
            b.cursor = nir_after_instr(&intrin->instr);
            intrin->dest.ssa.bit_size = 32;
-           nir_ssa_def *bval = nir_i2b(&b, &intrin->dest.ssa);
-           nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
+           nir_def *bval = nir_i2b(&b, &intrin->dest.ssa);
+           nir_def_rewrite_uses_after(&intrin->dest.ssa,
                                           bval,
                                           bval->parent_instr);
            progress = true;

@@ -288,7 +288,7 @@ lower_buffer_interface_derefs_impl(nir_function_impl *impl,
          */
         if (glsl_type_is_boolean(deref->type)) {
            b.cursor = nir_before_instr(&intrin->instr);
-           nir_ssa_def *ival = nir_b2i32(&b, intrin->src[1].ssa);
+           nir_def *ival = nir_b2i32(&b, intrin->src[1].ssa);
            nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
                                  nir_src_for_ssa(ival));
            progress = true;

@@ -86,7 +86,7 @@ lower_instr(nir_builder *b, nir_instr *instr, void *cb_data)

    b->cursor = nir_before_instr(instr);

-   nir_ssa_def *src;
+   nir_def *src;
    int range_base = 0;
    if (bindless) {
       src = nir_load_deref(b, deref);

@@ -359,7 +359,7 @@ get_packed_varying_deref(struct lower_packed_varyings_state *state,
 struct packing_store_values {
    bool is_64bit;
    unsigned writemasks[2];
-   nir_ssa_def *values[2];
+   nir_def *values[2];
    nir_deref_instr *deref;
 };

@@ -374,7 +374,7 @@ bitwise_assign_pack(struct lower_packed_varyings_state *state,
                     nir_deref_instr *packed_deref,
                     nir_deref_instr *unpacked_deref,
                     const struct glsl_type *unpacked_type,
-                    nir_ssa_def *value,
+                    nir_def *value,
                     unsigned writemask)

 {

@@ -406,7 +406,7 @@ bitwise_assign_pack(struct lower_packed_varyings_state *state,

       unsigned swiz_x = 0;
       unsigned writemask = 0x3;
-      nir_ssa_def *swizzle = nir_swizzle(&state->b, value, &swiz_x, 1);
+      nir_def *swizzle = nir_swizzle(&state->b, value, &swiz_x, 1);

       store_state->is_64bit = true;
       store_state->deref = packed_deref;

@@ -450,7 +450,7 @@ bitwise_assign_unpack(struct lower_packed_varyings_state *state,
                       nir_deref_instr *unpacked_deref,
                       nir_deref_instr *packed_deref,
                       const struct glsl_type *unpacked_type,
-                      nir_ssa_def *value, unsigned writemask)
+                      nir_def *value, unsigned writemask)
 {
    nir_variable *packed_var = nir_deref_instr_get_variable(packed_deref);

@@ -523,7 +523,7 @@ bitwise_assign_unpack(struct lower_packed_varyings_state *state,

 static void
 create_store_deref(struct lower_packed_varyings_state *state,
-                   nir_deref_instr *deref, nir_ssa_def *value,
+                   nir_deref_instr *deref, nir_def *value,
                    unsigned writemask, bool is_64bit)
 {
    /* If dest and value have different number of components pack the srcs

@@ -532,7 +532,7 @@ create_store_deref(struct lower_packed_varyings_state *state,
    const struct glsl_type *type = glsl_without_array(deref->type);
    unsigned comps = glsl_get_vector_elements(type);
    if (value->num_components != comps) {
-      nir_ssa_def *srcs[4];
+      nir_def *srcs[4];

       unsigned comp = 0;
       for (unsigned i = 0; i < comps; i++) {

@@ -543,7 +543,7 @@ create_store_deref(struct lower_packed_varyings_state *state,
            srcs[i] = nir_swizzle(&state->b, value, &comp, 1);
            comp++;
         } else {
-           srcs[i] = nir_ssa_undef(&state->b, 1,
+           srcs[i] = nir_undef(&state->b, 1,
                                    glsl_type_is_64bit(type) ? 64 : 32);
         }
      }

@@ -555,7 +555,7 @@ create_store_deref(struct lower_packed_varyings_state *state,

 static unsigned
 lower_varying(struct lower_packed_varyings_state *state,
-              nir_ssa_def *rhs_swizzle, unsigned writemask,
+              nir_def *rhs_swizzle, unsigned writemask,
               const struct glsl_type *type, unsigned fine_location,
               nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
               const char *name, bool gs_input_toplevel, unsigned vertex_index);

@@ -576,7 +576,7 @@ lower_varying(struct lower_packed_varyings_state *state,
  */
 static unsigned
 lower_arraylike(struct lower_packed_varyings_state *state,
-                nir_ssa_def *rhs_swizzle, unsigned writemask,
+                nir_def *rhs_swizzle, unsigned writemask,
                 const struct glsl_type *type, unsigned fine_location,
                 nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
                 const char *name, bool gs_input_toplevel, unsigned vertex_index)

@@ -640,7 +640,7 @@ lower_arraylike(struct lower_packed_varyings_state *state,
  */
 static unsigned
 lower_varying(struct lower_packed_varyings_state *state,
-              nir_ssa_def *rhs_swizzle, unsigned writemask,
+              nir_def *rhs_swizzle, unsigned writemask,
               const struct glsl_type *type, unsigned fine_location,
               nir_variable *unpacked_var, nir_deref_instr *unpacked_var_deref,
               const char *name, bool gs_input_toplevel, unsigned vertex_index)

@@ -741,10 +741,10 @@ lower_varying(struct lower_packed_varyings_state *state,
         ralloc_asprintf(state->mem_ctx, "%s.%s", name, left_swizzle_name) :
         NULL;

-      nir_ssa_def *left_swizzle = NULL;
+      nir_def *left_swizzle = NULL;
       unsigned left_writemask = ~0u;
       if (state->mode == nir_var_shader_out) {
-         nir_ssa_def *ssa_def = rhs_swizzle ?
+         nir_def *ssa_def = rhs_swizzle ?
            rhs_swizzle : nir_load_deref(&state->b, unpacked_var_deref);
         left_swizzle =
            nir_swizzle(&state->b, ssa_def,

@@ -767,10 +767,10 @@ lower_varying(struct lower_packed_varyings_state *state,
         ralloc_asprintf(state->mem_ctx, "%s.%s", name, right_swizzle_name) :
         NULL;

-      nir_ssa_def *right_swizzle = NULL;
+      nir_def *right_swizzle = NULL;
       unsigned right_writemask = ~0u;
       if (state->mode == nir_var_shader_out) {
-         nir_ssa_def *ssa_def = rhs_swizzle ?
+         nir_def *ssa_def = rhs_swizzle ?
            rhs_swizzle : nir_load_deref(&state->b, unpacked_var_deref);
         right_swizzle =
            nir_swizzle(&state->b, ssa_def,

@@ -810,7 +810,7 @@ lower_varying(struct lower_packed_varyings_state *state,
    struct packing_store_values *store_value;
    if (state->mode == nir_var_shader_out) {
       unsigned writemask = ((1 << components) - 1) << location_frac;
-      nir_ssa_def *value = rhs_swizzle ? rhs_swizzle :
+      nir_def *value = rhs_swizzle ? rhs_swizzle :
         nir_load_deref(&state->b, unpacked_var_deref);

      store_value =

@@ -822,9 +822,9 @@ lower_varying(struct lower_packed_varyings_state *state,
         swizzle_values[i] = i + location_frac;
      }

-     nir_ssa_def *ssa_def = &packed_deref->dest.ssa;
+     nir_def *ssa_def = &packed_deref->dest.ssa;
      ssa_def = nir_load_deref(&state->b, packed_deref);
-     nir_ssa_def *swizzle =
+     nir_def *swizzle =
        nir_swizzle(&state->b, ssa_def, swizzle_values, components);

      store_value = bitwise_assign_unpack(state, unpacked_var_deref,

@@ -140,11 +140,11 @@ copy_to_new_var(nir_builder *b, nir_deref_instr *deref,
         nir_deref_instr *new_var_m_deref =
            nir_build_deref_array(b, new_var_deref, &c->def);

-        nir_ssa_def *value = nir_load_deref(b, m_deref);
+        nir_def *value = nir_load_deref(b, m_deref);
         nir_store_deref(b, new_var_m_deref, value, writemask);
      }
   } else {
-     nir_ssa_def *value = nir_load_deref(b, deref);
+     nir_def *value = nir_load_deref(b, deref);
      nir_store_deref(b, new_var_deref, value, writemask);
   }
 }

@@ -298,7 +298,7 @@ rewrite_varying_deref(nir_builder *b, struct replace_varyings_data *rv_data,
         unsigned i = nir_src_as_uint(deref->arr.index);
         nir_deref_instr *new_deref =
            nir_build_deref_var(b, rv_data->new_texcoord[i]);
-        nir_ssa_def_rewrite_uses(&deref->dest.ssa, &new_deref->dest.ssa);
+        nir_def_rewrite_uses(&deref->dest.ssa, &new_deref->dest.ssa);
         return;
      }
   }

@@ -84,21 +84,21 @@ public:

 private:
    void add_instr(nir_instr *instr, unsigned num_components, unsigned bit_size);
-   nir_ssa_def *evaluate_rvalue(ir_rvalue *ir);
+   nir_def *evaluate_rvalue(ir_rvalue *ir);

-   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def **srcs);
-   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1);
-   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
-                       nir_ssa_def *src2);
-   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
-                       nir_ssa_def *src2, nir_ssa_def *src3);
+   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_def **srcs);
+   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_def *src1);
+   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_def *src1,
+                       nir_def *src2);
+   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_def *src1,
+                       nir_def *src2, nir_def *src3);

    bool supports_std430;

    nir_shader *shader;
    nir_function_impl *impl;
    nir_builder b;
-   nir_ssa_def *result; /* result of the expression tree last visited */
+   nir_def *result; /* result of the expression tree last visited */

    nir_deref_instr *evaluate_deref(ir_instruction *ir);

@@ -122,7 +122,7 @@ private:
    struct set *sparse_variable_set;

    void adjust_sparse_variable(nir_deref_instr *var_deref, const glsl_type *type,
-                               nir_ssa_def *dest);
+                               nir_def *dest);

    const struct gl_constants *consts;
 };

@@ -465,7 +465,7 @@ nir_visitor::constant_copy(ir_constant *ir, void *mem_ctx)

 void
 nir_visitor::adjust_sparse_variable(nir_deref_instr *var_deref, const glsl_type *type,
-                                    nir_ssa_def *dest)
+                                    nir_def *dest)
 {
    const glsl_type *texel_type = type->field_type("texel");
    assert(texel_type);

@@ -914,7 +914,7 @@ nir_visitor::visit(ir_return *ir)
         nir_build_deref_cast(&b, nir_load_param(&b, 0),
                              nir_var_function_temp, ir->value->type, 0);

-     nir_ssa_def *val = evaluate_rvalue(ir->value);
+     nir_def *val = evaluate_rvalue(ir->value);
      nir_store_deref(&b, ret_deref, val, ~0);
   }

@@ -1180,7 +1180,7 @@ nir_visitor::visit(ir_call *ir)
      }

      nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);
-     nir_ssa_def *ret = &instr->dest.ssa;
+     nir_def *ret = &instr->dest.ssa;

      switch (op) {
      case nir_intrinsic_deref_atomic:

@@ -1341,15 +1341,15 @@ nir_visitor::visit(ir_call *ir)
         /* Set the address argument, extending the coordinate vector to four
          * components.
          */
-        nir_ssa_def *src_addr =
+        nir_def *src_addr =
            evaluate_rvalue((ir_dereference *)param);
-        nir_ssa_def *srcs[4];
+        nir_def *srcs[4];

         for (int i = 0; i < 4; i++) {
            if (i < type->coordinate_components())
               srcs[i] = nir_channel(&b, src_addr, i);
            else
-              srcs[i] = nir_ssa_undef(&b, 1, 32);
+              srcs[i] = nir_undef(&b, 1, 32);
         }

         instr->src[1] = nir_src_for_ssa(nir_vec(&b, srcs, 4));

@@ -1363,7 +1363,7 @@ nir_visitor::visit(ir_call *ir)
            nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));
         param = param->get_next();
      } else {
-        instr->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
+        instr->src[2] = nir_src_for_ssa(nir_undef(&b, 1, 32));
      }

      /* Set the intrinsic parameters. */

@@ -1468,7 +1468,7 @@ nir_visitor::visit(ir_call *ir)
         ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
         assert(write_mask);

-        nir_ssa_def *nir_val = evaluate_rvalue(val);
+        nir_def *nir_val = evaluate_rvalue(val);
         if (val->type->is_boolean())
            nir_val = nir_b2i32(&b, nir_val);

@@ -1521,7 +1521,7 @@ nir_visitor::visit(ir_call *ir)

         nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]);

-        nir_ssa_def *nir_val = evaluate_rvalue(val);
+        nir_def *nir_val = evaluate_rvalue(val);
         /* The value in shared memory is a 32-bit value */
         if (val->type->is_boolean())
            nir_val = nir_b2b32(&b, nir_val);

@@ -1639,7 +1639,7 @@ nir_visitor::visit(ir_call *ir)
            nir_deref_instr *out_deref = evaluate_deref(param_rvalue);
            call->params[i] = nir_src_for_ssa(&out_deref->dest.ssa);
         } else if (sig_param->data.mode == ir_var_function_in) {
-           nir_ssa_def *val = evaluate_rvalue(param_rvalue);
+           nir_def *val = evaluate_rvalue(param_rvalue);
            nir_src src = nir_src_for_ssa(val);

            nir_src_copy(&call->params[i], &src, &call->instr);

@@ -1685,7 +1685,7 @@ nir_visitor::visit(ir_assignment *ir)

    ir->lhs->accept(this);
    nir_deref_instr *lhs_deref = this->deref;
-   nir_ssa_def *src = evaluate_rvalue(ir->rhs);
+   nir_def *src = evaluate_rvalue(ir->rhs);

    if (is_sparse) {
       adjust_sparse_variable(lhs_deref, tex->type, src);

@@ -1767,7 +1767,7 @@ nir_visitor::add_instr(nir_instr *instr, unsigned num_components,
    }
 }

-nir_ssa_def *
+nir_def *
 nir_visitor::evaluate_rvalue(ir_rvalue* ir)
 {
    ir->accept(this);

@@ -1880,7 +1880,7 @@ nir_visitor::visit(ir_expression *ir)
      break;
   }

-  nir_ssa_def *srcs[4];
+  nir_def *srcs[4];
   for (unsigned i = 0; i < ir->num_operands; i++)
      srcs[i] = evaluate_rvalue(ir->operands[i]);

@@ -2457,7 +2457,7 @@ nir_visitor::visit(ir_texture *ir)
   /* check for bindless handles */
   if (!nir_deref_mode_is(sampler_deref, nir_var_uniform) ||
       nir_deref_instr_get_variable(sampler_deref)->data.bindless) {
-     nir_ssa_def *load = nir_load_deref(&b, sampler_deref);
+     nir_def *load = nir_load_deref(&b, sampler_deref);
      instr->src[0] = nir_tex_src_for_ssa(nir_tex_src_texture_handle, load);
      instr->src[1] = nir_tex_src_for_ssa(nir_tex_src_sampler_handle, load);
   } else {

@@ -2618,10 +2618,10 @@ nir_visitor::visit(ir_dereference_record *ir)
    */
   if (this->deref->deref_type == nir_deref_type_var &&
       _mesa_set_search(this->sparse_variable_set, this->deref->var)) {
-     nir_ssa_def *load = nir_load_deref(&b, this->deref);
+     nir_def *load = nir_load_deref(&b, this->deref);
      assert(load->num_components >= 2);

-     nir_ssa_def *ssa;
+     nir_def *ssa;
      const glsl_type *type = ir->record->type;
      if (field_index == type->field_index("code")) {
         /* last channel holds residency code */

@@ -2645,7 +2645,7 @@ nir_visitor::visit(ir_dereference_record *ir)
 void
 nir_visitor::visit(ir_dereference_array *ir)
 {
-   nir_ssa_def *index = evaluate_rvalue(ir->array_index);
+   nir_def *index = evaluate_rvalue(ir->array_index);

    ir->array->accept(this);

@@ -114,7 +114,7 @@ files_libnir = files(
   'nir_format_convert.h',
   'nir_from_ssa.c',
   'nir_gather_info.c',
-  'nir_gather_ssa_types.c',
+  'nir_gather_types.c',
   'nir_gather_xfb_info.c',
   'nir_group_loads.c',
   'nir_gs_count_vertices.c',

@@ -718,7 +718,7 @@ nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
       gc_zalloc_zla(shader->gctx, nir_load_const_instr, nir_const_value, num_components);
    instr_init(&instr->instr, nir_instr_type_load_const);

-   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size);
+   nir_def_init(&instr->instr, &instr->def, num_components, bit_size);

    return instr;
 }

@@ -870,15 +870,15 @@ nir_parallel_copy_instr_create(nir_shader *shader)
    return instr;
 }

-nir_ssa_undef_instr *
-nir_ssa_undef_instr_create(nir_shader *shader,
+nir_undef_instr *
+nir_undef_instr_create(nir_shader *shader,
                            unsigned num_components,
                            unsigned bit_size)
 {
-   nir_ssa_undef_instr *instr = gc_alloc(shader->gctx, nir_ssa_undef_instr, 1);
+   nir_undef_instr *instr = gc_alloc(shader->gctx, nir_undef_instr, 1);
    instr_init(&instr->instr, nir_instr_type_ssa_undef);

-   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size);
+   nir_def_init(&instr->instr, &instr->def, num_components, bit_size);

    return instr;
 }

@@ -1036,7 +1036,7 @@ add_use_cb(nir_src *src, void *state)
 }

 static bool
-add_ssa_def_cb(nir_ssa_def *def, void *state)
+add_ssa_def_cb(nir_def *def, void *state)
 {
    nir_instr *instr = state;

@@ -1194,11 +1194,11 @@ nir_instr_free_list(struct exec_list *list)
 }

 static bool
-nir_instr_free_and_dce_live_cb(nir_ssa_def *def, void *state)
+nir_instr_free_and_dce_live_cb(nir_def *def, void *state)
 {
    bool *live = state;

-   if (!nir_ssa_def_is_unused(def)) {
+   if (!nir_def_is_unused(def)) {
       *live = true;
       return false;
    } else {

@@ -1291,7 +1291,7 @@ struct foreach_ssa_def_state {
 };

 static inline bool
-nir_ssa_def_visitor(nir_dest *dest, void *void_state)
+nir_def_visitor(nir_dest *dest, void *void_state)
 {
    struct foreach_ssa_def_state *state = void_state;

@@ -1309,7 +1309,7 @@ nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
    case nir_instr_type_phi:
    case nir_instr_type_parallel_copy: {
       struct foreach_ssa_def_state foreach_state = { cb, state };
-      return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state);
+      return nir_foreach_dest(instr, nir_def_visitor, &foreach_state);
    }

    case nir_instr_type_load_const:

@@ -1324,7 +1324,7 @@ nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
    }
 }

-nir_ssa_def *
+nir_def *
 nir_instr_ssa_def(nir_instr *instr)
 {
    switch (instr->type) {

@@ -1539,7 +1539,7 @@ nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
 }

 void
-nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
+nir_def_init(nir_instr *instr, nir_def *def,
              unsigned num_components,
              unsigned bit_size)
 {

@@ -1565,22 +1565,22 @@ void
 nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
                   unsigned num_components, unsigned bit_size)
 {
-   nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size);
+   nir_def_init(instr, &dest->ssa, num_components, bit_size);
 }

 void
-nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_ssa_def *new_ssa)
+nir_def_rewrite_uses(nir_def *def, nir_def *new_ssa)
 {
    assert(def != new_ssa);
    nir_foreach_use_including_if_safe(use_src, def) {
-      nir_src_rewrite_ssa(use_src, new_ssa);
+      nir_src_rewrite(use_src, new_ssa);
    }
 }

 void
-nir_ssa_def_rewrite_uses_src(nir_ssa_def *def, nir_src new_src)
+nir_def_rewrite_uses_src(nir_def *def, nir_src new_src)
 {
-   nir_ssa_def_rewrite_uses(def, new_src.ssa);
+   nir_def_rewrite_uses(def, new_src.ssa);
 }

 static bool

@@ -1614,7 +1614,7 @@ is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
  * def->parent_instr and that after_me comes after def->parent_instr.
  */
 void
-nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_ssa_def *new_ssa,
+nir_def_rewrite_uses_after(nir_def *def, nir_def *new_ssa,
                                nir_instr *after_me)
 {
    if (def == new_ssa)

@@ -1632,11 +1632,11 @@ nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_ssa_def *new_ssa,
         continue;
      }

-     nir_src_rewrite_ssa(use_src, new_ssa);
+     nir_src_rewrite(use_src, new_ssa);
   }
 }

-static nir_ssa_def *
+static nir_def *
 get_store_value(nir_intrinsic_instr *intrin)
 {
    assert(nir_intrinsic_has_write_mask(intrin));

@@ -1672,7 +1672,7 @@ nir_src_components_read(const nir_src *src)
 }

 nir_component_mask_t
-nir_ssa_def_components_read(const nir_ssa_def *def)
+nir_def_components_read(const nir_def *def)
 {
    nir_component_mask_t read_mask = 0;

@@ -1950,7 +1950,7 @@ nir_index_blocks(nir_function_impl *impl)
 }

 static bool
-index_ssa_def_cb(nir_ssa_def *def, void *state)
+index_ssa_def_cb(nir_def *def, void *state)
 {
    unsigned *index = (unsigned *)state;
    def->index = (*index)++;

@@ -2084,14 +2084,14 @@ nir_function_impl_lower_instructions(nir_function_impl *impl,
         continue;
      }

-     nir_ssa_def *old_def = nir_instr_ssa_def(instr);
+     nir_def *old_def = nir_instr_ssa_def(instr);
      struct list_head old_uses;
      if (old_def != NULL) {
         /* We're about to ask the callback to generate a replacement for instr.
          * Save off the uses from instr's SSA def so we know what uses to
-         * rewrite later. If we use nir_ssa_def_rewrite_uses, it fails in the
+         * rewrite later. If we use nir_def_rewrite_uses, it fails in the
          * case where the generated replacement code uses the result of instr
-         * itself. If we use nir_ssa_def_rewrite_uses_after (which is the
+         * itself. If we use nir_def_rewrite_uses_after (which is the
          * normal solution to this problem), it doesn't work well if control-
          * flow is inserted as part of the replacement, doesn't handle cases
          * where the replacement is something consumed by instr, and suffers

@@ -2104,7 +2104,7 @@ nir_function_impl_lower_instructions(nir_function_impl *impl,
      }

      b.cursor = nir_after_instr(instr);
-     nir_ssa_def *new_def = lower(&b, instr, cb_data);
+     nir_def *new_def = lower(&b, instr, cb_data);
      if (new_def && new_def != NIR_LOWER_INSTR_PROGRESS &&
          new_def != NIR_LOWER_INSTR_PROGRESS_REPLACE) {
         assert(old_def != NULL);

@@ -2119,7 +2119,7 @@ nir_function_impl_lower_instructions(nir_function_impl *impl,
         nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
      }

-     if (nir_ssa_def_is_unused(old_def)) {
+     if (nir_def_is_unused(old_def)) {
        iter = nir_instr_free_and_dce(instr);
     } else {
        iter = nir_after_instr(instr);

|
|||
}
|
||||
|
||||
void
|
||||
nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *src,
|
||||
nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_def *src,
|
||||
bool bindless)
|
||||
{
|
||||
enum gl_access_qualifier access = nir_intrinsic_access(intrin);
|
||||
|
|
@ -2760,10 +2760,10 @@ nir_alu_instr_is_copy(nir_alu_instr *instr)
|
|||
return nir_op_is_vec(instr->op);
|
||||
}
|
||||
|
||||
nir_ssa_scalar
|
||||
nir_ssa_scalar_chase_movs(nir_ssa_scalar s)
|
||||
nir_scalar
|
||||
nir_scalar_chase_movs(nir_scalar s)
|
||||
{
|
||||
while (nir_ssa_scalar_is_alu(s)) {
|
||||
while (nir_scalar_is_alu(s)) {
|
||||
nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
|
||||
if (!nir_alu_instr_is_copy(alu))
|
||||
break;
|
||||
|
|
|
|||
|
|
@@ -946,7 +946,7 @@ nir_instr_is_last(const nir_instr *instr)
    return exec_node_is_tail_sentinel(exec_node_get_next_const(&instr->node));
 }

-typedef struct nir_ssa_def {
+typedef struct nir_def {
    /** Instruction which produces this SSA value. */
    nir_instr *parent_instr;

@@ -966,7 +966,7 @@ typedef struct nir_ssa_def {
     * invocations of the shader. This is set by nir_divergence_analysis.
     */
    bool divergent;
-} nir_ssa_def;
+} nir_def;

 struct nir_src;
 struct nir_if;

@@ -979,7 +979,7 @@ typedef struct nir_src {
    };

    struct list_head use_link;
-   nir_ssa_def *ssa;
+   nir_def *ssa;

    bool is_if;
 } nir_src;

@@ -1030,7 +1030,7 @@ nir_src_init(void)
    if (src->is_if)

 static inline bool
-nir_ssa_def_used_by_if(const nir_ssa_def *def)
+nir_def_used_by_if(const nir_def *def)
 {
    nir_foreach_if_use(_, def)
       return true;

@@ -1039,7 +1039,7 @@ nir_ssa_def_used_by_if(const nir_ssa_def *def)
 }

 typedef struct {
-   nir_ssa_def ssa;
+   nir_def ssa;
 } nir_dest;

 static inline nir_dest

@@ -1052,7 +1052,7 @@ nir_dest_init(void)
 #define NIR_DEST_INIT nir_dest_init()

 static inline nir_src
-nir_src_for_ssa(nir_ssa_def *def)
+nir_src_for_ssa(nir_def *def)
 {
    nir_src src = NIR_SRC_INIT;

@@ -1984,7 +1984,7 @@ nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr);

 /* Converts a image_deref_* intrinsic into a image_* one */
 void nir_rewrite_image_intrinsic(nir_intrinsic_instr *instr,
-                                 nir_ssa_def *handle, bool bindless);
+                                 nir_def *handle, bool bindless);

 /* Determine if an intrinsic can be arbitrarily reordered and eliminated. */
 static inline bool

@@ -2412,7 +2412,7 @@ bool nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex);
 typedef struct {
    nir_instr instr;

-   nir_ssa_def def;
+   nir_def def;

    nir_const_value value[];
 } nir_load_const_instr;

@@ -2478,8 +2478,8 @@ typedef struct {

 typedef struct {
    nir_instr instr;
-   nir_ssa_def def;
-} nir_ssa_undef_instr;
+   nir_def def;
+} nir_undef_instr;

 typedef struct {
    struct exec_node node;

@@ -2554,7 +2554,7 @@ NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr,
                 type, nir_instr_type_intrinsic)
 NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr,
                 type, nir_instr_type_load_const)
-NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr,
+NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_undef_instr, instr,
                 type, nir_instr_type_ssa_undef)
 NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr,
                 type, nir_instr_type_phi)

@@ -2589,24 +2589,24 @@ NIR_DEFINE_SRC_AS_CONST(double, float)
 #undef NIR_DEFINE_SRC_AS_CONST

 typedef struct {
-   nir_ssa_def *def;
+   nir_def *def;
    unsigned comp;
-} nir_ssa_scalar;
+} nir_scalar;

 static inline bool
-nir_ssa_scalar_is_const(nir_ssa_scalar s)
+nir_scalar_is_const(nir_scalar s)
 {
    return s.def->parent_instr->type == nir_instr_type_load_const;
 }

 static inline bool
-nir_ssa_scalar_is_undef(nir_ssa_scalar s)
+nir_scalar_is_undef(nir_scalar s)
 {
    return s.def->parent_instr->type == nir_instr_type_ssa_undef;
 }

 static inline nir_const_value
-nir_ssa_scalar_as_const_value(nir_ssa_scalar s)
+nir_scalar_as_const_value(nir_scalar s)
 {
    assert(s.comp < s.def->num_components);
    nir_load_const_instr *load = nir_instr_as_load_const(s.def->parent_instr);

@@ -2615,10 +2615,10 @@ nir_ssa_scalar_as_const_value(nir_ssa_scalar s)

 #define NIR_DEFINE_SCALAR_AS_CONST(type, suffix) \
    static inline type \
-   nir_ssa_scalar_as_##suffix(nir_ssa_scalar s) \
+   nir_scalar_as_##suffix(nir_scalar s) \
    { \
       return nir_const_value_as_##suffix( \
-         nir_ssa_scalar_as_const_value(s), s.def->bit_size); \
+         nir_scalar_as_const_value(s), s.def->bit_size); \
    }

 NIR_DEFINE_SCALAR_AS_CONST(int64_t, int)

@@ -2629,21 +2629,21 @@ NIR_DEFINE_SCALAR_AS_CONST(double, float)
 #undef NIR_DEFINE_SCALAR_AS_CONST

 static inline bool
-nir_ssa_scalar_is_alu(nir_ssa_scalar s)
+nir_scalar_is_alu(nir_scalar s)
 {
    return s.def->parent_instr->type == nir_instr_type_alu;
 }

 static inline nir_op
-nir_ssa_scalar_alu_op(nir_ssa_scalar s)
+nir_scalar_alu_op(nir_scalar s)
 {
    return nir_instr_as_alu(s.def->parent_instr)->op;
 }

-static inline nir_ssa_scalar
-nir_ssa_scalar_chase_alu_src(nir_ssa_scalar s, unsigned alu_src_idx)
+static inline nir_scalar
+nir_scalar_chase_alu_src(nir_scalar s, unsigned alu_src_idx)
 {
-   nir_ssa_scalar out = { NULL, 0 };
+   nir_scalar out = { NULL, 0 };

    nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
    assert(alu_src_idx < nir_op_infos[alu->op].num_inputs);

@@ -2671,27 +2671,27 @@ nir_ssa_scalar_chase_alu_src(nir_ssa_scalar s, unsigned alu_src_idx)
    return out;
 }

-nir_ssa_scalar nir_ssa_scalar_chase_movs(nir_ssa_scalar s);
+nir_scalar nir_scalar_chase_movs(nir_scalar s);

-static inline nir_ssa_scalar
-nir_get_ssa_scalar(nir_ssa_def *def, unsigned channel)
+static inline nir_scalar
+nir_get_ssa_scalar(nir_def *def, unsigned channel)
 {
-   nir_ssa_scalar s = { def, channel };
+   nir_scalar s = { def, channel };
    return s;
 }

-/** Returns a nir_ssa_scalar where we've followed the bit-exact mov/vec use chain to the original definition */
-static inline nir_ssa_scalar
-nir_ssa_scalar_resolved(nir_ssa_def *def, unsigned channel)
+/** Returns a nir_scalar where we've followed the bit-exact mov/vec use chain to the original definition */
+static inline nir_scalar
+nir_scalar_resolved(nir_def *def, unsigned channel)
 {
-   return nir_ssa_scalar_chase_movs(nir_get_ssa_scalar(def, channel));
+   return nir_scalar_chase_movs(nir_get_ssa_scalar(def, channel));
 }

 static inline uint64_t
 nir_alu_src_as_uint(nir_alu_src src)
 {
-   nir_ssa_scalar scalar = nir_get_ssa_scalar(src.src.ssa, src.swizzle[0]);
-   return nir_ssa_scalar_as_uint(scalar);
+   nir_scalar scalar = nir_get_ssa_scalar(src.src.ssa, src.swizzle[0]);
+   return nir_scalar_as_uint(scalar);
 }

 typedef struct {

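The nir_scalar helpers above are the usual way to fold constants that hide behind bit-exact mov/vec copies, per the doc comment on nir_scalar_resolved. A minimal sketch of the pattern, using only the renamed helpers declared in this hunk:

/* Sketch: check whether channel 0 of a value is the constant zero,
 * looking through mov/vec copies first.
 */
static bool
channel0_is_zero(nir_def *def)
{
   nir_scalar s = nir_scalar_resolved(def, 0);
   return nir_scalar_is_const(s) && nir_scalar_as_uint(s) == 0;
}
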
@@ -2966,7 +2966,7 @@ typedef struct {

 typedef struct {
    /* Induction variable. */
-   nir_ssa_def *def;
+   nir_def *def;

    /* Init statement with only uniform. */
    nir_src *init_src;

@@ -4112,7 +4112,7 @@ nir_phi_src *nir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_sr

 nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);

-nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader,
+nir_undef_instr *nir_undef_instr_create(nir_shader *shader,
                                                 unsigned num_components,
                                                 unsigned bit_size);

@@ -4385,9 +4385,9 @@ nir_cursor nir_instr_free_and_dce(nir_instr *instr);

 /** @} */

-nir_ssa_def *nir_instr_ssa_def(nir_instr *instr);
+nir_def *nir_instr_ssa_def(nir_instr *instr);

-typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state);
+typedef bool (*nir_foreach_ssa_def_cb)(nir_def *def, void *state);
 typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state);
 typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state);
 bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb,

@@ -4419,7 +4419,7 @@ bool nir_srcs_equal(nir_src src1, nir_src src2);
 bool nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2);

 static inline void
-nir_src_rewrite_ssa(nir_src *src, nir_ssa_def *new_ssa)
+nir_src_rewrite(nir_src *src, nir_def *new_ssa)
 {
    assert(src->ssa);
    assert(src->is_if ? (src->parent_if != NULL) : (src->parent_instr != NULL));

@@ -4430,11 +4430,11 @@ nir_src_rewrite_ssa(nir_src *src, nir_ssa_def *new_ssa)

 static inline void
 nir_instr_rewrite_src_ssa(ASSERTED nir_instr *instr,
-                          nir_src *src, nir_ssa_def *new_ssa)
+                          nir_src *src, nir_def *new_ssa)
 {
    assert(!src->is_if);
    assert(src->parent_instr == instr);
-   nir_src_rewrite_ssa(src, new_ssa);
+   nir_src_rewrite(src, new_ssa);
 }

 void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src);

@@ -4444,7 +4444,7 @@ void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src);

 void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
                        unsigned num_components, unsigned bit_size);
-void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
+void nir_def_init(nir_instr *instr, nir_def *def,
                   unsigned num_components, unsigned bit_size);
 static inline void
 nir_ssa_dest_init_for_type(nir_instr *instr, nir_dest *dest,

@@ -4454,16 +4454,16 @@ nir_ssa_dest_init_for_type(nir_instr *instr, nir_dest *dest,
    nir_ssa_dest_init(instr, dest, glsl_get_components(type),
                      glsl_get_bit_size(type));
 }
-void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_ssa_def *new_ssa);
-void nir_ssa_def_rewrite_uses_src(nir_ssa_def *def, nir_src new_src);
-void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_ssa_def *new_ssa,
+void nir_def_rewrite_uses(nir_def *def, nir_def *new_ssa);
+void nir_def_rewrite_uses_src(nir_def *def, nir_src new_src);
+void nir_def_rewrite_uses_after(nir_def *def, nir_def *new_ssa,
                                 nir_instr *after_me);

 nir_component_mask_t nir_src_components_read(const nir_src *src);
-nir_component_mask_t nir_ssa_def_components_read(const nir_ssa_def *def);
+nir_component_mask_t nir_def_components_read(const nir_def *def);

 static inline bool
-nir_ssa_def_is_unused(nir_ssa_def *ssa)
+nir_def_is_unused(nir_def *ssa)
 {
    return list_is_empty(&ssa->uses);
 }

@@ -4715,9 +4715,9 @@ typedef bool (*nir_instr_writemask_filter_cb)(const nir_instr *,
  * should either return NULL indicating that no lowering needs to be done or
  * emit a sequence of instructions using the provided builder (whose cursor
  * will already be placed after the instruction to be lowered) and return the
- * resulting nir_ssa_def.
+ * resulting nir_def.
  */
-typedef nir_ssa_def *(*nir_lower_instr_cb)(struct nir_builder *,
+typedef nir_def *(*nir_lower_instr_cb)(struct nir_builder *,
                                        nir_instr *, void *);

 /**

@@ -4725,7 +4725,7 @@ typedef nir_ssa_def *(*nir_lower_instr_cb)(struct nir_builder *,
 * (like changing an input to the instr) that didn't result in a replacement
 * SSA def being generated.
 */
-#define NIR_LOWER_INSTR_PROGRESS ((nir_ssa_def *)(uintptr_t)1)
+#define NIR_LOWER_INSTR_PROGRESS ((nir_def *)(uintptr_t)1)

 /**
  * Special return value for nir_lower_instr_cb when some progress occurred

@@ -4733,7 +4733,7 @@ typedef nir_ssa_def *(*nir_lower_instr_cb)(struct nir_builder *,
  * (like a store)
  */

-#define NIR_LOWER_INSTR_PROGRESS_REPLACE ((nir_ssa_def *)(uintptr_t)2)
+#define NIR_LOWER_INSTR_PROGRESS_REPLACE ((nir_def *)(uintptr_t)2)

 /** Iterate over all the instructions in a nir_function_impl and lower them
  * using the provided callbacks

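Concretely, a nir_lower_instr_cb returns a replacement nir_def, one of the sentinel values above, or NULL for "no change". A minimal sketch of such a callback in the post-rename spelling; the fsat lowering chosen here is illustrative, not something this commit adds:

/* Sketch of a nir_lower_instr_cb: rewrite fsat(x) as fmin(fmax(x, 0), 1).
 * Returning the new nir_def tells the driver loop to rewrite all uses of
 * the old instruction; returning NULL skips it.
 */
static nir_def *
lower_fsat_cb(struct nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_alu)
      return NULL;

   nir_alu_instr *alu = nir_instr_as_alu(instr);
   if (alu->op != nir_op_fsat)
      return NULL;

   nir_def *src = nir_ssa_for_alu_src(b, alu, 0);
   return nir_fmin(b, nir_fmax(b, src, nir_imm_float(b, 0.0)),
                   nir_imm_float(b, 1.0));
}
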
@@ -4804,7 +4804,7 @@ bool nir_lower_returns(nir_shader *shader);

 void nir_inline_function_impl(struct nir_builder *b,
                               const nir_function_impl *impl,
-                              nir_ssa_def **params,
+                              nir_def **params,
                               struct hash_table *shader_var_remap);
 bool nir_inline_functions(nir_shader *shader);

@@ -4864,7 +4864,7 @@ void nir_lower_clip_halfz(nir_shader *shader);

 void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint);

-void nir_gather_ssa_types(nir_function_impl *impl,
+void nir_gather_types(nir_function_impl *impl,
                       BITSET_WORD *float_types,
                       BITSET_WORD *int_types);

@@ -5056,25 +5056,25 @@ nir_address_format_to_glsl_type(nir_address_format addr_format)

 const nir_const_value *nir_address_format_null_value(nir_address_format addr_format);

-nir_ssa_def *nir_build_addr_iadd(struct nir_builder *b, nir_ssa_def *addr,
+nir_def *nir_build_addr_iadd(struct nir_builder *b, nir_def *addr,
                                  nir_address_format addr_format,
                                  nir_variable_mode modes,
-                                 nir_ssa_def *offset);
+                                 nir_def *offset);

-nir_ssa_def *nir_build_addr_iadd_imm(struct nir_builder *b, nir_ssa_def *addr,
+nir_def *nir_build_addr_iadd_imm(struct nir_builder *b, nir_def *addr,
                                      nir_address_format addr_format,
                                      nir_variable_mode modes,
                                      int64_t offset);

-nir_ssa_def *nir_build_addr_ieq(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
+nir_def *nir_build_addr_ieq(struct nir_builder *b, nir_def *addr0, nir_def *addr1,
                                 nir_address_format addr_format);

-nir_ssa_def *nir_build_addr_isub(struct nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
+nir_def *nir_build_addr_isub(struct nir_builder *b, nir_def *addr0, nir_def *addr1,
                                  nir_address_format addr_format);

-nir_ssa_def *nir_explicit_io_address_from_deref(struct nir_builder *b,
+nir_def *nir_explicit_io_address_from_deref(struct nir_builder *b,
                                                 nir_deref_instr *deref,
-                                                nir_ssa_def *base_addr,
+                                                nir_def *base_addr,
                                                 nir_address_format addr_format);

 bool nir_get_explicit_deref_align(nir_deref_instr *deref,

@@ -5084,7 +5084,7 @@ bool nir_get_explicit_deref_align(nir_deref_instr *deref,

 void nir_lower_explicit_io_instr(struct nir_builder *b,
                                  nir_intrinsic_instr *io_instr,
-                                 nir_ssa_def *addr,
+                                 nir_def *addr,
                                  nir_address_format addr_format);

 bool nir_lower_explicit_io(nir_shader *shader,

@@ -5330,7 +5330,7 @@ bool nir_lower_subgroups(nir_shader *shader,

 bool nir_lower_system_values(nir_shader *shader);

-nir_ssa_def *
+nir_def *
 nir_build_lowered_load_helper_invocation(struct nir_builder *b);

 typedef struct nir_lower_compute_system_values_options {

@@ -5875,7 +5875,7 @@ void nir_loop_analyze_impl(nir_function_impl *impl,
                            nir_variable_mode indirect_mask,
                            bool force_unroll_sampler_indirect);

-bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b);
+bool nir_defs_interfere(nir_def *a, nir_def *b);

 bool nir_repair_ssa_impl(nir_function_impl *impl);
 bool nir_repair_ssa(nir_shader *shader);

@@ -5887,8 +5887,8 @@ bool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr);
 bool nir_has_divergent_loop(nir_shader *shader);

 void
-nir_rewrite_uses_to_load_reg(struct nir_builder *b, nir_ssa_def *old,
-                             nir_ssa_def *reg);
+nir_rewrite_uses_to_load_reg(struct nir_builder *b, nir_def *old,
+                             nir_def *reg);

 /* If phi_webs_only is true, only convert SSA values involved in phi nodes to
  * registers. If false, convert all values (even those not involved in a phi

@@ -6096,12 +6096,12 @@ typedef struct nir_unsigned_upper_bound_config {

 uint32_t
 nir_unsigned_upper_bound(nir_shader *shader, struct hash_table *range_ht,
-                         nir_ssa_scalar scalar,
+                         nir_scalar scalar,
                          const nir_unsigned_upper_bound_config *config);

 bool
 nir_addition_might_overflow(nir_shader *shader, struct hash_table *range_ht,
-                            nir_ssa_scalar ssa, unsigned const_val,
|
||||
nir_scalar ssa, unsigned const_val,
|
||||
const nir_unsigned_upper_bound_config *config);
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -6114,7 +6114,7 @@ typedef struct {
|
|||
bool subgroup_size_uniform;
|
||||
|
||||
/* size/align for load/store_preamble. */
|
||||
void (*def_size)(nir_ssa_def *def, unsigned *size, unsigned *align);
|
||||
void (*def_size)(nir_def *def, unsigned *size, unsigned *align);
|
||||
|
||||
/* Total available size for load/store_preamble storage, in units
|
||||
* determined by def_size.
|
||||
|
|
@ -6132,7 +6132,7 @@ typedef struct {
|
|||
* may happen from inserting move instructions, etc. If the benefit doesn't
|
||||
* exceed the cost here then we won't rewrite it.
|
||||
*/
|
||||
float (*rewrite_cost_cb)(nir_ssa_def *def, const void *data);
|
||||
float (*rewrite_cost_cb)(nir_def *def, const void *data);
|
||||
|
||||
/* Instructions whose definitions should not be rewritten. These could
|
||||
* still be moved to the preamble, but they shouldn't be the root of a
|
||||
|
|
@ -6154,7 +6154,7 @@ nir_function_impl *nir_shader_get_preamble(nir_shader *shader);
|
|||
bool nir_lower_point_smooth(nir_shader *shader);
|
||||
bool nir_lower_poly_line_smooth(nir_shader *shader, unsigned num_smooth_aa_sample);
|
||||
|
||||
bool nir_mod_analysis(nir_ssa_scalar val, nir_alu_type val_type, unsigned div, unsigned *mod);
|
||||
bool nir_mod_analysis(nir_scalar val, nir_alu_type val_type, unsigned div, unsigned *mod);
|
||||
|
||||
bool
|
||||
nir_remove_tex_shadow(nir_shader *shader, unsigned textures_bitmask);
|
||||
|
|
@ -6163,7 +6163,7 @@ void
|
|||
nir_trivialize_registers(nir_shader *s);
|
||||
|
||||
static inline nir_intrinsic_instr *
|
||||
nir_reg_get_decl(nir_ssa_def *reg)
|
||||
nir_reg_get_decl(nir_def *reg)
|
||||
{
|
||||
assert(reg->parent_instr->type == nir_instr_type_intrinsic);
|
||||
nir_intrinsic_instr *decl = nir_instr_as_intrinsic(reg->parent_instr);
|
||||
|
|
@ -6231,7 +6231,7 @@ nir_is_store_reg(nir_intrinsic_instr *intr)
|
|||
if (nir_is_store_reg(nir_instr_as_intrinsic(store->parent_instr)))
|
||||
|
||||
static inline nir_intrinsic_instr *
|
||||
nir_load_reg_for_def(const nir_ssa_def *def)
|
||||
nir_load_reg_for_def(const nir_def *def)
|
||||
{
|
||||
if (def->parent_instr->type != nir_instr_type_intrinsic)
|
||||
return NULL;
|
||||
|
|
@ -6244,7 +6244,7 @@ nir_load_reg_for_def(const nir_ssa_def *def)
|
|||
}
|
||||
|
||||
static inline nir_intrinsic_instr *
|
||||
nir_store_reg_for_def(const nir_ssa_def *def)
|
||||
nir_store_reg_for_def(const nir_def *def)
|
||||
{
|
||||
/* Look for the trivial store: single use of our destination by a
|
||||
* store_register intrinsic.
|
||||
|
|
|
|||
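
The lowering-callback contract in the hunks above is unchanged by the rename: a nir_lower_instr_cb returns the replacement nir_def, NULL when nothing was lowered, or one of the NIR_LOWER_INSTR_PROGRESS sentinels. A minimal sketch of a pass written against the renamed types; the fsub lowering itself is an invented example, not code from this commit, while nir_shader_lower_instructions, nir_ssa_for_alu_src, nir_fadd, nir_fneg, and UNUSED are pre-existing Mesa helpers:

#include "nir.h"
#include "nir_builder.h"

/* Filter: match only fsub ALU instructions. */
static bool
is_fsub(const nir_instr *instr, UNUSED const void *_)
{
   return instr->type == nir_instr_type_alu &&
          nir_instr_as_alu(instr)->op == nir_op_fsub;
}

/* Rebuild a - b as a + (-b). Returning the new nir_def makes the
 * framework rewrite all uses of the old instruction and remove it,
 * so no explicit nir_def_rewrite_uses call is needed here. */
static nir_def *
lower_fsub(nir_builder *b, nir_instr *instr, UNUSED void *_)
{
   nir_alu_instr *alu = nir_instr_as_alu(instr);
   nir_def *src0 = nir_ssa_for_alu_src(b, alu, 0);
   nir_def *src1 = nir_ssa_for_alu_src(b, alu, 1);
   return nir_fadd(b, src0, nir_fneg(b, src1));
}

static bool
lower_fsub_pass(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader, is_fsub, lower_fsub, NULL);
}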
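
The preamble options struct in the diff leaves sizing and cost policy to each driver. A hypothetical pair of callbacks matching the def_size and rewrite_cost_cb signatures above (the slot units and cost constants are invented placeholders, not values taken from any Mesa driver):

/* Count storage in 32-bit scalar slots: a vec4 of 32-bit values takes
 * four slots, a scalar 64-bit value takes two. */
static void
def_size_cb(nir_def *def, unsigned *size, unsigned *align)
{
   unsigned slots_per_comp = (def->bit_size + 31) / 32;
   *size = def->num_components * slots_per_comp;
   *align = slots_per_comp;
}

/* Rewriting a wide vector costs more moves than rewriting a scalar. */
static float
rewrite_cost_cb(nir_def *def, UNUSED const void *data)
{
   return 1.0f + def->num_components;
}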
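
The register helpers at the end of the diff are what backends use after nir_trivialize_registers to look through load_reg intrinsics. A small sketch, assuming the layout in which src[0] of a load_reg is the decl_reg handle (source_register is a hypothetical helper name, not Mesa API):

/* Return the decl_reg handle behind a source, or NULL if the source is
 * a plain SSA value that never passed through a register. */
static nir_def *
source_register(nir_src src)
{
   nir_intrinsic_instr *load = nir_load_reg_for_def(src.ssa);
   if (load == NULL)
      return NULL;

   nir_def *reg = load->src[0].ssa;
   ASSERTED nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
   assert(decl->intrinsic == nir_intrinsic_decl_reg);
   return reg;
}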