brw/nir: Fix up handling of sources that might be convergent vectors
Sources that are scalars (almost all sources) and convergent generally
want a <0,1,0> source stride. Sources that are vectors (e.g., texture
coordinates, SSBO write data, etc.) and convergent want no extra strides
applied. In nearly all cases LOAD_PAYLOAD lowering will do the right
thing.

v2: Use VEC in emit_pixel_interpolater_send. Suggested by Ken.

v3: With the elimination of offset_to_component(), offset() may not
convert an is_scalar source to have a zero stride. Explicitly do this in
get_nir_src and prepare_alu_destination_and_sources.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29884>
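For context, <0,1,0> is Gen region notation (vstride=0, width=1, hstride=0): every execution channel reads the same element. A brief sketch of the two cases the message describes; the assembly line is illustrative standard syntax, not taken from this change:

   /* Convergent scalar: all lanes read one element via a <0,1,0> region,
    * e.g.:
    *
    *    mov(16)   r10.0<1>:F   r2.0<0,1,0>:F   // each of 16 lanes reads r2.0
    *
    * Convergent vector (texture coordinates, SSBO write data): components
    * must stay at their natural offsets so LOAD_PAYLOAD lowering can copy
    * component i of the payload from component i of the source.
    */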
parent 9e6bd5bf97
commit d5d7ae22ae

1 changed file with 46 additions and 16 deletions
@@ -64,7 +64,7 @@ struct nir_to_brw_state {
    bool annotate;
 };
 
-static brw_reg get_nir_src(nir_to_brw_state &ntb, const nir_src &src);
+static brw_reg get_nir_src(nir_to_brw_state &ntb, const nir_src &src, int channel = 0);
 static brw_reg get_nir_def(nir_to_brw_state &ntb, const nir_def &def);
 static nir_component_mask_t get_nir_write_mask(const nir_def &def);
 
@@ -85,6 +85,9 @@ static void fs_nir_emit_memory_access(nir_to_brw_state &ntb,
                                       const fs_builder &bld,
                                       nir_intrinsic_instr *instr);
 
+static void brw_combine_with_vec(const fs_builder &bld, const brw_reg &dst,
+                                 const brw_reg &src, unsigned n);
+
 static bool
 brw_texture_offset(const nir_tex_instr *tex, unsigned src,
                    uint32_t *offset_bits_out)
@@ -426,8 +429,7 @@ fs_nir_emit_if(nir_to_brw_state &ntb, nir_if *if_stmt)
    nir_alu_instr *cond = nir_src_as_alu_instr(if_stmt->condition);
    if (cond != NULL && cond->op == nir_op_inot) {
       invert = true;
-      cond_reg = get_nir_src(ntb, cond->src[0].src);
-      cond_reg = offset(cond_reg, bld, cond->src[0].swizzle[0]);
+      cond_reg = get_nir_src(ntb, cond->src[0].src, cond->src[0].swizzle[0]);
    } else {
       invert = false;
       cond_reg = get_nir_src(ntb, if_stmt->condition);
@@ -563,7 +565,7 @@ optimize_extract_to_float(nir_to_brw_state &ntb, nir_alu_instr *instr,
    /* Element type to extract.*/
    const brw_reg_type type = brw_int_type(bytes, is_signed);
 
-   brw_reg op0 = get_nir_src(ntb, src0->src[0].src);
+   brw_reg op0 = get_nir_src(ntb, src0->src[0].src, -1);
    op0.type = brw_type_for_nir_type(devinfo,
       (nir_alu_type)(nir_op_infos[src0->op].input_types[0] |
                      nir_src_bit_size(src0->src[0].src)));
@@ -740,7 +742,7 @@ prepare_alu_destination_and_sources(nir_to_brw_state &ntb,
                                                  instr->def.bit_size));
 
    for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
-      op[i] = get_nir_src(ntb, instr->src[i].src);
+      op[i] = get_nir_src(ntb, instr->src[i].src, -1);
       op[i].type = brw_type_for_nir_type(devinfo,
          (nir_alu_type)(nir_op_infos[instr->op].input_types[i] |
                         nir_src_bit_size(instr->src[i].src)));
@@ -1870,8 +1872,11 @@ get_resource_nir_src(nir_to_brw_state &ntb, const nir_src &src)
    return ntb.uniform_values[src.ssa->index];
 }
 
+/**
+ * Specifying -1 for channel indicates that no channel selection should be applied.
+ */
 static brw_reg
-get_nir_src(nir_to_brw_state &ntb, const nir_src &src)
+get_nir_src(nir_to_brw_state &ntb, const nir_src &src, int channel)
 {
    nir_intrinsic_instr *load_reg = nir_load_reg_for_def(src.ssa);
 
@@ -1898,6 +1903,16 @@ get_nir_src(nir_to_brw_state &ntb, const nir_src &src)
     */
    reg.type = brw_type_with_size(BRW_TYPE_D, nir_src_bit_size(src));
 
+   if (channel >= 0) {
+      reg = offset(reg, ntb.bld, channel);
+
+      /* If the dispatch width matches the scalar allocation width, offset()
+       * won't set the stride to zero. Force that here.
+       */
+      if (reg.is_scalar)
+         reg = component(reg, 0);
+   }
+
    return reg;
 }
 
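Taken together with the `int channel = 0` default in the declaration above, the new parameter gives three calling modes. A condensed sketch of the convention as used by the call sites in this patch (variable names invented for illustration):

   /* Default (channel == 0): existing scalar callers are unchanged. */
   brw_reg a = get_nir_src(ntb, src);

   /* Explicit channel: select one component; the is_scalar fix-up above
    * forces the zero stride when offset() alone would not.
    */
   brw_reg b = get_nir_src(ntb, alu->src[0].src, alu->src[0].swizzle[0]);

   /* channel == -1: no channel selection, for sources that may be
    * convergent vectors (ALU operands, store data, texture sources).
    */
   brw_reg c = get_nir_src(ntb, instr->src[0], -1);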
@@ -1969,7 +1984,14 @@ emit_pixel_interpolater_send(const fs_builder &bld,
       brw_wm_prog_data(bld.shader->prog_data);
 
    brw_reg srcs[INTERP_NUM_SRCS];
-   srcs[INTERP_SRC_OFFSET] = src;
+
+   if (src.is_scalar) {
+      srcs[INTERP_SRC_OFFSET] = bld.vgrf(src.type, 2);
+      brw_combine_with_vec(bld, srcs[INTERP_SRC_OFFSET], src, 2);
+   } else {
+      srcs[INTERP_SRC_OFFSET] = src;
+   }
+
    srcs[INTERP_SRC_MSG_DESC] = desc;
    srcs[INTERP_SRC_DYNAMIC_MODE] = flag_reg;
 
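brw_combine_with_vec() is only declared in this diff; judging from the v2 note ("Use VEC in emit_pixel_interpolater_send"), it presumably writes src into each of the n components of dst. A minimal sketch of that assumed behavior, roughly equivalent to per-component MOVs:

   /* Assumed effect of brw_combine_with_vec(bld, dst, src, 2) above:
    * replicate the convergent scalar offset into both components of a
    * fresh payload register so the interpolator message receives a real
    * 2-component offset vector. Illustrative only.
    */
   for (unsigned i = 0; i < n; i++)
      bld.MOV(offset(dst, bld, i), src);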
@@ -3172,7 +3194,7 @@ fs_nir_emit_tcs_intrinsic(nir_to_brw_state &ntb,
    case nir_intrinsic_store_output:
    case nir_intrinsic_store_per_vertex_output: {
       assert(nir_src_bit_size(instr->src[0]) == 32);
-      brw_reg value = get_nir_src(ntb, instr->src[0]);
+      brw_reg value = get_nir_src(ntb, instr->src[0], -1);
       brw_reg indirect_offset = get_indirect_offset(ntb, instr);
       unsigned imm_offset = nir_intrinsic_base(instr);
       unsigned mask = nir_intrinsic_write_mask(instr);
@@ -4150,7 +4172,7 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
    }
 
    case nir_intrinsic_store_output: {
-      const brw_reg src = get_nir_src(ntb, instr->src[0]);
+      const brw_reg src = get_nir_src(ntb, instr->src[0], -1);
       const unsigned store_offset = nir_src_as_uint(instr->src[1]);
       const unsigned location = nir_intrinsic_base(instr) +
          SET_FIELD(store_offset, BRW_NIR_FRAG_OUTPUT_LOCATION);
@@ -4422,7 +4444,7 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
                                       brw_reg(), /* flag_reg */
                                       interpolation);
       } else {
-         brw_reg src = retype(get_nir_src(ntb, instr->src[0]), BRW_TYPE_D);
+         brw_reg src = retype(get_nir_src(ntb, instr->src[0], -1), BRW_TYPE_D);
          const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET;
          emit_pixel_interpolater_send(bld,
                                       opcode,
@@ -5735,7 +5757,7 @@ emit_task_mesh_store(nir_to_brw_state &ntb,
                      const fs_builder &bld, nir_intrinsic_instr *instr,
                      const brw_reg &urb_handle)
 {
-   brw_reg src = get_nir_src(ntb, instr->src[0]);
+   brw_reg src = get_nir_src(ntb, instr->src[0], -1);
    nir_src *offset_nir_src = nir_get_io_offset_src(instr);
 
    if (nir_src_is_const(*offset_nir_src)) {
@@ -6479,7 +6501,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
 
    case nir_intrinsic_store_output: {
       assert(nir_src_bit_size(instr->src[0]) == 32);
-      brw_reg src = get_nir_src(ntb, instr->src[0]);
+      brw_reg src = get_nir_src(ntb, instr->src[0], -1);
 
       unsigned store_offset = nir_src_as_uint(instr->src[1]);
       unsigned num_components = instr->num_components;
@@ -6913,7 +6935,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
       emit_rt_lsc_fence(bld, LSC_FENCE_LOCAL, LSC_FLUSH_TYPE_NONE);
 
       bld.emit(SHADER_OPCODE_BTD_SPAWN_LOGICAL, bld.null_reg_ud(),
-               bld.emit_uniformize(get_nir_src(ntb, instr->src[0])),
+               bld.emit_uniformize(get_nir_src(ntb, instr->src[0], -1)),
                get_nir_src(ntb, instr->src[1]));
       break;
 
@@ -6942,7 +6964,7 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
 
      brw_reg srcs[RT_LOGICAL_NUM_SRCS];
 
-      brw_reg globals = get_nir_src(ntb, instr->src[0]);
+      brw_reg globals = get_nir_src(ntb, instr->src[0], -1);
       srcs[RT_LOGICAL_SRC_GLOBALS] = bld.emit_uniformize(globals);
       srcs[RT_LOGICAL_SRC_BVH_LEVEL] = get_nir_src(ntb, instr->src[1]);
       srcs[RT_LOGICAL_SRC_TRACE_RAY_CONTROL] = get_nir_src(ntb, instr->src[2]);
@@ -7175,7 +7197,7 @@ fs_nir_emit_memory_access(nir_to_brw_state &ntb,
    if (!is_load) {
       for (unsigned i = 0; i < lsc_op_num_data_values(op); i++) {
          brw_reg nir_src =
-            retype(get_nir_src(ntb, instr->src[data_src + i]), nir_data_type);
+            retype(get_nir_src(ntb, instr->src[data_src + i], -1), nir_data_type);
 
          if (data_bit_size > nir_bit_size) {
             /* Expand e.g. D16 to D16U32 */
@@ -7325,7 +7347,15 @@ fs_nir_emit_texture(nir_to_brw_state &ntb,
    uint32_t header_bits = 0;
    for (unsigned i = 0; i < instr->num_srcs; i++) {
       nir_src nir_src = instr->src[i].src;
-      brw_reg src = get_nir_src(ntb, nir_src);
+      brw_reg src = get_nir_src(ntb, nir_src, -1);
+
+      /* If the source is not a vector (e.g., a 1D texture coordinate), then
+       * the eventual LOAD_PAYLOAD lowering will not properly adjust the
+       * stride, etc., so do it now.
+       */
+      if (nir_tex_instr_src_size(instr, i) == 1)
+         src = offset(src, bld, 0);
+
       switch (instr->src[i].src_type) {
       case nir_tex_src_bias:
          assert(!got_lod);
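The texture fix-up echoes the commit message: a single-component source (nir_tex_instr_src_size(instr, i) == 1) never goes through a multi-component LOAD_PAYLOAD, so the stride adjustment that lowering would normally provide has to happen here. The offset(src, bld, 0) call is positionally a no-op; the point, presumably, is the region fix-up it performs, giving a convergent scalar the <0,1,0> stride described above.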