diff --git a/src/panfrost/compiler/bifrost/bifrost_nir.c b/src/panfrost/compiler/bifrost/bifrost_nir.c
index 77adfa1cdca..a2350aa15c1 100644
--- a/src/panfrost/compiler/bifrost/bifrost_nir.c
+++ b/src/panfrost/compiler/bifrost/bifrost_nir.c
@@ -840,7 +840,6 @@ nir_shader_has_local_variables(const nir_shader *nir)
    return false;
 }
 
-static bool pan_nir_lower_texel_buffer_fetch(nir_shader *nir, unsigned arch);
 static bool pan_nir_lower_buf_image_access(nir_shader *nir, unsigned arch);
 static bool bi_should_idvs(nir_shader *nir, const struct pan_compile_inputs *inputs);
 static bool bifrost_nir_lower_vs_atomics(nir_shader *nir);
@@ -861,7 +860,6 @@ bifrost_postprocess_nir(nir_shader *nir,
    if (gpu_arch < 9)
       NIR_PASS(_, nir, pan_nir_lower_image_ms);
 
-   NIR_PASS(_, nir, pan_nir_lower_texel_buffer_fetch, gpu_arch);
    NIR_PASS(_, nir, pan_nir_lower_buf_image_access, gpu_arch);
 
    /* We assume that UBO and SSBO were lowered, let's move things around. */
@@ -1066,62 +1064,6 @@
    bi_optimize_loop(nir, gpu_id, false /* allow_copies */);
 }
 
-static bool
-lower_texel_buffer_fetch(nir_builder *b, nir_tex_instr *tex, void *data)
-{
-   if (tex->op != nir_texop_txf || tex->sampler_dim != GLSL_SAMPLER_DIM_BUF)
-      return false;
-
-   unsigned *arch = data;
-   b->cursor = nir_before_instr(&tex->instr);
-
-   nir_def *res_handle = nir_imm_int(b, tex->texture_index);
-   nir_def *buf_index = NULL;
-   for (unsigned i = 0; i < tex->num_srcs; ++i) {
-      switch (tex->src[i].src_type) {
-      case nir_tex_src_coord:
-         buf_index = tex->src[i].src.ssa;
-         break;
-      case nir_tex_src_texture_offset:
-         /* This should always be 0 as lower_index_to_offset is expected to be
-          * set */
-         assert(tex->texture_index == 0);
-         res_handle = tex->src[i].src.ssa;
-         break;
-      default:
-         continue;
-      }
-   }
-
-   nir_def *texel_addr, *icd;
-   if (*arch >= 9) {
-      texel_addr = nir_lea_buf_pan(b, res_handle, buf_index);
-      icd = pan_nir_load_va_buf_cvt(b, res_handle);
-   } else {
-      nir_def *attr = nir_lea_attr_pan(b, res_handle, buf_index,
-                                       nir_imm_int(b, 0),
-                                       .src_type = 32,
-                                       .desc_set = BI_TABLE_ATTRIBUTE_1);
-      texel_addr = nir_channels(b, attr, BITFIELD_MASK(2));
-      icd = nir_channel(b, attr, 2);
-   }
-   texel_addr = nir_pack_64_2x32(b, texel_addr);
-
-   nir_def *loaded_mem =
-      nir_load_global_cvt_pan(b, tex->def.num_components,
-                              tex->def.bit_size, texel_addr,
-                              icd, tex->dest_type);
-   nir_def_replace(&tex->def, loaded_mem);
-   return true;
-}
-
-static bool
-pan_nir_lower_texel_buffer_fetch(nir_shader *shader, unsigned arch)
-{
-   return nir_shader_tex_pass(shader, lower_texel_buffer_fetch,
-                              nir_metadata_control_flow, &arch);
-}
-
 static bool
 lower_buf_image_access(nir_builder *b, nir_intrinsic_instr *intr, void *data)
 {
diff --git a/src/panfrost/compiler/pan_nir_lower_tex.c b/src/panfrost/compiler/pan_nir_lower_tex.c
index 7684df06407..db248d23409 100644
--- a/src/panfrost/compiler/pan_nir_lower_tex.c
+++ b/src/panfrost/compiler/pan_nir_lower_tex.c
@@ -172,6 +172,29 @@ scalar_as_imm_i4(nir_scalar s)
       _u;                                                                     \
    })
 
+static bool
+bi_lower_txf_buf(nir_builder *b, nir_tex_instr *tex, uint64_t gpu_id)
+{
+   assert(tex->op == nir_texop_txf);
+
+   b->cursor = nir_before_instr(&tex->instr);
+   struct tex_srcs srcs = steal_tex_srcs(b, tex);
+
+   nir_def *attr = nir_lea_attr_pan(b, srcs.tex_h, srcs.coord,
+                                    nir_imm_int(b, 0),
+                                    .src_type = 32,
+                                    .desc_set = BI_TABLE_ATTRIBUTE_1);
+   nir_def *addr = nir_pack_64_2x32(b, nir_trim_vector(b, attr, 2));
+   nir_def *cvt = nir_channel(b, attr, 2);
+
+   nir_def *val = nir_load_global_cvt_pan(b, tex->def.num_components,
+                                          tex->def.bit_size, addr, cvt,
+                                          tex->dest_type);
+
+   nir_def_replace(&tex->def, val);
+   return true;
+}
+
 static bool
 bi_lower_texs(nir_builder *b, nir_tex_instr *tex, uint64_t gpu_id)
 {
@@ -661,9 +684,13 @@ bi_lower_tex_instr(nir_builder *b, nir_tex_instr *tex, void *cb_data)
    case nir_texop_txf:
    case nir_texop_txf_ms:
    case nir_texop_tg4:
-      return bi_lower_tex(b, tex, gpu_id);
+      if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
+         return bi_lower_txf_buf(b, tex, gpu_id);
+      else
+         return bi_lower_tex(b, tex, gpu_id);
 
    case nir_texop_lod:
+      assert(tex->sampler_dim != GLSL_SAMPLER_DIM_BUF);
       return bi_lower_lod(b, tex, gpu_id);
 
    default:
@@ -671,6 +698,25 @@ bi_lower_tex_instr(nir_builder *b, nir_tex_instr *tex, void *cb_data)
    }
 }
 
+static bool
+va_lower_txf_buf(nir_builder *b, nir_tex_instr *tex, uint64_t gpu_id)
+{
+   assert(tex->op == nir_texop_txf);
+
+   b->cursor = nir_before_instr(&tex->instr);
+   struct tex_srcs srcs = steal_tex_srcs(b, tex);
+
+   nir_def *addr = nir_pack_64_2x32(b,
+      nir_lea_buf_pan(b, srcs.tex_h, srcs.coord));
+   nir_def *cvt = pan_nir_load_va_buf_cvt(b, srcs.tex_h);
+   nir_def *val = nir_load_global_cvt_pan(b, tex->def.num_components,
+                                          tex->def.bit_size, addr, cvt,
+                                          tex->dest_type);
+
+   nir_def_replace(&tex->def, val);
+   return true;
+}
+
 static nir_def *
 va_tex_handle(nir_builder *b, nir_def *tex_h, nir_def *samp_h)
 {
@@ -981,9 +1027,13 @@
    case nir_texop_txf:
    case nir_texop_txf_ms:
    case nir_texop_tg4:
-      return va_lower_tex(b, tex, gpu_id);
+      if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF)
+         return va_lower_txf_buf(b, tex, gpu_id);
+      else
+         return va_lower_tex(b, tex, gpu_id);
 
    case nir_texop_lod:
+      assert(tex->sampler_dim != GLSL_SAMPLER_DIM_BUF);
       return va_lower_lod(b, tex, gpu_id);
 
    default: