mesa/src/amd/vulkan/nir/radv_meta_nir.c

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1530 lines
65 KiB
C
Raw Normal View History

/* Based on anv:
* Copyright © 2015 Intel Corporation
*
* Copyright © 2016 Red Hat Inc.
* Copyright © 2018 Valve Corporation
*
* SPDX-License-Identifier: MIT
*/
#include "radv_meta_nir.h"
#include "nir/nir_format_convert.h"
#include "ac_nir_surface.h"
#include "ac_surface.h"
#include "nir_builder.h"
#include "radv_device.h"
#include "radv_physical_device.h"
nir_builder PRINTFLIKE(3, 4)
radv_meta_nir_init_shader(struct radv_device *dev, mesa_shader_stage stage, const char *name, ...)
{
const struct radv_physical_device *pdev = radv_device_physical(dev);
nir_builder b = nir_builder_init_simple_shader(stage, NULL, NULL);
if (name) {
va_list args;
va_start(args, name);
b.shader->info.name = ralloc_vasprintf(b.shader, name, args);
va_end(args);
}
b.shader->options = &pdev->nir_options[stage];
radv_device_associate_nir(dev, b.shader);
return b;
}
/* vertex shader that generates vertices */
nir_shader *
radv_meta_nir_build_vs_generate_vertices(struct radv_device *dev)
{
const struct glsl_type *vec4 = glsl_vec4_type();
nir_variable *v_position;
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_VERTEX, "meta_vs_gen_verts");
nir_def *outvec = nir_gen_rect_vertices(&b, NULL, NULL);
v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
v_position->data.location = VARYING_SLOT_POS;
nir_store_var(&b, v_position, outvec, 0xf);
return b.shader;
}
nir_shader *
radv_meta_nir_build_fs_noop(struct radv_device *dev)
{
return radv_meta_nir_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_noop_fs").shader;
}
static void
radv_meta_nir_build_resolve_shader_core(struct radv_device *device, nir_builder *b, bool is_integer, int samples,
nir_variable *input_img, nir_variable *color, nir_def *img_coord)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_deref_instr *input_img_deref = nir_build_deref_var(b, input_img);
nir_def *sample0 = nir_txf_ms(b, img_coord, nir_imm_int(b, 0), .texture_deref = input_img_deref);
if (is_integer || samples <= 1) {
nir_store_var(b, color, sample0, 0xf);
return;
}
if (pdev->use_fmask) {
nir_def *all_same = nir_samples_identical(b, img_coord, .texture_deref = input_img_deref);
nir_push_if(b, nir_inot(b, all_same));
}
nir_def *accum = sample0;
for (int i = 1; i < samples; i++) {
nir_def *sample = nir_txf_ms(b, img_coord, nir_imm_int(b, i), .texture_deref = input_img_deref);
accum = nir_fadd(b, accum, sample);
}
accum = nir_fdiv_imm(b, accum, samples);
nir_store_var(b, color, accum, 0xf);
if (pdev->use_fmask) {
nir_push_else(b, NULL);
nir_store_var(b, color, sample0, 0xf);
nir_pop_if(b, NULL);
}
}
nir_def *
radv_meta_nir_get_global_ids(nir_builder *b, unsigned num_components)
{
unsigned mask = BITFIELD_MASK(num_components);
nir_def *local_ids = nir_channels(b, nir_load_local_invocation_id(b), mask);
nir_def *block_ids = nir_channels(b, nir_load_workgroup_id(b), mask);
nir_def *block_size =
nir_channels(b,
nir_imm_ivec4(b, b->shader->info.workgroup_size[0], b->shader->info.workgroup_size[1],
b->shader->info.workgroup_size[2], 0),
mask);
return nir_iadd(b, nir_imul(b, block_ids, block_size), local_ids);
}
void
radv_meta_nir_break_on_count(nir_builder *b, nir_variable *var, nir_def *count)
{
nir_def *counter = nir_load_var(b, var);
nir_break_if(b, nir_uge(b, counter, count));
counter = nir_iadd_imm(b, counter, 1);
nir_store_var(b, var, counter, 0x1);
}
nir_shader *
radv_meta_nir_build_fill_memory_shader(struct radv_device *dev, uint32_t bytes_per_invocation)
{
assert(bytes_per_invocation == 4 || bytes_per_invocation == 16);
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, "meta_fill_memory_%dB", bytes_per_invocation);
b.shader->info.workgroup_size[0] = 64;
nir_def *pconst = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
nir_def *buffer_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b0011));
nir_def *max_offset = nir_channel(&b, pconst, 2);
nir_def *data = nir_swizzle(&b, nir_channel(&b, pconst, 3), (unsigned[]){0, 0, 0, 0}, bytes_per_invocation / 4);
nir_def *global_id =
nir_iadd(&b, nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b), 0), b.shader->info.workgroup_size[0]),
nir_load_local_invocation_index(&b));
nir_def *offset = nir_umin(&b, nir_imul_imm(&b, global_id, bytes_per_invocation), max_offset);
nir_def *dst_addr = nir_iadd(&b, buffer_addr, nir_u2u64(&b, offset));
nir_build_store_global(&b, data, dst_addr, .align_mul = 4);
return b.shader;
}
nir_shader *
radv_meta_nir_build_copy_memory_shader(struct radv_device *dev, uint32_t bytes_per_invocation)
{
assert(bytes_per_invocation == 1 || bytes_per_invocation == 16);
const uint32_t num_components = bytes_per_invocation == 1 ? 1 : 4;
const uint32_t bit_size = bytes_per_invocation == 1 ? 8 : 32;
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, "meta_copy_memory_%dB", bytes_per_invocation);
b.shader->info.workgroup_size[0] = 64;
nir_def *pconst = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
nir_def *max_offset = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
nir_def *src_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b0011));
nir_def *dst_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b1100));
nir_def *global_id =
nir_iadd(&b, nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b), 0), b.shader->info.workgroup_size[0]),
nir_load_local_invocation_index(&b));
nir_def *offset = nir_u2u64(&b, nir_umin(&b, nir_imul_imm(&b, global_id, bytes_per_invocation), max_offset));
nir_def *data =
nir_build_load_global(&b, num_components, bit_size, nir_iadd(&b, src_addr, offset), .align_mul = bit_size / 8);
nir_build_store_global(&b, data, nir_iadd(&b, dst_addr, offset), .align_mul = bit_size / 8);
return b.shader;
}
nir_shader *
radv_meta_nir_build_blit_vertex_shader(struct radv_device *dev)
{
const struct glsl_type *vec4 = glsl_vec4_type();
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_VERTEX, "meta_blit_vs");
nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
pos_out->data.location = VARYING_SLOT_POS;
nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "v_tex_pos");
tex_pos_out->data.location = VARYING_SLOT_VAR0;
tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;
nir_def *outvec = nir_gen_rect_vertices(&b, NULL, NULL);
nir_store_var(&b, pos_out, outvec, 0xf);
nir_def *src_box = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
nir_def *src0_z = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
nir_def *vertex_id = nir_load_vertex_id_zero_base(&b);
/* vertex 0 - src0_x, src0_y, src0_z */
/* vertex 1 - src0_x, src1_y, src0_z*/
/* vertex 2 - src1_x, src0_y, src0_z */
/* so channel 0 is vertex_id != 2 ? src_x : src_x + w
channel 1 is vertex id != 1 ? src_y : src_y + w */
nir_def *c0cmp = nir_ine_imm(&b, vertex_id, 2);
nir_def *c1cmp = nir_ine_imm(&b, vertex_id, 1);
nir_def *comp[4];
comp[0] = nir_bcsel(&b, c0cmp, nir_channel(&b, src_box, 0), nir_channel(&b, src_box, 2));
comp[1] = nir_bcsel(&b, c1cmp, nir_channel(&b, src_box, 1), nir_channel(&b, src_box, 3));
comp[2] = src0_z;
comp[3] = nir_imm_float(&b, 1.0);
nir_def *out_tex_vec = nir_vec(&b, comp, 4);
nir_store_var(&b, tex_pos_out, out_tex_vec, 0xf);
return b.shader;
}
nir_shader *
radv_meta_nir_build_blit_copy_fragment_shader(struct radv_device *dev, enum glsl_sampler_dim tex_dim)
{
const struct glsl_type *vec4 = glsl_vec4_type();
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_blit_fs.%d", tex_dim);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec4, "v_tex_pos");
tex_pos_in->data.location = VARYING_SLOT_VAR0;
/* Swizzle the array index which comes in as Z coordinate into the right
* position.
*/
unsigned swz[] = {0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2};
nir_def *const tex_pos =
nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
const struct glsl_type *sampler_type =
glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, glsl_get_base_type(vec4));
nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;
nir_deref_instr *tex_deref = nir_build_deref_var(&b, sampler);
nir_def *color = nir_tex(&b, tex_pos, .texture_deref = tex_deref, .sampler_deref = tex_deref);
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
color_out->data.location = FRAG_RESULT_DATA0;
nir_store_var(&b, color_out, color, 0xf);
return b.shader;
}
nir_shader *
radv_meta_nir_build_blit_copy_fragment_shader_depth(struct radv_device *dev, enum glsl_sampler_dim tex_dim)
{
const struct glsl_type *vec4 = glsl_vec4_type();
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_blit_depth_fs.%d", tex_dim);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec4, "v_tex_pos");
tex_pos_in->data.location = VARYING_SLOT_VAR0;
/* Swizzle the array index which comes in as Z coordinate into the right
* position.
*/
unsigned swz[] = {0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2};
nir_def *const tex_pos =
nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
const struct glsl_type *sampler_type =
glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, glsl_get_base_type(vec4));
nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;
nir_deref_instr *tex_deref = nir_build_deref_var(&b, sampler);
nir_def *color = nir_tex(&b, tex_pos, .texture_deref = tex_deref, .sampler_deref = tex_deref);
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
color_out->data.location = FRAG_RESULT_DEPTH;
nir_store_var(&b, color_out, color, 0x1);
return b.shader;
}
nir_shader *
radv_meta_nir_build_blit_copy_fragment_shader_stencil(struct radv_device *dev, enum glsl_sampler_dim tex_dim)
{
const struct glsl_type *vec4 = glsl_vec4_type();
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_blit_stencil_fs.%d", tex_dim);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec4, "v_tex_pos");
tex_pos_in->data.location = VARYING_SLOT_VAR0;
/* Swizzle the array index which comes in as Z coordinate into the right
* position.
*/
unsigned swz[] = {0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2};
nir_def *const tex_pos =
nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
const struct glsl_type *sampler_type =
glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, glsl_get_base_type(vec4));
nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;
nir_deref_instr *tex_deref = nir_build_deref_var(&b, sampler);
nir_def *color = nir_tex(&b, tex_pos, .texture_deref = tex_deref, .sampler_deref = tex_deref);
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
color_out->data.location = FRAG_RESULT_STENCIL;
nir_store_var(&b, color_out, color, 0x1);
return b.shader;
}
nir_shader *
radv_meta_nir_build_blit2d_vertex_shader(struct radv_device *device)
{
const struct glsl_type *vec4 = glsl_vec4_type();
const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
nir_builder b = radv_meta_nir_init_shader(device, MESA_SHADER_VERTEX, "meta_blit2d_vs");
nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
pos_out->data.location = VARYING_SLOT_POS;
nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out, vec2, "v_tex_pos");
tex_pos_out->data.location = VARYING_SLOT_VAR0;
tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;
nir_def *outvec = nir_gen_rect_vertices(&b, NULL, NULL);
nir_store_var(&b, pos_out, outvec, 0xf);
nir_def *src_box = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
nir_def *vertex_id = nir_load_vertex_id_zero_base(&b);
/* vertex 0 - src_x, src_y */
/* vertex 1 - src_x, src_y+h */
/* vertex 2 - src_x+w, src_y */
/* so channel 0 is vertex_id != 2 ? src_x : src_x + w
channel 1 is vertex id != 1 ? src_y : src_y + w */
nir_def *c0cmp = nir_ine_imm(&b, vertex_id, 2);
nir_def *c1cmp = nir_ine_imm(&b, vertex_id, 1);
nir_def *comp[2];
comp[0] = nir_bcsel(&b, c0cmp, nir_channel(&b, src_box, 0), nir_channel(&b, src_box, 2));
comp[1] = nir_bcsel(&b, c1cmp, nir_channel(&b, src_box, 1), nir_channel(&b, src_box, 3));
nir_def *out_tex_vec = nir_vec(&b, comp, 2);
nir_store_var(&b, tex_pos_out, out_tex_vec, 0x3);
return b.shader;
}
nir_def *
radv_meta_nir_build_blit2d_texel_fetch(struct nir_builder *b, struct radv_device *device, nir_def *tex_pos, bool is_3d,
bool is_multisampled)
{
enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D
: is_multisampled ? GLSL_SAMPLER_DIM_MS
: GLSL_SAMPLER_DIM_2D;
const struct glsl_type *sampler_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_UINT);
nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;
nir_def *tex_pos_3d = NULL;
nir_def *sample_idx = NULL;
if (is_3d) {
nir_def *layer = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
nir_def *chans[3];
chans[0] = nir_channel(b, tex_pos, 0);
chans[1] = nir_channel(b, tex_pos, 1);
chans[2] = layer;
tex_pos_3d = nir_vec(b, chans, 3);
}
if (is_multisampled) {
sample_idx = nir_load_sample_id(b);
}
nir_deref_instr *tex_deref = nir_build_deref_var(b, sampler);
if (is_multisampled) {
return nir_txf_ms(b, tex_pos, sample_idx, .texture_deref = tex_deref);
} else {
return nir_txf(b, is_3d ? tex_pos_3d : tex_pos, .texture_deref = tex_deref);
}
}
nir_def *
radv_meta_nir_build_blit2d_buffer_fetch(struct nir_builder *b, struct radv_device *device, nir_def *tex_pos, bool is_3d,
bool is_multisampled)
{
const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
sampler->data.descriptor_set = 0;
sampler->data.binding = 0;
nir_def *width = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
nir_def *pos_x = nir_channel(b, tex_pos, 0);
nir_def *pos_y = nir_channel(b, tex_pos, 1);
pos_y = nir_imul(b, pos_y, width);
pos_x = nir_iadd(b, pos_x, pos_y);
nir_deref_instr *tex_deref = nir_build_deref_var(b, sampler);
return nir_txf(b, pos_x, .texture_deref = tex_deref);
}
nir_shader *
radv_meta_nir_build_blit2d_copy_fragment_shader(struct radv_device *device,
radv_meta_nir_texel_fetch_build_func txf_func, const char *name,
bool is_3d, bool is_multisampled)
{
const struct glsl_type *vec4 = glsl_vec4_type();
const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
nir_builder b = radv_meta_nir_init_shader(device, MESA_SHADER_FRAGMENT, "%s", name);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec2, "v_tex_pos");
tex_pos_in->data.location = VARYING_SLOT_VAR0;
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
color_out->data.location = FRAG_RESULT_DATA0;
nir_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
nir_def *tex_pos = nir_trim_vector(&b, pos_int, 2);
nir_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
nir_store_var(&b, color_out, color, 0xf);
b.shader->info.fs.uses_sample_shading = is_multisampled;
return b.shader;
}
nir_shader *
radv_meta_nir_build_blit2d_copy_fragment_shader_depth(struct radv_device *device,
radv_meta_nir_texel_fetch_build_func txf_func, const char *name,
bool is_3d, bool is_multisampled)
{
const struct glsl_type *vec4 = glsl_vec4_type();
const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
nir_builder b = radv_meta_nir_init_shader(device, MESA_SHADER_FRAGMENT, "%s", name);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec2, "v_tex_pos");
tex_pos_in->data.location = VARYING_SLOT_VAR0;
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
color_out->data.location = FRAG_RESULT_DEPTH;
nir_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
nir_def *tex_pos = nir_trim_vector(&b, pos_int, 2);
nir_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
nir_store_var(&b, color_out, color, 0x1);
b.shader->info.fs.uses_sample_shading = is_multisampled;
return b.shader;
}
nir_shader *
radv_meta_nir_build_blit2d_copy_fragment_shader_stencil(struct radv_device *device,
radv_meta_nir_texel_fetch_build_func txf_func, const char *name,
bool is_3d, bool is_multisampled)
{
const struct glsl_type *vec4 = glsl_vec4_type();
const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
nir_builder b = radv_meta_nir_init_shader(device, MESA_SHADER_FRAGMENT, "%s", name);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec2, "v_tex_pos");
tex_pos_in->data.location = VARYING_SLOT_VAR0;
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
color_out->data.location = FRAG_RESULT_STENCIL;
nir_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
nir_def *tex_pos = nir_trim_vector(&b, pos_int, 2);
nir_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
nir_store_var(&b, color_out, color, 0x1);
b.shader->info.fs.uses_sample_shading = is_multisampled;
return b.shader;
}
nir_shader *
radv_meta_nir_build_itob_compute_shader(struct radv_device *dev, bool is_3d)
{
enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
const struct glsl_type *sampler_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
input_img->data.descriptor_set = 0;
input_img->data.binding = 0;
nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_def *global_id = radv_meta_nir_get_global_ids(&b, is_3d ? 3 : 2);
nir_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
nir_def *img_coord = nir_iadd(&b, global_id, offset);
nir_def *outval = nir_txf(&b, nir_trim_vector(&b, img_coord, 2 + is_3d),
.texture_deref = nir_build_deref_var(&b, input_img));
nir_def *pos_x = nir_channel(&b, global_id, 0);
nir_def *pos_y = nir_channel(&b, global_id, 1);
nir_def *tmp = nir_imul(&b, pos_y, stride);
tmp = nir_iadd(&b, tmp, pos_x);
nir_def *coord = nir_replicate(&b, tmp, 4);
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, nir_undef(&b, 1, 32), outval,
nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
return b.shader;
}
nir_shader *
radv_meta_nir_build_btoi_compute_shader(struct radv_device *dev, bool is_3d)
{
enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
input_img->data.descriptor_set = 0;
input_img->data.binding = 0;
nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_def *global_id = radv_meta_nir_get_global_ids(&b, is_3d ? 3 : 2);
nir_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
nir_def *pos_x = nir_channel(&b, global_id, 0);
nir_def *pos_y = nir_channel(&b, global_id, 1);
nir_def *buf_coord = nir_imul(&b, pos_y, stride);
buf_coord = nir_iadd(&b, buf_coord, pos_x);
nir_def *coord = nir_iadd(&b, global_id, offset);
nir_def *outval = nir_txf(&b, buf_coord, .texture_deref = nir_build_deref_var(&b, input_img));
nir_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1),
is_3d ? nir_channel(&b, coord, 2) : nir_undef(&b, 1, 32), nir_undef(&b, 1, 32));
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, img_coord, nir_undef(&b, 1, 32), outval,
nir_imm_int(&b, 0), .image_dim = dim);
return b.shader;
}
/** Buffer to image - special path for R32G32B32 */
nir_shader *
radv_meta_nir_build_btoi_r32g32b32_compute_shader(struct radv_device *dev)
{
const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, "meta_btoi_r32g32b32_cs");
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
input_img->data.descriptor_set = 0;
input_img->data.binding = 0;
nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_def *global_id = radv_meta_nir_get_global_ids(&b, 2);
nir_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_def *pitch = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 12);
nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
nir_def *pos_x = nir_channel(&b, global_id, 0);
nir_def *pos_y = nir_channel(&b, global_id, 1);
nir_def *buf_coord = nir_imul(&b, pos_y, stride);
buf_coord = nir_iadd(&b, buf_coord, pos_x);
nir_def *img_coord = nir_iadd(&b, global_id, offset);
nir_def *global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, img_coord, 1), pitch),
nir_imul_imm(&b, nir_channel(&b, img_coord, 0), 3));
nir_def *outval = nir_txf(&b, buf_coord, .texture_deref = nir_build_deref_var(&b, input_img));
for (int chan = 0; chan < 3; chan++) {
nir_def *local_pos = nir_iadd_imm(&b, global_pos, chan);
nir_def *coord = nir_replicate(&b, local_pos, 4);
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, nir_undef(&b, 1, 32),
nir_channel(&b, outval, chan), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
}
return b.shader;
}
nir_shader *
radv_meta_nir_build_itoi_compute_shader(struct radv_device *dev, bool src_3d, bool dst_3d, int samples)
{
bool is_multisampled = samples > 1;
enum glsl_sampler_dim src_dim = src_3d ? GLSL_SAMPLER_DIM_3D
: is_multisampled ? GLSL_SAMPLER_DIM_MS
: GLSL_SAMPLER_DIM_2D;
enum glsl_sampler_dim dst_dim = dst_3d ? GLSL_SAMPLER_DIM_3D
: is_multisampled ? GLSL_SAMPLER_DIM_MS
: GLSL_SAMPLER_DIM_2D;
const struct glsl_type *buf_type = glsl_sampler_type(src_dim, false, false, GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_image_type(dst_dim, false, GLSL_TYPE_FLOAT);
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, "meta_itoi_cs-%dd-%dd-%d", src_3d ? 3 : 2,
dst_3d ? 3 : 2, samples);
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
input_img->data.descriptor_set = 0;
input_img->data.binding = 0;
nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_def *global_id = radv_meta_nir_get_global_ids(&b, (src_3d || dst_3d) ? 3 : 2);
nir_def *src_offset = nir_load_push_constant(&b, src_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = src_3d ? 12 : 8);
nir_def *dst_offset = nir_load_push_constant(&b, dst_3d ? 3 : 2, 32, nir_imm_int(&b, 12), .range = dst_3d ? 24 : 20);
nir_def *src_coord = nir_iadd(&b, global_id, src_offset);
nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img);
nir_def *dst_coord = nir_iadd(&b, global_id, dst_offset);
nir_def *tex_vals[8];
if (is_multisampled) {
for (uint32_t i = 0; i < samples; i++) {
tex_vals[i] = nir_txf_ms(&b, nir_trim_vector(&b, src_coord, 2), nir_imm_int(&b, i), .texture_deref = input_img_deref);
}
} else {
tex_vals[0] = nir_txf(&b, nir_trim_vector(&b, src_coord, 2 + src_3d), .texture_deref = input_img_deref);
}
nir_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1),
dst_3d ? nir_channel(&b, dst_coord, 2) : nir_undef(&b, 1, 32), nir_undef(&b, 1, 32));
for (uint32_t i = 0; i < samples; i++) {
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, img_coord, nir_imm_int(&b, i), tex_vals[i],
nir_imm_int(&b, 0), .image_dim = dst_dim);
}
return b.shader;
}
nir_shader *
radv_meta_nir_build_itoi_r32g32b32_compute_shader(struct radv_device *dev)
{
const struct glsl_type *type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, "meta_itoi_r32g32b32_cs");
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, type, "input_img");
input_img->data.descriptor_set = 0;
input_img->data.binding = 0;
nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "output_img");
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_def *global_id = radv_meta_nir_get_global_ids(&b, 2);
nir_def *src_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 12);
nir_def *dst_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 12), .range = 24);
nir_def *src_stride = nir_channel(&b, src_offset, 2);
nir_def *dst_stride = nir_channel(&b, dst_offset, 2);
nir_def *src_img_coord = nir_iadd(&b, global_id, src_offset);
nir_def *dst_img_coord = nir_iadd(&b, global_id, dst_offset);
nir_def *src_global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
nir_imul_imm(&b, nir_channel(&b, src_img_coord, 0), 3));
nir_def *dst_global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
nir_imul_imm(&b, nir_channel(&b, dst_img_coord, 0), 3));
for (int chan = 0; chan < 3; chan++) {
/* src */
nir_def *src_local_pos = nir_iadd_imm(&b, src_global_pos, chan);
nir_def *outval = nir_txf(&b, src_local_pos, .texture_deref = nir_build_deref_var(&b, input_img));
/* dst */
nir_def *dst_local_pos = nir_iadd_imm(&b, dst_global_pos, chan);
nir_def *dst_coord = nir_replicate(&b, dst_local_pos, 4);
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, dst_coord, nir_undef(&b, 1, 32),
nir_channel(&b, outval, 0), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
}
return b.shader;
}
nir_shader *
radv_meta_nir_build_cleari_compute_shader(struct radv_device *dev, bool is_3d, int samples)
{
bool is_multisampled = samples > 1;
enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D
: is_multisampled ? GLSL_SAMPLER_DIM_MS
: GLSL_SAMPLER_DIM_2D;
const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE,
is_3d ? "meta_cleari_cs_3d-%d" : "meta_cleari_cs-%d", samples);
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
output_img->data.descriptor_set = 0;
output_img->data.binding = 0;
nir_def *global_id = radv_meta_nir_get_global_ids(&b, 2);
nir_def *clear_val = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
nir_def *layer = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20);
nir_def *comps[4];
comps[0] = nir_channel(&b, global_id, 0);
comps[1] = nir_channel(&b, global_id, 1);
comps[2] = layer;
comps[3] = nir_undef(&b, 1, 32);
global_id = nir_vec(&b, comps, 4);
for (uint32_t i = 0; i < samples; i++) {
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, global_id, nir_imm_int(&b, i), clear_val,
nir_imm_int(&b, 0), .image_dim = dim);
}
return b.shader;
}
/** Special path for clearing R32G32B32 images using a compute shader. */
nir_shader *
radv_meta_nir_build_cleari_r32g32b32_compute_shader(struct radv_device *dev)
{
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, "meta_cleari_r32g32b32_cs");
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
output_img->data.descriptor_set = 0;
output_img->data.binding = 0;
nir_def *global_id = radv_meta_nir_get_global_ids(&b, 2);
nir_def *clear_val = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 12);
nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
nir_def *global_x = nir_channel(&b, global_id, 0);
nir_def *global_y = nir_channel(&b, global_id, 1);
nir_def *global_pos = nir_iadd(&b, nir_imul(&b, global_y, stride), nir_imul_imm(&b, global_x, 3));
for (unsigned chan = 0; chan < 3; chan++) {
nir_def *local_pos = nir_iadd_imm(&b, global_pos, chan);
nir_def *coord = nir_replicate(&b, local_pos, 4);
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, nir_undef(&b, 1, 32),
nir_channel(&b, clear_val, chan), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
}
return b.shader;
}
void
radv_meta_nir_build_clear_color_shaders(struct radv_device *dev, struct nir_shader **out_vs, struct nir_shader **out_fs,
uint32_t frag_output)
{
nir_builder vs_b = radv_meta_nir_init_shader(dev, MESA_SHADER_VERTEX, "meta_clear_color_vs");
nir_builder fs_b = radv_meta_nir_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_clear_color_fs-%d", frag_output);
const struct glsl_type *position_type = glsl_vec4_type();
const struct glsl_type *color_type = glsl_vec4_type();
nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, "gl_Position");
vs_out_pos->data.location = VARYING_SLOT_POS;
nir_def *in_color_load = nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16);
nir_variable *fs_out_color = nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, "f_color");
fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;
nir_store_var(&fs_b, fs_out_color, in_color_load, 0xf);
nir_def *outvec = nir_gen_rect_vertices(&vs_b, NULL, NULL);
nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
const struct glsl_type *layer_type = glsl_int_type();
nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
vs_out_layer->data.location = VARYING_SLOT_LAYER;
vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
nir_def *inst_id = nir_load_instance_id(&vs_b);
nir_def *base_instance = nir_load_base_instance(&vs_b);
nir_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
*out_vs = vs_b.shader;
*out_fs = fs_b.shader;
}
void
radv_meta_nir_build_clear_depthstencil_shaders(struct radv_device *dev, struct nir_shader **out_vs,
struct nir_shader **out_fs, bool unrestricted)
{
nir_builder vs_b = radv_meta_nir_init_shader(
dev, MESA_SHADER_VERTEX, unrestricted ? "meta_clear_depthstencil_unrestricted_vs" : "meta_clear_depthstencil_vs");
nir_builder fs_b = radv_meta_nir_init_shader(
dev, MESA_SHADER_FRAGMENT,
unrestricted ? "meta_clear_depthstencil_unrestricted_fs" : "meta_clear_depthstencil_fs");
const struct glsl_type *position_out_type = glsl_vec4_type();
nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type, "gl_Position");
vs_out_pos->data.location = VARYING_SLOT_POS;
nir_def *z;
if (unrestricted) {
nir_def *in_color_load = nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4);
nir_variable *fs_out_depth = nir_variable_create(fs_b.shader, nir_var_shader_out, glsl_int_type(), "f_depth");
fs_out_depth->data.location = FRAG_RESULT_DEPTH;
nir_store_var(&fs_b, fs_out_depth, in_color_load, 0x1);
z = nir_imm_float(&vs_b, 0.0);
} else {
z = nir_load_push_constant(&vs_b, 1, 32, nir_imm_int(&vs_b, 0), .range = 4);
}
nir_def *outvec = nir_gen_rect_vertices(&vs_b, z, NULL);
nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
const struct glsl_type *layer_type = glsl_int_type();
nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
vs_out_layer->data.location = VARYING_SLOT_LAYER;
vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
nir_def *inst_id = nir_load_instance_id(&vs_b);
nir_def *base_instance = nir_load_base_instance(&vs_b);
nir_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
*out_vs = vs_b.shader;
*out_fs = fs_b.shader;
}
nir_shader *
radv_meta_nir_build_clear_htile_mask_shader(struct radv_device *dev)
{
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_htile_mask");
b.shader->info.workgroup_size[0] = 64;
nir_def *global_id = radv_meta_nir_get_global_ids(&b, 1);
nir_def *offset = nir_imul_imm(&b, global_id, 16);
offset = nir_channel(&b, offset, 0);
nir_def *constants = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
nir_def *va = nir_pack_64_2x32(&b, nir_channels(&b, constants, 0x3));
va = nir_iadd(&b, va, nir_u2u64(&b, offset));
nir_def *load = nir_build_load_global(&b, 4, 32, va, .align_mul = 16);
/* data = (data & ~htile_mask) | (htile_value & htile_mask) */
nir_def *data = nir_iand(&b, load, nir_channel(&b, constants, 3));
data = nir_ior(&b, data, nir_channel(&b, constants, 2));
nir_build_store_global(&b, data, va, .access = ACCESS_NON_READABLE, .align_mul = 16);
return b.shader;
}
/**
* Clear DCC using comp-to-single by storing the clear value at the beginning of every 256B block.
* For MSAA images, clearing the first sample should be enough as long as CMASK is also cleared.
*/
nir_shader *
radv_meta_nir_build_clear_dcc_comp_to_single_shader(struct radv_device *dev, bool is_msaa)
{
enum glsl_sampler_dim dim = is_msaa ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
const struct glsl_type *img_type = glsl_image_type(dim, true, GLSL_TYPE_FLOAT);
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_dcc_comp_to_single-%s",
is_msaa ? "multisampled" : "singlesampled");
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_def *global_id = radv_meta_nir_get_global_ids(&b, 3);
/* Load the dimensions in pixels of a block that gets compressed to one DCC byte. */
nir_def *dcc_block_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
/* Compute the coordinates. */
nir_def *coord = nir_trim_vector(&b, global_id, 2);
coord = nir_imul(&b, coord, dcc_block_size);
coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), nir_channel(&b, global_id, 2),
nir_undef(&b, 1, 32));
nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
output_img->data.descriptor_set = 0;
output_img->data.binding = 0;
/* Load the clear color values. */
nir_def *clear_values = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 8), .range = 24);
nir_def *data = nir_vec4(&b, nir_channel(&b, clear_values, 0), nir_channel(&b, clear_values, 1),
nir_channel(&b, clear_values, 2), nir_channel(&b, clear_values, 3));
/* Store the clear color values. */
nir_def *sample_id = is_msaa ? nir_imm_int(&b, 0) : nir_undef(&b, 1, 32);
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, sample_id, data, nir_imm_int(&b, 0),
.image_dim = dim, .image_array = true);
return b.shader;
}
nir_shader *
radv_meta_nir_build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_builder b = radv_meta_nir_init_shader(device, MESA_SHADER_COMPUTE, "meta_copy_vrs_htile");
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
/* Get coordinates. */
nir_def *global_id = radv_meta_nir_get_global_ids(&b, 2);
nir_def *addr = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_def *htile_va = nir_pack_64_2x32(&b, nir_channels(&b, addr, 0x3));
nir_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16);
/* Multiply the coordinates by the HTILE block size. */
nir_def *coord = nir_iadd(&b, nir_imul_imm(&b, global_id, 8), offset);
/* Load constants. */
nir_def *constants = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 16), .range = 28);
nir_def *htile_pitch = nir_channel(&b, constants, 0);
nir_def *htile_slice_size = nir_channel(&b, constants, 1);
nir_def *read_htile_value = nir_channel(&b, constants, 2);
/* Get the HTILE addr from coordinates. */
nir_def *zero = nir_imm_int(&b, 0);
nir_def *htile_offset =
ac_nir_htile_addr_from_coord(&b, &pdev->info, &surf->u.gfx9.zs.htile_equation, htile_pitch, htile_slice_size,
nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero);
/* Set up the input VRS image descriptor. */
const struct glsl_type *vrs_sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT);
nir_variable *input_vrs_img = nir_variable_create(b.shader, nir_var_uniform, vrs_sampler_type, "input_vrs_image");
input_vrs_img->data.descriptor_set = 0;
input_vrs_img->data.binding = 0;
/* Load the VRS rates from the 2D image. */
nir_def *value = nir_txf(&b, global_id, .texture_deref = nir_build_deref_var(&b, input_vrs_img));
/* Extract the X/Y rates and clamp them because the maximum supported VRS rate is 2x2 (1x1 in
* hardware).
*
* VRS rate X = min(value >> 2, 1)
* VRS rate Y = min(value & 3, 1)
*/
nir_def *x_rate = nir_ushr_imm(&b, nir_channel(&b, value, 0), 2);
x_rate = nir_umin(&b, x_rate, nir_imm_int(&b, 1));
nir_def *y_rate = nir_iand_imm(&b, nir_channel(&b, value, 0), 3);
y_rate = nir_umin(&b, y_rate, nir_imm_int(&b, 1));
/* Compute the final VRS rate. */
nir_def *vrs_rates = nir_ior(&b, nir_ishl_imm(&b, y_rate, 10), nir_ishl_imm(&b, x_rate, 6));
/* Load the HTILE value if requested, otherwise use the default value. */
nir_variable *htile_value = nir_local_variable_create(b.impl, glsl_int_type(), "htile_value");
nir_push_if(&b, nir_ieq_imm(&b, read_htile_value, 1));
{
/* Load the existing HTILE 32-bit value for this 8x8 pixels area. */
nir_def *input_value = nir_build_load_global(&b, 1, 32, nir_iadd(&b, htile_va, nir_u2u64(&b, htile_offset)));
/* Clear the 4-bit VRS rates. */
nir_store_var(&b, htile_value, nir_iand_imm(&b, input_value, 0xfffff33f), 0x1);
}
nir_push_else(&b, NULL);
{
nir_store_var(&b, htile_value, nir_imm_int(&b, 0xfffff33f), 0x1);
}
nir_pop_if(&b, NULL);
/* Set the VRS rates loaded from the image. */
nir_def *output_value = nir_ior(&b, nir_load_var(&b, htile_value), vrs_rates);
/* Store the updated HTILE 32-bit which contains the VRS rates. */
nir_build_store_global(&b, output_value, nir_iadd(&b, htile_va, nir_u2u64(&b, htile_offset)),
.access = ACCESS_NON_READABLE);
return b.shader;
}
nir_shader *
radv_meta_nir_build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *surf)
{
const struct radv_physical_device *pdev = radv_device_physical(dev);
enum glsl_sampler_dim dim = GLSL_SAMPLER_DIM_BUF;
const struct glsl_type *buf_type = glsl_image_type(dim, false, GLSL_TYPE_UINT);
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, "dcc_retile_compute");
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_def *src_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_def *src_dcc_pitch = nir_channels(&b, src_dcc_size, 1);
nir_def *src_dcc_height = nir_channels(&b, src_dcc_size, 2);
nir_def *dst_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8);
nir_def *dst_dcc_pitch = nir_channels(&b, dst_dcc_size, 1);
nir_def *dst_dcc_height = nir_channels(&b, dst_dcc_size, 2);
nir_variable *input_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_in");
input_dcc->data.descriptor_set = 0;
input_dcc->data.binding = 0;
nir_variable *output_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_out");
output_dcc->data.descriptor_set = 0;
output_dcc->data.binding = 1;
nir_def *input_dcc_ref = &nir_build_deref_var(&b, input_dcc)->def;
nir_def *output_dcc_ref = &nir_build_deref_var(&b, output_dcc)->def;
nir_def *coord = radv_meta_nir_get_global_ids(&b, 2);
nir_def *zero = nir_imm_int(&b, 0);
coord =
nir_imul(&b, coord, nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, surf->u.gfx9.color.dcc_block_height));
nir_def *src = ac_nir_dcc_addr_from_coord(&b, &pdev->info, surf->bpe, &surf->u.gfx9.color.dcc_equation,
src_dcc_pitch, src_dcc_height, zero, nir_channel(&b, coord, 0),
nir_channel(&b, coord, 1), zero, zero, zero);
nir_def *dst = ac_nir_dcc_addr_from_coord(&b, &pdev->info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation,
dst_dcc_pitch, dst_dcc_height, zero, nir_channel(&b, coord, 0),
nir_channel(&b, coord, 1), zero, zero, zero);
nir_def *dcc_val = nir_image_deref_load(&b, 1, 32, input_dcc_ref, nir_vec4(&b, src, src, src, src),
nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = dim);
nir_image_deref_store(&b, output_dcc_ref, nir_vec4(&b, dst, dst, dst, dst), nir_undef(&b, 1, 32), dcc_val,
nir_imm_int(&b, 0), .image_dim = dim);
return b.shader;
}
nir_shader *
radv_meta_nir_build_expand_depth_stencil_compute_shader(struct radv_device *dev)
{
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, "expand_depth_stencil_compute");
/* We need at least 8/8/1 to cover an entire HTILE block in a single workgroup. */
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_image, img_type, "in_img");
input_img->data.descriptor_set = 0;
input_img->data.binding = 0;
nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_def *invoc_id = nir_load_local_invocation_id(&b);
nir_def *wg_id = nir_load_workgroup_id(&b);
nir_def *block_size = nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
b.shader->info.workgroup_size[2], 0);
nir_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
nir_def *data = nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->def, global_id,
nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
/* We need a SCOPE_DEVICE memory_scope because ACO will avoid
* creating a vmcnt(0) because it expects the L1 cache to keep memory
* operations in-order for the same workgroup. The vmcnt(0) seems
* necessary however. */
nir_barrier(&b, .execution_scope = SCOPE_WORKGROUP, .memory_scope = SCOPE_DEVICE,
.memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, global_id, nir_undef(&b, 1, 32), data,
nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
return b.shader;
}
nir_shader *
radv_meta_nir_build_dcc_decompress_compute_shader(struct radv_device *dev)
{
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, "dcc_decompress_compute");
/* We need at least 16/16/1 to cover an entire DCC block in a single workgroup. */
b.shader->info.workgroup_size[0] = 16;
b.shader->info.workgroup_size[1] = 16;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_image, img_type, "in_img");
input_img->data.descriptor_set = 0;
input_img->data.binding = 0;
nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_def *global_id = radv_meta_nir_get_global_ids(&b, 2);
nir_def *img_coord = nir_vec4(&b, nir_channel(&b, global_id, 0), nir_channel(&b, global_id, 1), nir_undef(&b, 1, 32),
nir_undef(&b, 1, 32));
nir_def *data = nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->def, img_coord,
nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
/* We need a SCOPE_DEVICE memory_scope because ACO will avoid
* creating a vmcnt(0) because it expects the L1 cache to keep memory
* operations in-order for the same workgroup. The vmcnt(0) seems
* necessary however. */
nir_barrier(&b, .execution_scope = SCOPE_WORKGROUP, .memory_scope = SCOPE_DEVICE,
.memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, img_coord, nir_undef(&b, 1, 32), data,
nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
return b.shader;
}
nir_shader *
radv_meta_nir_build_fmask_copy_compute_shader(struct radv_device *dev, int samples)
{
const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_MS, false, GLSL_TYPE_FLOAT);
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, "meta_fmask_copy_cs_-%d", samples);
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
input_img->data.descriptor_set = 0;
input_img->data.binding = 0;
nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_def *invoc_id = nir_load_local_invocation_id(&b);
nir_def *wg_id = nir_load_workgroup_id(&b);
nir_def *block_size = nir_imm_ivec3(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
b.shader->info.workgroup_size[2]);
nir_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
/* Get coordinates. */
nir_def *src_coord = nir_trim_vector(&b, global_id, 2);
nir_def *dst_coord = nir_vec4(&b, nir_channel(&b, src_coord, 0), nir_channel(&b, src_coord, 1), nir_undef(&b, 1, 32),
nir_undef(&b, 1, 32));
nir_def *frag_mask =
nir_build_tex(&b, nir_texop_fragment_mask_fetch_amd,
.coord = src_coord,
.texture_deref = nir_build_deref_var(&b, input_img));
/* Get the maximum sample used in this fragment. */
nir_def *max_sample_index = nir_imm_int(&b, 0);
for (uint32_t s = 0; s < samples; s++) {
/* max_sample_index = MAX2(max_sample_index, (frag_mask >> (s * 4)) & 0xf) */
max_sample_index = nir_umax(&b, max_sample_index,
nir_ubitfield_extract(&b, frag_mask, nir_imm_int(&b, 4 * s), nir_imm_int(&b, 4)));
}
nir_variable *counter = nir_local_variable_create(b.impl, glsl_int_type(), "counter");
nir_store_var(&b, counter, nir_imm_int(&b, 0), 0x1);
nir_loop *loop = nir_push_loop(&b);
{
nir_def *sample_id = nir_load_var(&b, counter);
nir_def *outval = nir_build_tex(&b, nir_texop_fragment_fetch_amd,
.coord = src_coord, .ms_index = sample_id,
.texture_deref = nir_build_deref_var(&b, input_img));
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, dst_coord, sample_id, outval,
nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_MS);
radv_meta_nir_break_on_count(&b, counter, max_sample_index);
}
nir_pop_loop(&b, loop);
return b.shader;
}
nir_shader *
radv_meta_nir_build_fmask_expand_compute_shader(struct radv_device *device, int samples)
{
const struct glsl_type *type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_MS, true, GLSL_TYPE_FLOAT);
nir_builder b = radv_meta_nir_init_shader(device, MESA_SHADER_COMPUTE, "meta_fmask_expand_cs-%d", samples);
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, type, "s_tex");
input_img->data.descriptor_set = 0;
input_img->data.binding = 0;
nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
output_img->data.access = ACCESS_NON_READABLE;
nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img);
nir_def *output_img_deref = &nir_build_deref_var(&b, output_img)->def;
nir_def *tex_coord = radv_meta_nir_get_global_ids(&b, 3);
nir_def *tex_vals[8];
for (uint32_t i = 0; i < samples; i++) {
tex_vals[i] = nir_txf_ms(&b, tex_coord, nir_imm_int(&b, i), .texture_deref = input_img_deref);
}
nir_def *img_coord = nir_vec4(&b, nir_channel(&b, tex_coord, 0), nir_channel(&b, tex_coord, 1),
nir_channel(&b, tex_coord, 2), nir_undef(&b, 1, 32));
for (uint32_t i = 0; i < samples; i++) {
nir_image_deref_store(&b, output_img_deref, img_coord, nir_imm_int(&b, i), tex_vals[i], nir_imm_int(&b, 0),
.image_dim = GLSL_SAMPLER_DIM_MS, .image_array = true);
}
return b.shader;
}
static nir_def *
radv_meta_build_resolve_srgb_conversion(nir_builder *b, nir_def *input)
{
unsigned i;
nir_def *comp[4];
for (i = 0; i < 3; i++)
comp[i] = nir_format_linear_to_srgb(b, nir_channel(b, input, i));
comp[3] = nir_channels(b, input, 1 << 3);
return nir_vec(b, comp, 4);
}
static const char *
radv_meta_resolve_compute_type_name(enum radv_meta_resolve_compute_type type)
{
switch (type) {
case RADV_META_RESOLVE_COMPUTE_NORM:
return "norm";
case RADV_META_RESOLVE_COMPUTE_NORM_SRGB:
return "srgb";
case RADV_META_RESOLVE_COMPUTE_INTEGER:
return "integer";
case RADV_META_RESOLVE_COMPUTE_FLOAT:
return "float";
default:
build: avoid redefining unreachable() which is standard in C23 In the C23 standard unreachable() is now a predefined function-like macro in <stddef.h> See https://android.googlesource.com/platform/bionic/+/HEAD/docs/c23.md#is-now-a-predefined-function_like-macro-in And this causes build errors when building for C23: ----------------------------------------------------------------------- In file included from ../src/util/log.h:30, from ../src/util/log.c:30: ../src/util/macros.h:123:9: warning: "unreachable" redefined 123 | #define unreachable(str) \ | ^~~~~~~~~~~ In file included from ../src/util/macros.h:31: /usr/lib/gcc/x86_64-linux-gnu/14/include/stddef.h:456:9: note: this is the location of the previous definition 456 | #define unreachable() (__builtin_unreachable ()) | ^~~~~~~~~~~ ----------------------------------------------------------------------- So don't redefine it with the same name, but use the name UNREACHABLE() to also signify it's a macro. Using a different name also makes sense because the behavior of the macro was extending the one of __builtin_unreachable() anyway, and it also had a different signature, accepting one argument, compared to the standard unreachable() with no arguments. This change improves the chances of building mesa with the C23 standard, which for instance is the default in recent AOSP versions. All the instances of the macro, including the definition, were updated with the following command line: git grep -l '[^_]unreachable(' -- "src/**" | sort | uniq | \ while read file; \ do \ sed -e 's/\([^_]\)unreachable(/\1UNREACHABLE(/g' -i "$file"; \ done && \ sed -e 's/#undef unreachable/#undef UNREACHABLE/g' -i src/intel/isl/isl_aux_info.c Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36437>
2025-07-23 09:17:35 +02:00
UNREACHABLE("invalid compute resolve type");
}
}
nir_shader *
radv_meta_nir_build_resolve_compute_shader(struct radv_device *dev, enum radv_meta_resolve_compute_type type,
int samples)
{
enum glsl_base_type img_base_type = type == RADV_META_RESOLVE_COMPUTE_INTEGER ? GLSL_TYPE_UINT : GLSL_TYPE_FLOAT;
const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type);
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, img_base_type);
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs-%d-%s", samples,
radv_meta_resolve_compute_type_name(type));
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
input_img->data.descriptor_set = 0;
input_img->data.binding = 0;
nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_def *global_id = radv_meta_nir_get_global_ids(&b, 2);
nir_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16);
nir_def *src_coord = nir_iadd(&b, global_id, src_offset);
nir_def *dst_coord = nir_iadd(&b, global_id, dst_offset);
nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
radv_meta_nir_build_resolve_shader_core(dev, &b, type == RADV_META_RESOLVE_COMPUTE_INTEGER, samples, input_img,
color, src_coord);
nir_def *outval = nir_load_var(&b, color);
if (type == RADV_META_RESOLVE_COMPUTE_NORM_SRGB)
outval = radv_meta_build_resolve_srgb_conversion(&b, outval);
if (type == RADV_META_RESOLVE_COMPUTE_NORM || type == RADV_META_RESOLVE_COMPUTE_NORM_SRGB)
outval = nir_f2f32(&b, nir_f2f16_rtz(&b, outval));
nir_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1), nir_undef(&b, 1, 32),
nir_undef(&b, 1, 32));
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, img_coord, nir_undef(&b, 1, 32), outval,
nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
return b.shader;
}
static const char *
get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)
{
switch (resolve_mode) {
case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT:
return "zero";
case VK_RESOLVE_MODE_AVERAGE_BIT:
return "average";
case VK_RESOLVE_MODE_MIN_BIT:
return "min";
case VK_RESOLVE_MODE_MAX_BIT:
return "max";
default:
build: avoid redefining unreachable() which is standard in C23 In the C23 standard unreachable() is now a predefined function-like macro in <stddef.h> See https://android.googlesource.com/platform/bionic/+/HEAD/docs/c23.md#is-now-a-predefined-function_like-macro-in And this causes build errors when building for C23: ----------------------------------------------------------------------- In file included from ../src/util/log.h:30, from ../src/util/log.c:30: ../src/util/macros.h:123:9: warning: "unreachable" redefined 123 | #define unreachable(str) \ | ^~~~~~~~~~~ In file included from ../src/util/macros.h:31: /usr/lib/gcc/x86_64-linux-gnu/14/include/stddef.h:456:9: note: this is the location of the previous definition 456 | #define unreachable() (__builtin_unreachable ()) | ^~~~~~~~~~~ ----------------------------------------------------------------------- So don't redefine it with the same name, but use the name UNREACHABLE() to also signify it's a macro. Using a different name also makes sense because the behavior of the macro was extending the one of __builtin_unreachable() anyway, and it also had a different signature, accepting one argument, compared to the standard unreachable() with no arguments. This change improves the chances of building mesa with the C23 standard, which for instance is the default in recent AOSP versions. All the instances of the macro, including the definition, were updated with the following command line: git grep -l '[^_]unreachable(' -- "src/**" | sort | uniq | \ while read file; \ do \ sed -e 's/\([^_]\)unreachable(/\1UNREACHABLE(/g' -i "$file"; \ done && \ sed -e 's/#undef unreachable/#undef UNREACHABLE/g' -i src/intel/isl/isl_aux_info.c Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36437>
2025-07-23 09:17:35 +02:00
UNREACHABLE("invalid resolve mode");
}
}
nir_shader *
radv_meta_nir_build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples,
enum radv_meta_resolve_type index,
VkResolveModeFlagBits resolve_mode)
{
enum glsl_base_type img_base_type = index == RADV_META_DEPTH_RESOLVE ? GLSL_TYPE_FLOAT : GLSL_TYPE_UINT;
const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, img_base_type);
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, img_base_type);
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs_%s-%s-%d",
index == RADV_META_DEPTH_RESOLVE ? "depth" : "stencil",
get_resolve_mode_str(resolve_mode), samples);
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
input_img->data.descriptor_set = 0;
input_img->data.binding = 0;
nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
output_img->data.descriptor_set = 0;
output_img->data.binding = 1;
nir_def *global_id = radv_meta_nir_get_global_ids(&b, 3);
nir_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_def *resolve_coord = nir_iadd(&b, nir_trim_vector(&b, global_id, 2), offset);
nir_def *img_coord =
nir_vec3(&b, nir_channel(&b, resolve_coord, 0), nir_channel(&b, resolve_coord, 1), nir_channel(&b, global_id, 2));
nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img);
nir_def *outval = nir_txf_ms(&b, img_coord, nir_imm_int(&b, 0), .texture_deref = input_img_deref);
if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT) {
for (int i = 1; i < samples; i++) {
nir_def *si = nir_txf_ms(&b, img_coord, nir_imm_int(&b, i), .texture_deref = input_img_deref);
switch (resolve_mode) {
case VK_RESOLVE_MODE_AVERAGE_BIT:
assert(index == RADV_META_DEPTH_RESOLVE);
outval = nir_fadd(&b, outval, si);
break;
case VK_RESOLVE_MODE_MIN_BIT:
if (index == RADV_META_DEPTH_RESOLVE)
outval = nir_fmin(&b, outval, si);
else
outval = nir_umin(&b, outval, si);
break;
case VK_RESOLVE_MODE_MAX_BIT:
if (index == RADV_META_DEPTH_RESOLVE)
outval = nir_fmax(&b, outval, si);
else
outval = nir_umax(&b, outval, si);
break;
default:
build: avoid redefining unreachable() which is standard in C23 In the C23 standard unreachable() is now a predefined function-like macro in <stddef.h> See https://android.googlesource.com/platform/bionic/+/HEAD/docs/c23.md#is-now-a-predefined-function_like-macro-in And this causes build errors when building for C23: ----------------------------------------------------------------------- In file included from ../src/util/log.h:30, from ../src/util/log.c:30: ../src/util/macros.h:123:9: warning: "unreachable" redefined 123 | #define unreachable(str) \ | ^~~~~~~~~~~ In file included from ../src/util/macros.h:31: /usr/lib/gcc/x86_64-linux-gnu/14/include/stddef.h:456:9: note: this is the location of the previous definition 456 | #define unreachable() (__builtin_unreachable ()) | ^~~~~~~~~~~ ----------------------------------------------------------------------- So don't redefine it with the same name, but use the name UNREACHABLE() to also signify it's a macro. Using a different name also makes sense because the behavior of the macro was extending the one of __builtin_unreachable() anyway, and it also had a different signature, accepting one argument, compared to the standard unreachable() with no arguments. This change improves the chances of building mesa with the C23 standard, which for instance is the default in recent AOSP versions. All the instances of the macro, including the definition, were updated with the following command line: git grep -l '[^_]unreachable(' -- "src/**" | sort | uniq | \ while read file; \ do \ sed -e 's/\([^_]\)unreachable(/\1UNREACHABLE(/g' -i "$file"; \ done && \ sed -e 's/#undef unreachable/#undef UNREACHABLE/g' -i src/intel/isl/isl_aux_info.c Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36437>
2025-07-23 09:17:35 +02:00
UNREACHABLE("invalid resolve mode");
}
}
if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT)
outval = nir_fdiv_imm(&b, outval, samples);
}
nir_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1),
nir_channel(&b, img_coord, 2), nir_undef(&b, 1, 32));
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, nir_undef(&b, 1, 32), outval,
nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
return b.shader;
}
nir_shader *
radv_meta_nir_build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, int samples)
{
enum glsl_base_type img_base_type = is_integer ? GLSL_TYPE_UINT : GLSL_TYPE_FLOAT;
const struct glsl_type *vec4 = glsl_vec4_type();
const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type);
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_resolve_fs-%d-%s", samples,
is_integer ? "int" : "float");
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
input_img->data.descriptor_set = 0;
input_img->data.binding = 0;
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
color_out->data.location = FRAG_RESULT_DATA0;
nir_def *pos_in = nir_trim_vector(&b, nir_load_frag_coord(&b), 2);
nir_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
nir_def *pos_int = nir_f2i32(&b, pos_in);
nir_def *img_coord = nir_trim_vector(&b, nir_iadd(&b, pos_int, src_offset), 2);
nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
radv_meta_nir_build_resolve_shader_core(dev, &b, is_integer, samples, input_img, color, img_coord);
nir_def *outval = nir_load_var(&b, color);
nir_store_var(&b, color_out, outval, 0xf);
return b.shader;
}
nir_shader *
radv_meta_nir_build_depth_stencil_resolve_fragment_shader(struct radv_device *dev, int samples,
enum radv_meta_resolve_type index,
VkResolveModeFlagBits resolve_mode)
{
enum glsl_base_type img_base_type = index == RADV_META_DEPTH_RESOLVE ? GLSL_TYPE_FLOAT : GLSL_TYPE_UINT;
const struct glsl_type *vec4 = glsl_vec4_type();
const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type);
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_resolve_fs_%s-%s-%d",
index == RADV_META_DEPTH_RESOLVE ? "depth" : "stencil",
get_resolve_mode_str(resolve_mode), samples);
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
input_img->data.descriptor_set = 0;
input_img->data.binding = 0;
nir_variable *fs_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_out");
fs_out->data.location = index == RADV_META_DEPTH_RESOLVE ? FRAG_RESULT_DEPTH : FRAG_RESULT_STENCIL;
nir_def *pos_in = nir_trim_vector(&b, nir_load_frag_coord(&b), 2);
nir_def *pos_int = nir_f2i32(&b, pos_in);
nir_def *img_coord = nir_trim_vector(&b, pos_int, 2);
nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img);
nir_def *outval = nir_txf_ms(&b, img_coord, nir_imm_int(&b, 0), .texture_deref = input_img_deref);
if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT) {
for (int i = 1; i < samples; i++) {
nir_def *si = nir_txf_ms(&b, img_coord, nir_imm_int(&b, i), .texture_deref = input_img_deref);
switch (resolve_mode) {
case VK_RESOLVE_MODE_AVERAGE_BIT:
assert(index == RADV_META_DEPTH_RESOLVE);
outval = nir_fadd(&b, outval, si);
break;
case VK_RESOLVE_MODE_MIN_BIT:
if (index == RADV_META_DEPTH_RESOLVE)
outval = nir_fmin(&b, outval, si);
else
outval = nir_umin(&b, outval, si);
break;
case VK_RESOLVE_MODE_MAX_BIT:
if (index == RADV_META_DEPTH_RESOLVE)
outval = nir_fmax(&b, outval, si);
else
outval = nir_umax(&b, outval, si);
break;
default:
build: avoid redefining unreachable() which is standard in C23 In the C23 standard unreachable() is now a predefined function-like macro in <stddef.h> See https://android.googlesource.com/platform/bionic/+/HEAD/docs/c23.md#is-now-a-predefined-function_like-macro-in And this causes build errors when building for C23: ----------------------------------------------------------------------- In file included from ../src/util/log.h:30, from ../src/util/log.c:30: ../src/util/macros.h:123:9: warning: "unreachable" redefined 123 | #define unreachable(str) \ | ^~~~~~~~~~~ In file included from ../src/util/macros.h:31: /usr/lib/gcc/x86_64-linux-gnu/14/include/stddef.h:456:9: note: this is the location of the previous definition 456 | #define unreachable() (__builtin_unreachable ()) | ^~~~~~~~~~~ ----------------------------------------------------------------------- So don't redefine it with the same name, but use the name UNREACHABLE() to also signify it's a macro. Using a different name also makes sense because the behavior of the macro was extending the one of __builtin_unreachable() anyway, and it also had a different signature, accepting one argument, compared to the standard unreachable() with no arguments. This change improves the chances of building mesa with the C23 standard, which for instance is the default in recent AOSP versions. All the instances of the macro, including the definition, were updated with the following command line: git grep -l '[^_]unreachable(' -- "src/**" | sort | uniq | \ while read file; \ do \ sed -e 's/\([^_]\)unreachable(/\1UNREACHABLE(/g' -i "$file"; \ done && \ sed -e 's/#undef unreachable/#undef UNREACHABLE/g' -i src/intel/isl/isl_aux_info.c Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36437>
2025-07-23 09:17:35 +02:00
UNREACHABLE("invalid resolve mode");
}
}
if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT)
outval = nir_fdiv_imm(&b, outval, samples);
}
nir_store_var(&b, fs_out, outval, 0x1);
return b.shader;
}
nir_shader *
radv_meta_nir_build_resolve_fs(struct radv_device *dev)
{
const struct glsl_type *vec4 = glsl_vec4_type();
nir_variable *f_color;
nir_builder b = radv_meta_nir_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_resolve_fs");
f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
f_color->data.location = FRAG_RESULT_DATA0;
nir_store_var(&b, f_color, nir_imm_vec4(&b, 0.0, 0.0, 0.0, 1.0), 0xf);
return b.shader;
}