radv: implement buffer to image operations for R32G32B32

This should fix rendering issues with Batman Arkham City.
We will probably need to implement itob and itoi at some
point, but currently nothing hits these paths.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107765
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:
Samuel Pitoiset 2018-10-12 11:30:13 +02:00
parent ca83d51cfb
commit 593996bc02
3 changed files with 353 additions and 2 deletions

View file

@ -483,6 +483,214 @@ radv_device_finish_meta_btoi_state(struct radv_device *device)
state->btoi.pipeline_3d, &state->alloc); state->btoi.pipeline_3d, &state->alloc);
} }
/*
 * Buffer to image - special path for R32G32B32
 *
 * Builds the compute shader used to copy a texel buffer into an R32G32B32
 * destination that is accessed through a single-channel texel buffer.
 *
 * Bindings (descriptor set 0):
 *   binding 0 - "s_tex":   source texel buffer, fetched with txf.
 *   binding 1 - "out_img": destination texel buffer, written one channel
 *                          per store.
 *
 * Push constants (16 bytes):
 *   bytes  0..7  - "offset": two 32-bit values added to the global
 *                  invocation id to form the destination coordinate.
 *   bytes  8..11 - "pitch":  multiplier applied to the destination y.
 *   bytes 12..15 - "stride": multiplier applied to the source y.
 *
 * Each invocation fetches one texel from the source and stores its first
 * three channels to three consecutive destination elements starting at
 *   dst_y * pitch + dst_x * 3.
 *
 * The `dev` parameter is not used by the body.
 */
static nir_shader *
build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
{
	nir_builder b;
	const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_btoi_r32g32b32_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;

	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      buf_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	/* global_id = work_group_id * local_size + local_invocation_id */
	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Destination (x, y) offset: push constant bytes 0..7. */
	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(offset, 0);
	nir_intrinsic_set_range(offset, 16);
	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	offset->num_components = 2;
	nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
	nir_builder_instr_insert(&b, &offset->instr);

	/* Destination row multiplier: push constant bytes 8..11. */
	nir_intrinsic_instr *pitch = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(pitch, 0);
	nir_intrinsic_set_range(pitch, 16);
	pitch->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
	pitch->num_components = 1;
	nir_ssa_dest_init(&pitch->instr, &pitch->dest, 1, 32, "pitch");
	nir_builder_instr_insert(&b, &pitch->instr);

	/* Source row multiplier: push constant bytes 12..15. */
	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(stride, 0);
	nir_intrinsic_set_range(stride, 16);
	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	stride->num_components = 1;
	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
	nir_builder_instr_insert(&b, &stride->instr);

	nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
	nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);

	/* Source element index: the source is addressed without the offset. */
	nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
	tmp = nir_iadd(&b, tmp, pos_x);

	nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);

	/* Destination coordinate: global id shifted by the pushed offset. */
	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);

	/* The destination position must be derived from img_coord so that the
	 * pushed offset is honoured; using the raw global id here would always
	 * write at the origin of the destination.
	 */
	nir_ssa_def *global_pos =
		nir_iadd(&b,
			 nir_imul(&b, nir_channel(&b, img_coord, 1), &pitch->dest.ssa),
			 nir_imul(&b, nir_channel(&b, img_coord, 0), nir_imm_int(&b, 3)));

	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
	tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
	tex->op = nir_texop_txf;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
	tex->src[1].src_type = nir_tex_src_lod;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->src[2].src_type = nir_tex_src_texture_deref;
	tex->src[2].src = nir_src_for_ssa(input_img_deref);
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = 1;
	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	nir_ssa_def *outval = &tex->dest.ssa;

	/* Write the three fetched channels to consecutive destination elements. */
	for (int chan = 0; chan < 3; chan++) {
		nir_ssa_def *local_pos =
			nir_iadd(&b, global_pos, nir_imm_int(&b, chan));

		nir_ssa_def *coord =
			nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);

		nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
		store->num_components = 1;
		store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
		store->src[1] = nir_src_for_ssa(coord);
		store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
		store->src[3] = nir_src_for_ssa(nir_channel(&b, outval, chan));
		nir_builder_instr_insert(&b, &store->instr);
	}

	return b.shader;
}
static VkResult
radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device)
{
VkResult result;
struct radv_shader_module cs = { .nir = NULL };
cs.nir = build_nir_btoi_r32g32b32_compute_shader(device);
VkDescriptorSetLayoutCreateInfo ds_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
.bindingCount = 2,
.pBindings = (VkDescriptorSetLayoutBinding[]) {
{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL
},
{
.binding = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = NULL
},
}
};
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
&ds_create_info,
&device->meta_state.alloc,
&device->meta_state.btoi_r32g32b32.img_ds_layout);
if (result != VK_SUCCESS)
goto fail;
VkPipelineLayoutCreateInfo pl_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
};
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
&pl_create_info,
&device->meta_state.alloc,
&device->meta_state.btoi_r32g32b32.img_p_layout);
if (result != VK_SUCCESS)
goto fail;
/* compute shader */
VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = radv_shader_module_to_handle(&cs),
.pName = "main",
.pSpecializationInfo = NULL,
};
VkComputePipelineCreateInfo vk_pipeline_info = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = pipeline_shader_stage,
.flags = 0,
.layout = device->meta_state.btoi_r32g32b32.img_p_layout,
};
result = radv_CreateComputePipelines(radv_device_to_handle(device),
radv_pipeline_cache_to_handle(&device->meta_state.cache),
1, &vk_pipeline_info, NULL,
&device->meta_state.btoi_r32g32b32.pipeline);
fail:
ralloc_free(cs.nir);
return result;
}
/*
 * Destroy the pipeline layout, descriptor set layout and pipeline stored in
 * device->meta_state.btoi_r32g32b32, in that order.
 */
static void
radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device)
{
	radv_DestroyPipelineLayout(radv_device_to_handle(device),
				   device->meta_state.btoi_r32g32b32.img_p_layout,
				   &device->meta_state.alloc);

	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
					device->meta_state.btoi_r32g32b32.img_ds_layout,
					&device->meta_state.alloc);

	radv_DestroyPipeline(radv_device_to_handle(device),
			     device->meta_state.btoi_r32g32b32.pipeline,
			     &device->meta_state.alloc);
}
static nir_shader * static nir_shader *
build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d) build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
{ {
@ -1056,6 +1264,7 @@ radv_device_finish_meta_bufimage_state(struct radv_device *device)
{ {
radv_device_finish_meta_itob_state(device); radv_device_finish_meta_itob_state(device);
radv_device_finish_meta_btoi_state(device); radv_device_finish_meta_btoi_state(device);
radv_device_finish_meta_btoi_r32g32b32_state(device);
radv_device_finish_meta_itoi_state(device); radv_device_finish_meta_itoi_state(device);
radv_device_finish_meta_cleari_state(device); radv_device_finish_meta_cleari_state(device);
radv_device_finish_meta_cleari_r32g32b32_state(device); radv_device_finish_meta_cleari_r32g32b32_state(device);
@ -1074,6 +1283,10 @@ radv_device_init_meta_bufimage_state(struct radv_device *device)
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
goto fail_btoi; goto fail_btoi;
result = radv_device_init_meta_btoi_r32g32b32_state(device);
if (result != VK_SUCCESS)
goto fail_btoi_r32g32b32;
result = radv_device_init_meta_itoi_state(device); result = radv_device_init_meta_itoi_state(device);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
goto fail_itoi; goto fail_itoi;
@ -1093,6 +1306,8 @@ fail_cleari:
radv_device_finish_meta_cleari_state(device); radv_device_finish_meta_cleari_state(device);
fail_itoi: fail_itoi:
radv_device_finish_meta_itoi_state(device); radv_device_finish_meta_itoi_state(device);
fail_btoi_r32g32b32:
radv_device_finish_meta_btoi_r32g32b32_state(device);
fail_btoi: fail_btoi:
radv_device_finish_meta_btoi_state(device); radv_device_finish_meta_btoi_state(device);
fail_itob: fail_itob:
@ -1219,6 +1434,125 @@ radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
} }
} }
static void
btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
struct radv_buffer_view *src,
struct radv_buffer_view *dst)
{
struct radv_device *device = cmd_buffer->device;
radv_meta_push_descriptor_set(cmd_buffer,
VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.btoi_r32g32b32.img_p_layout,
0, /* set */
2, /* descriptorWriteCount */
(VkWriteDescriptorSet[]) {
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
.pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
},
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
.pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
}
});
}
static void
radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_blit2d_buffer *src,
struct radv_meta_blit2d_surf *dst,
unsigned num_rects,
struct radv_meta_blit2d_rect *rects)
{
VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
struct radv_device_memory mem = { .bo = dst->image->bo };
struct radv_device *device = cmd_buffer->device;
struct radv_buffer_view src_view, dst_view;
unsigned dst_offset = 0;
unsigned stride;
VkFormat dst_format;
VkBuffer buffer;
switch (dst->format) {
case VK_FORMAT_R32G32B32_UINT:
dst_format = VK_FORMAT_R32_UINT;
break;
case VK_FORMAT_R32G32B32_SINT:
dst_format = VK_FORMAT_R32_SINT;
break;
case VK_FORMAT_R32G32B32_SFLOAT:
dst_format = VK_FORMAT_R32_SFLOAT;
break;
default:
unreachable("invalid R32G32B32 format");
}
/* This special btoi path for R32G32B32 formats will write the linear
* image as a buffer with the same underlying memory. The compute
* shader will clear all components separately using a R32 format.
*/
radv_CreateBuffer(radv_device_to_handle(device),
&(VkBufferCreateInfo) {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.flags = 0,
.size = dst->image->size,
.usage = VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
}, NULL, &buffer);
radv_BindBufferMemory2(radv_device_to_handle(device), 1,
(VkBindBufferMemoryInfoKHR[]) {
{
.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
.buffer = buffer,
.memory = radv_device_memory_to_handle(&mem),
.memoryOffset = dst->image->offset,
}
});
create_bview(cmd_buffer, src->buffer, src->offset,
src->format, &src_view);
create_bview(cmd_buffer, radv_buffer_from_handle(buffer), dst_offset,
dst_format, &dst_view);
btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
stride = dst->image->surface.u.gfx9.surf_pitch;
} else {
stride = dst->image->surface.u.legacy.level[0].nblk_x * 3;
}
for (unsigned r = 0; r < num_rects; ++r) {
unsigned push_constants[4] = {
rects[r].dst_x,
rects[r].dst_y,
stride,
src->pitch,
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.btoi_r32g32b32.img_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
push_constants);
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
}
radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
}
static void static void
btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
struct radv_buffer_view *src, struct radv_buffer_view *src,
@ -1269,6 +1603,14 @@ radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
struct radv_buffer_view src_view; struct radv_buffer_view src_view;
struct radv_image_view dst_view; struct radv_image_view dst_view;
if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT ||
dst->image->vk_format == VK_FORMAT_R32G32B32_SINT ||
dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst,
num_rects, rects);
return;
}
create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view); create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
create_iview(cmd_buffer, dst, &dst_view); create_iview(cmd_buffer, dst, &dst_view);
btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view); btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);

View file

@ -195,10 +195,14 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
/* Perform Blit */ /* Perform Blit */
if (cs) if (cs ||
(img_bsurf.image->vk_format == VK_FORMAT_R32G32B32_UINT ||
img_bsurf.image->vk_format == VK_FORMAT_R32G32B32_SINT ||
img_bsurf.image->vk_format == VK_FORMAT_R32G32B32_SFLOAT)) {
radv_meta_buffer_to_image_cs(cmd_buffer, &buf_bsurf, &img_bsurf, 1, &rect); radv_meta_buffer_to_image_cs(cmd_buffer, &buf_bsurf, &img_bsurf, 1, &rect);
else } else {
radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect); radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect);
}
/* Once we've done the blit, all of the actual information about /* Once we've done the blit, all of the actual information about
* the image is embedded in the command buffer so we can just * the image is embedded in the command buffer so we can just

View file

@ -505,6 +505,11 @@ struct radv_meta_state {
VkPipeline pipeline; VkPipeline pipeline;
VkPipeline pipeline_3d; VkPipeline pipeline_3d;
} btoi; } btoi;
/* Objects for the buffer-to-image special path for R32G32B32 formats:
 * the pipeline layout, its descriptor set layout and the compute pipeline.
 */
struct {
VkPipelineLayout img_p_layout;
VkDescriptorSetLayout img_ds_layout;
VkPipeline pipeline;
} btoi_r32g32b32;
struct { struct {
VkPipelineLayout img_p_layout; VkPipelineLayout img_p_layout;
VkDescriptorSetLayout img_ds_layout; VkDescriptorSetLayout img_ds_layout;