From 896c9cf486182a0058deeec0036ceaca1ddd2819 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 28 Mar 2024 14:42:10 +0100 Subject: [PATCH] radv: remove radv_device::physical_device Get the logical device object using the base object. Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/layers/radv_sqtt_layer.c | 16 +- src/amd/vulkan/meta/radv_meta.c | 17 +- src/amd/vulkan/meta/radv_meta_astc_decode.c | 6 +- src/amd/vulkan/meta/radv_meta_buffer.c | 3 +- src/amd/vulkan/meta/radv_meta_bufimage.c | 6 +- src/amd/vulkan/meta/radv_meta_clear.c | 27 +- src/amd/vulkan/meta/radv_meta_copy.c | 14 +- .../vulkan/meta/radv_meta_copy_vrs_htile.c | 5 +- src/amd/vulkan/meta/radv_meta_dcc_retile.c | 13 +- src/amd/vulkan/meta/radv_meta_etc_decode.c | 5 +- src/amd/vulkan/meta/radv_meta_fast_clear.c | 5 +- src/amd/vulkan/meta/radv_meta_fmask_copy.c | 4 +- src/amd/vulkan/meta/radv_meta_resolve.c | 7 +- .../nir/radv_nir_apply_pipeline_layout.c | 9 +- src/amd/vulkan/nir/radv_nir_lower_io.c | 23 +- .../vulkan/nir/radv_nir_lower_ray_queries.c | 5 +- src/amd/vulkan/nir/radv_nir_rt_common.c | 6 +- src/amd/vulkan/nir/radv_nir_rt_shader.c | 15 +- src/amd/vulkan/radv_acceleration_structure.c | 21 +- src/amd/vulkan/radv_android.c | 22 +- src/amd/vulkan/radv_buffer.c | 9 +- src/amd/vulkan/radv_buffer_view.c | 10 +- src/amd/vulkan/radv_cmd_buffer.c | 503 ++++++++++-------- src/amd/vulkan/radv_cp_reg_shadowing.c | 6 +- src/amd/vulkan/radv_debug.c | 69 +-- src/amd/vulkan/radv_descriptor_set.c | 6 +- src/amd/vulkan/radv_device.c | 145 ++--- .../vulkan/radv_device_generated_commands.c | 64 ++- src/amd/vulkan/radv_device_memory.c | 13 +- src/amd/vulkan/radv_formats.c | 5 +- src/amd/vulkan/radv_image.c | 206 ++++--- src/amd/vulkan/radv_image_view.c | 57 +- src/amd/vulkan/radv_perfcounter.c | 27 +- src/amd/vulkan/radv_pipeline.c | 24 +- src/amd/vulkan/radv_pipeline_cache.c | 13 +- src/amd/vulkan/radv_pipeline_compute.c | 5 +- src/amd/vulkan/radv_pipeline_graphics.c | 123 +++-- src/amd/vulkan/radv_pipeline_rt.c | 7 +- src/amd/vulkan/radv_printf.c | 8 +- src/amd/vulkan/radv_private.h | 29 +- src/amd/vulkan/radv_query.c | 124 ++--- src/amd/vulkan/radv_queue.c | 158 +++--- src/amd/vulkan/radv_rmv.c | 14 +- src/amd/vulkan/radv_rra.c | 17 +- src/amd/vulkan/radv_sampler.c | 15 +- src/amd/vulkan/radv_sdma.c | 45 +- src/amd/vulkan/radv_shader.c | 87 +-- src/amd/vulkan/radv_shader_args.c | 12 +- src/amd/vulkan/radv_shader_info.c | 102 ++-- src/amd/vulkan/radv_shader_object.c | 12 +- src/amd/vulkan/radv_spm.c | 13 +- src/amd/vulkan/radv_sqtt.c | 78 +-- src/amd/vulkan/radv_video.c | 73 +-- src/amd/vulkan/radv_wsi.c | 8 +- src/amd/vulkan/si_cmd_buffer.c | 109 ++-- 55 files changed, 1390 insertions(+), 1035 deletions(-) diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c index 4af7b18f64f..c8664e61fa1 100644 --- a/src/amd/vulkan/layers/radv_sqtt_layer.c +++ b/src/amd/vulkan/layers/radv_sqtt_layer.c @@ -35,7 +35,8 @@ void radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv_graphics_pipeline *pipeline) { - const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc; struct radeon_cmdbuf *cs = cmd_buffer->cs; uint64_t va; @@ -340,7 +341,8 @@ radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) return; /* Reserve a command buffer ID for SQTT. */ - enum amd_ip_type ip_type = radv_queue_family_to_ring(cmd_buffer->device->physical_device, cmd_buffer->qf); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + enum amd_ip_type ip_type = radv_queue_family_to_ring(pdev, cmd_buffer->qf); union rgp_sqtt_marker_cb_id cb_id = ac_sqtt_get_next_cmdbuf_id(&cmd_buffer->device->sqtt, ip_type); cmd_buffer->sqtt_cb_id = cb_id.all; @@ -354,7 +356,7 @@ radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) if (cmd_buffer->qf == RADV_QUEUE_GENERAL) marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT; - if (!radv_sparse_queue_enabled(cmd_buffer->device->physical_device)) + if (!radv_sparse_queue_enabled(pdev)) marker.queue_flags |= VK_QUEUE_SPARSE_BINDING_BIT; radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); @@ -655,6 +657,7 @@ radv_handle_sqtt(VkQueue _queue) { RADV_FROM_HANDLE(radv_queue, queue, _queue); + const struct radv_physical_device *pdev = radv_device_physical(queue->device); bool trigger = queue->device->sqtt_triggered; queue->device->sqtt_triggered = false; @@ -673,8 +676,7 @@ radv_handle_sqtt(VkQueue _queue) if (queue->device->spm.bo) ac_spm_get_trace(&queue->device->spm, &spm_trace); - ac_dump_rgp_capture(&queue->device->physical_device->info, &sqtt_trace, - queue->device->spm.bo ? &spm_trace : NULL); + ac_dump_rgp_capture(&pdev->info, &sqtt_trace, queue->device->spm.bo ? &spm_trace : NULL); } else { /* Trigger a new capture if the driver failed to get * the trace because the buffer was too small. @@ -687,7 +689,7 @@ radv_handle_sqtt(VkQueue _queue) } if (trigger) { - if (ac_check_profile_state(&queue->device->physical_device->info)) { + if (ac_check_profile_state(&pdev->info)) { fprintf(stderr, "radv: Canceling RGP trace request as a hang condition has been " "detected. Force the GPU into a profiling mode with e.g. " "\"echo profile_peak > " @@ -1415,7 +1417,7 @@ static void radv_fill_code_object_record(struct radv_device *device, struct rgp_shader_data *shader_data, struct radv_shader *shader, uint64_t va) { - struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); unsigned lds_increment = pdev->info.gfx_level >= GFX11 && shader->info.stage == MESA_SHADER_FRAGMENT ? 1024 : pdev->info.lds_encode_granularity; diff --git a/src/amd/vulkan/meta/radv_meta.c b/src/amd/vulkan/meta/radv_meta.c index 48373b15e5e..d975a55b56a 100644 --- a/src/amd/vulkan/meta/radv_meta.c +++ b/src/amd/vulkan/meta/radv_meta.c @@ -461,6 +461,7 @@ fail: VkResult radv_device_init_meta(struct radv_device *device) { + struct radv_physical_device *pdev = radv_device_physical(device); VkResult result; memset(&device->meta_state, 0, sizeof(device->meta_state)); @@ -521,7 +522,7 @@ radv_device_init_meta(struct radv_device *device) if (result != VK_SUCCESS) goto fail_resolve_fragment; - if (device->physical_device->use_fmask) { + if (pdev->use_fmask) { result = radv_device_init_meta_fmask_expand_state(device, on_demand); if (result != VK_SUCCESS) goto fail_fmask_expand; @@ -555,11 +556,11 @@ radv_device_init_meta(struct radv_device *device) /* FIXME: Acceleration structure builds hang when the build shaders are compiled with LLVM. * Work around it by forcing ACO for now. */ - bool use_llvm = device->physical_device->use_llvm; + bool use_llvm = pdev->use_llvm; if (loaded_cache || use_llvm) { - device->physical_device->use_llvm = false; + pdev->use_llvm = false; result = radv_device_init_accel_struct_build_state(device); - device->physical_device->use_llvm = use_llvm; + pdev->use_llvm = use_llvm; if (result != VK_SUCCESS) goto fail_accel_struct; @@ -639,6 +640,7 @@ radv_device_finish_meta(struct radv_device *device) nir_builder PRINTFLIKE(3, 4) radv_meta_init_shader(struct radv_device *dev, gl_shader_stage stage, const char *name, ...) { + const struct radv_physical_device *pdev = radv_device_physical(dev); nir_builder b = nir_builder_init_simple_shader(stage, NULL, NULL); if (name) { va_list args; @@ -647,7 +649,7 @@ nir_builder PRINTFLIKE(3, 4) va_end(args); } - b.shader->options = &dev->physical_device->nir_options[stage]; + b.shader->options = &pdev->nir_options[stage]; radv_device_associate_nir(dev, b.shader); @@ -684,6 +686,7 @@ void radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, bool is_integer, int samples, nir_variable *input_img, nir_variable *color, nir_def *img_coord) { + const struct radv_physical_device *pdev = radv_device_physical(device); nir_deref_instr *input_img_deref = nir_build_deref_var(b, input_img); nir_def *sample0 = nir_txf_ms_deref(b, input_img_deref, img_coord, nir_imm_int(b, 0)); @@ -692,7 +695,7 @@ radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, return; } - if (device->physical_device->use_fmask) { + if (pdev->use_fmask) { nir_def *all_same = nir_samples_identical_deref(b, input_img_deref, img_coord); nir_push_if(b, nir_inot(b, all_same)); } @@ -706,7 +709,7 @@ radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, accum = nir_fdiv_imm(b, accum, samples); nir_store_var(b, color, accum, 0xf); - if (device->physical_device->use_fmask) { + if (pdev->use_fmask) { nir_push_else(b, NULL); nir_store_var(b, color, sample0, 0xf); nir_pop_if(b, NULL); diff --git a/src/amd/vulkan/meta/radv_meta_astc_decode.c b/src/amd/vulkan/meta/radv_meta_astc_decode.c index c069889e303..c03d77501ca 100644 --- a/src/amd/vulkan/meta/radv_meta_astc_decode.c +++ b/src/amd/vulkan/meta/radv_meta_astc_decode.c @@ -32,9 +32,10 @@ VkResult radv_device_init_meta_astc_decode_state(struct radv_device *device, bool on_demand) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_meta_state *state = &device->meta_state; - if (!device->physical_device->emulate_astc) + if (!pdev->emulate_astc) return VK_SUCCESS; return vk_texcompress_astc_init(&device->vk, &state->alloc, state->cache, &state->astc_decode); @@ -43,10 +44,11 @@ radv_device_init_meta_astc_decode_state(struct radv_device *device, bool on_dema void radv_device_finish_meta_astc_decode_state(struct radv_device *device) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_meta_state *state = &device->meta_state; struct vk_texcompress_astc_state *astc = state->astc_decode; - if (!device->physical_device->emulate_astc) + if (!pdev->emulate_astc) return; vk_texcompress_astc_finish(&device->vk, &state->alloc, astc); diff --git a/src/amd/vulkan/meta/radv_meta_buffer.c b/src/amd/vulkan/meta/radv_meta_buffer.c index 91599478fa1..104d1ef6721 100644 --- a/src/amd/vulkan/meta/radv_meta_buffer.c +++ b/src/amd/vulkan/meta/radv_meta_buffer.c @@ -213,9 +213,10 @@ static bool radv_prefer_compute_dma(const struct radv_device *device, uint64_t size, struct radeon_winsys_bo *src_bo, struct radeon_winsys_bo *dst_bo) { + const struct radv_physical_device *pdev = radv_device_physical(device); bool use_compute = size >= RADV_BUFFER_OPS_CS_THRESHOLD; - if (device->physical_device->info.gfx_level >= GFX10 && device->physical_device->info.has_dedicated_vram) { + if (pdev->info.gfx_level >= GFX10 && pdev->info.has_dedicated_vram) { if ((src_bo && !(src_bo->initial_domain & RADEON_DOMAIN_VRAM)) || (dst_bo && !(dst_bo->initial_domain & RADEON_DOMAIN_VRAM))) { /* Prefer CP DMA for GTT on dGPUS due to slow PCIe. */ diff --git a/src/amd/vulkan/meta/radv_meta_bufimage.c b/src/amd/vulkan/meta/radv_meta_bufimage.c index d4a3ff92c56..67df8b003f5 100644 --- a/src/amd/vulkan/meta/radv_meta_bufimage.c +++ b/src/amd/vulkan/meta/radv_meta_bufimage.c @@ -1174,11 +1174,12 @@ fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_meta_bl const struct radv_meta_blit2d_surf *img_bsurf, const struct radv_meta_blit2d_rect *rect, bool to_image) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const unsigned mip_level = img_bsurf->level; const struct radv_image *image = img_bsurf->image; const struct radeon_surf *surf = &image->planes[0].surface; struct radv_device *device = cmd_buffer->device; - const struct radeon_info *gpu_info = &device->physical_device->info; + const struct radeon_info *gpu_info = &pdev->info; struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws); struct ac_surf_info surf_info = radv_get_ac_surf_info(device, image); @@ -1243,9 +1244,10 @@ fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_meta_bl static unsigned get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); unsigned stride; - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { stride = surf->image->planes[0].surface.u.gfx9.surf_pitch; } else { stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3; diff --git a/src/amd/vulkan/meta/radv_meta_clear.c b/src/amd/vulkan/meta/radv_meta_clear.c index 3185b83aae6..1163687e314 100644 --- a/src/amd/vulkan/meta/radv_meta_clear.c +++ b/src/amd/vulkan/meta/radv_meta_clear.c @@ -1139,10 +1139,11 @@ uint32_t radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range, uint32_t value) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); uint64_t offset = image->bindings[0].offset + image->planes[0].surface.cmask_offset; uint64_t size; - if (cmd_buffer->device->physical_device->info.gfx_level == GFX9) { + if (pdev->info.gfx_level == GFX9) { /* TODO: clear layers. */ size = image->planes[0].surface.cmask_size; } else { @@ -1178,6 +1179,7 @@ uint32_t radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range, uint32_t value) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); uint32_t level_count = vk_image_subresource_level_count(&image->vk, range); uint32_t layer_count = vk_image_subresource_layer_count(&image->vk, range); uint32_t flush_bits = 0; @@ -1190,12 +1192,12 @@ radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, con uint32_t level = range->baseMipLevel + l; uint64_t size; - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level >= GFX10) { /* DCC for mipmaps+layers is currently disabled. */ offset += image->planes[0].surface.meta_slice_size * range->baseArrayLayer + image->planes[0].surface.u.gfx9.meta_levels[level].offset; size = image->planes[0].surface.u.gfx9.meta_levels[level].size * layer_count; - } else if (cmd_buffer->device->physical_device->info.gfx_level == GFX9) { + } else if (pdev->info.gfx_level == GFX9) { /* Mipmap levels and layers aren't implemented. */ assert(level == 0); size = image->planes[0].surface.meta_size; @@ -1331,6 +1333,7 @@ uint32_t radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, const VkImageSubresourceRange *range, uint32_t value) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); uint32_t level_count = vk_image_subresource_level_count(&image->vk, range); uint32_t flush_bits = 0; uint32_t htile_mask; @@ -1338,7 +1341,7 @@ radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im htile_mask = radv_get_htile_mask(cmd_buffer->device, image, range->aspectMask); if (level_count != image->vk.mip_levels) { - assert(cmd_buffer->device->physical_device->info.gfx_level >= GFX10); + assert(pdev->info.gfx_level >= GFX10); /* Clear individuals levels separately. */ for (uint32_t l = 0; l < level_count; l++) { @@ -1398,7 +1401,8 @@ enum { static uint32_t radv_dcc_single_clear_value(const struct radv_device *device) { - return device->physical_device->info.gfx_level >= GFX11 ? RADV_DCC_GFX11_CLEAR_SINGLE : RADV_DCC_GFX9_CLEAR_SINGLE; + const struct radv_physical_device *pdev = radv_device_physical(device); + return pdev->info.gfx_level >= GFX11 ? RADV_DCC_GFX11_CLEAR_SINGLE : RADV_DCC_GFX9_CLEAR_SINGLE; } static void @@ -1605,6 +1609,7 @@ radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_ VkImageLayout image_layout, const VkClearRect *clear_rect, VkClearColorValue clear_value, uint32_t view_mask) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); uint32_t clear_color[2]; if (!iview || !iview->support_fast_clear) @@ -1641,7 +1646,7 @@ radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_ bool can_avoid_fast_clear_elim; uint32_t reset_value; - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { if (!gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value)) return false; } else { @@ -1650,7 +1655,7 @@ radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_ } if (iview->image->vk.mip_levels > 1) { - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { uint32_t last_level = iview->vk.base_mip_level + iview->vk.level_count - 1; if (last_level >= iview->image->planes[0].surface.num_meta_levels) { /* Do not fast clears if one level can't be fast cleard. */ @@ -1680,6 +1685,7 @@ radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_imag const VkClearAttachment *clear_att, enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); VkClearColorValue clear_value = clear_att->clearValue.color; uint32_t clear_color[4], flush_bits = 0; uint32_t cmask_clear_value; @@ -1710,7 +1716,7 @@ radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_imag uint32_t reset_value; bool can_avoid_fast_clear_elim = true; - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { ASSERTED bool result = gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value); assert(result); } else { @@ -2074,6 +2080,7 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *imag const VkClearValue *clear_value, uint32_t range_count, const VkImageSubresourceRange *ranges, bool cs) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); VkFormat format = image->vk.format; VkClearValue internal_clear_value; @@ -2086,8 +2093,8 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *imag if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) { bool blendable; - if (cs ? !radv_is_storage_image_format_supported(cmd_buffer->device->physical_device, format) - : !radv_is_colorbuffer_format_supported(cmd_buffer->device->physical_device, format, &blendable)) { + if (cs ? !radv_is_storage_image_format_supported(pdev, format) + : !radv_is_colorbuffer_format_supported(pdev, format, &blendable)) { format = VK_FORMAT_R32_UINT; internal_clear_value.color.uint32[0] = float3_to_rgb9e5(clear_value->color.float32); diff --git a/src/amd/vulkan/meta/radv_meta_copy.c b/src/amd/vulkan/meta/radv_meta_copy.c index b60c6df60c8..95c6f1152e5 100644 --- a/src/amd/vulkan/meta/radv_meta_copy.c +++ b/src/amd/vulkan/meta/radv_meta_copy.c @@ -239,13 +239,14 @@ radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToIm RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer); RADV_FROM_HANDLE(radv_image, dst_image, pCopyBufferToImageInfo->dstImage); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) { copy_buffer_to_image(cmd_buffer, src_buffer, dst_image, pCopyBufferToImageInfo->dstImageLayout, &pCopyBufferToImageInfo->pRegions[r]); } - if (radv_is_format_emulated(cmd_buffer->device->physical_device, dst_image->vk.format)) { + if (radv_is_format_emulated(pdev, dst_image->vk.format)) { cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, dst_image) | @@ -422,6 +423,8 @@ static void copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkImageLayout src_image_layout, struct radv_image *dst_image, VkImageLayout dst_image_layout, const VkImageCopy2 *region) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) { transfer_copy_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout, region); return; @@ -499,9 +502,9 @@ copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkI src_image_layout, src_queue_mask); bool need_dcc_sign_reinterpret = false; - if (!src_compressed || (radv_dcc_formats_compatible(cmd_buffer->device->physical_device->info.gfx_level, - b_src.format, b_dst.format, &need_dcc_sign_reinterpret) && - !need_dcc_sign_reinterpret)) { + if (!src_compressed || + (radv_dcc_formats_compatible(pdev->info.gfx_level, b_src.format, b_dst.format, &need_dcc_sign_reinterpret) && + !need_dcc_sign_reinterpret)) { b_src.format = b_dst.format; } else if (!dst_compressed) { b_dst.format = b_src.format; @@ -613,13 +616,14 @@ radv_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyI RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_image, src_image, pCopyImageInfo->srcImage); RADV_FROM_HANDLE(radv_image, dst_image, pCopyImageInfo->dstImage); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) { copy_image(cmd_buffer, src_image, pCopyImageInfo->srcImageLayout, dst_image, pCopyImageInfo->dstImageLayout, &pCopyImageInfo->pRegions[r]); } - if (radv_is_format_emulated(cmd_buffer->device->physical_device, dst_image->vk.format)) { + if (radv_is_format_emulated(pdev, dst_image->vk.format)) { cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, dst_image) | diff --git a/src/amd/vulkan/meta/radv_meta_copy_vrs_htile.c b/src/amd/vulkan/meta/radv_meta_copy_vrs_htile.c index 3fe7f58d785..6de669d33c1 100644 --- a/src/amd/vulkan/meta/radv_meta_copy_vrs_htile.c +++ b/src/amd/vulkan/meta/radv_meta_copy_vrs_htile.c @@ -43,6 +43,7 @@ radv_device_finish_meta_copy_vrs_htile_state(struct radv_device *device) static nir_shader * build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf) { + const struct radv_physical_device *pdev = radv_device_physical(device); nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "meta_copy_vrs_htile"); b.shader->info.workgroup_size[0] = 8; b.shader->info.workgroup_size[1] = 8; @@ -64,8 +65,8 @@ build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf /* Get the HTILE addr from coordinates. */ nir_def *zero = nir_imm_int(&b, 0); nir_def *htile_addr = - ac_nir_htile_addr_from_coord(&b, &device->physical_device->info, &surf->u.gfx9.zs.htile_equation, htile_pitch, - htile_slice_size, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero); + ac_nir_htile_addr_from_coord(&b, &pdev->info, &surf->u.gfx9.zs.htile_equation, htile_pitch, htile_slice_size, + nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero); /* Set up the input VRS image descriptor. */ const struct glsl_type *vrs_sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT); diff --git a/src/amd/vulkan/meta/radv_meta_dcc_retile.c b/src/amd/vulkan/meta/radv_meta_dcc_retile.c index f42dae9ec76..7348df7abe8 100644 --- a/src/amd/vulkan/meta/radv_meta_dcc_retile.c +++ b/src/amd/vulkan/meta/radv_meta_dcc_retile.c @@ -31,6 +31,7 @@ static nir_shader * build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *surf) { + const struct radv_physical_device *pdev = radv_device_physical(dev); enum glsl_sampler_dim dim = GLSL_SAMPLER_DIM_BUF; const struct glsl_type *buf_type = glsl_image_type(dim, false, GLSL_TYPE_UINT); nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "dcc_retile_compute"); @@ -60,12 +61,12 @@ build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *sur coord = nir_imul(&b, coord, nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, surf->u.gfx9.color.dcc_block_height)); - nir_def *src = ac_nir_dcc_addr_from_coord(&b, &dev->physical_device->info, surf->bpe, - &surf->u.gfx9.color.dcc_equation, src_dcc_pitch, src_dcc_height, zero, - nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero, zero); - nir_def *dst = ac_nir_dcc_addr_from_coord( - &b, &dev->physical_device->info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation, dst_dcc_pitch, - dst_dcc_height, zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero, zero); + nir_def *src = ac_nir_dcc_addr_from_coord(&b, &pdev->info, surf->bpe, &surf->u.gfx9.color.dcc_equation, + src_dcc_pitch, src_dcc_height, zero, nir_channel(&b, coord, 0), + nir_channel(&b, coord, 1), zero, zero, zero); + nir_def *dst = ac_nir_dcc_addr_from_coord(&b, &pdev->info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation, + dst_dcc_pitch, dst_dcc_height, zero, nir_channel(&b, coord, 0), + nir_channel(&b, coord, 1), zero, zero, zero); nir_def *dcc_val = nir_image_deref_load(&b, 1, 32, input_dcc_ref, nir_vec4(&b, src, src, src, src), nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = dim); diff --git a/src/amd/vulkan/meta/radv_meta_etc_decode.c b/src/amd/vulkan/meta/radv_meta_etc_decode.c index 28b3c907682..aa18016ddd3 100644 --- a/src/amd/vulkan/meta/radv_meta_etc_decode.c +++ b/src/amd/vulkan/meta/radv_meta_etc_decode.c @@ -34,13 +34,14 @@ VkResult radv_device_init_meta_etc_decode_state(struct radv_device *device, bool on_demand) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_meta_state *state = &device->meta_state; - if (!device->physical_device->emulate_etc2) + if (!pdev->emulate_etc2) return VK_SUCCESS; state->etc_decode.allocator = &state->alloc; - state->etc_decode.nir_options = &device->physical_device->nir_options[MESA_SHADER_COMPUTE]; + state->etc_decode.nir_options = &pdev->nir_options[MESA_SHADER_COMPUTE]; state->etc_decode.pipeline_cache = state->cache; vk_texcompress_etc2_init(&device->vk, &state->etc_decode); diff --git a/src/amd/vulkan/meta/radv_meta_fast_clear.c b/src/amd/vulkan/meta/radv_meta_fast_clear.c index e00ac638a82..2e4e0501854 100644 --- a/src/amd/vulkan/meta/radv_meta_fast_clear.c +++ b/src/amd/vulkan/meta/radv_meta_fast_clear.c @@ -155,6 +155,7 @@ create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout) static VkResult create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipelineLayout layout) { + const struct radv_physical_device *pdev = radv_device_physical(device); VkResult result; VkDevice device_h = radv_device_to_handle(device); @@ -363,8 +364,8 @@ create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipeli }, &(struct radv_graphics_pipeline_create_info){ .use_rectlist = true, - .custom_blend_mode = device->physical_device->info.gfx_level >= GFX11 ? V_028808_CB_DCC_DECOMPRESS_GFX11 - : V_028808_CB_DCC_DECOMPRESS_GFX8, + .custom_blend_mode = + pdev->info.gfx_level >= GFX11 ? V_028808_CB_DCC_DECOMPRESS_GFX11 : V_028808_CB_DCC_DECOMPRESS_GFX8, }, &device->meta_state.alloc, &device->meta_state.fast_clear_flush.dcc_decompress_pipeline); if (result != VK_SUCCESS) diff --git a/src/amd/vulkan/meta/radv_meta_fmask_copy.c b/src/amd/vulkan/meta/radv_meta_fmask_copy.c index 81b20c23c45..7d8308ee45d 100644 --- a/src/amd/vulkan/meta/radv_meta_fmask_copy.c +++ b/src/amd/vulkan/meta/radv_meta_fmask_copy.c @@ -232,8 +232,10 @@ radv_can_use_fmask_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_im const struct radv_image *dst_image, unsigned num_rects, const struct radv_meta_blit2d_rect *rects) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + /* TODO: Test on pre GFX10 chips. */ - if (cmd_buffer->device->physical_device->info.gfx_level < GFX10) + if (pdev->info.gfx_level < GFX10) return false; /* TODO: Add support for layers. */ diff --git a/src/amd/vulkan/meta/radv_meta_resolve.c b/src/amd/vulkan/meta/radv_meta_resolve.c index 2cdd109c434..2a20d84cb2f 100644 --- a/src/amd/vulkan/meta/radv_meta_resolve.c +++ b/src/amd/vulkan/meta/radv_meta_resolve.c @@ -253,7 +253,8 @@ enum radv_resolve_method { static bool image_hw_resolve_compat(const struct radv_device *device, struct radv_image *src_image, struct radv_image *dst_image) { - if (device->physical_device->info.gfx_level >= GFX9) { + const struct radv_physical_device *pdev = radv_device_physical(device); + if (pdev->info.gfx_level >= GFX9) { return dst_image->planes[0].surface.u.gfx9.swizzle_mode == src_image->planes[0].surface.u.gfx9.swizzle_mode; } else { return dst_image->planes[0].surface.micro_tile_mode == src_image->planes[0].surface.micro_tile_mode; @@ -506,9 +507,9 @@ radv_CmdResolveImage2(VkCommandBuffer commandBuffer, const VkResolveImageInfo2 * RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_image, src_image, pResolveImageInfo->srcImage); RADV_FROM_HANDLE(radv_image, dst_image, pResolveImageInfo->dstImage); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); VkImageLayout src_image_layout = pResolveImageInfo->srcImageLayout; VkImageLayout dst_image_layout = pResolveImageInfo->dstImageLayout; - const struct radv_physical_device *pdev = cmd_buffer->device->physical_device; enum radv_resolve_method resolve_method = pdev->info.gfx_level >= GFX11 ? RESOLVE_FRAGMENT : RESOLVE_HW; /* we can use the hw resolve only for single full resolves */ @@ -622,7 +623,7 @@ radv_cmd_buffer_resolve_rendering_hw(struct radv_cmd_buffer *cmd_buffer, struct void radv_cmd_buffer_resolve_rendering(struct radv_cmd_buffer *cmd_buffer) { - const struct radv_physical_device *pdev = cmd_buffer->device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const struct radv_rendering_state *render = &cmd_buffer->state.render; enum radv_resolve_method resolve_method = pdev->info.gfx_level >= GFX11 ? RESOLVE_FRAGMENT : RESOLVE_HW; diff --git a/src/amd/vulkan/nir/radv_nir_apply_pipeline_layout.c b/src/amd/vulkan/nir/radv_nir_apply_pipeline_layout.c index be035c65596..48a98b10b4b 100644 --- a/src/amd/vulkan/nir/radv_nir_apply_pipeline_layout.c +++ b/src/amd/vulkan/nir/radv_nir_apply_pipeline_layout.c @@ -501,12 +501,13 @@ apply_layout_to_tex(nir_builder *b, apply_layout_state *state, nir_tex_instr *te void radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device, const struct radv_shader_stage *stage) { + const struct radv_physical_device *pdev = radv_device_physical(device); apply_layout_state state = { - .gfx_level = device->physical_device->info.gfx_level, - .address32_hi = device->physical_device->info.address32_hi, + .gfx_level = pdev->info.gfx_level, + .address32_hi = pdev->info.address32_hi, .disable_aniso_single_level = device->instance->drirc.disable_aniso_single_level, - .has_image_load_dcc_bug = device->physical_device->info.has_image_load_dcc_bug, - .disable_tg4_trunc_coord = !device->physical_device->info.conformant_trunc_coord && !device->disable_trunc_coord, + .has_image_load_dcc_bug = pdev->info.has_image_load_dcc_bug, + .disable_tg4_trunc_coord = !pdev->info.conformant_trunc_coord && !device->disable_trunc_coord, .args = &stage->args, .info = &stage->info, .layout = &stage->layout, diff --git a/src/amd/vulkan/nir/radv_nir_lower_io.c b/src/amd/vulkan/nir/radv_nir_lower_io.c index 8a55f504a3e..5e122437f5e 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_io.c +++ b/src/amd/vulkan/nir/radv_nir_lower_io.c @@ -72,6 +72,8 @@ radv_nir_lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask) void radv_nir_lower_io(struct radv_device *device, nir_shader *nir) { + const struct radv_physical_device *pdev = radv_device_physical(device); + if (nir->info.stage == MESA_SHADER_FRAGMENT) { nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, MESA_SHADER_FRAGMENT); } @@ -89,7 +91,7 @@ radv_nir_lower_io(struct radv_device *device, nir_shader *nir) NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out); - if (device->physical_device->use_ngg_streamout && nir->xfb_info) { + if (pdev->use_ngg_streamout && nir->xfb_info) { NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info); /* The total number of shader outputs is required for computing the pervertex LDS size for @@ -133,6 +135,7 @@ radv_map_io_driver_location(unsigned semantic) bool radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *stage) { + const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_shader_info *info = &stage->info; ac_nir_map_io_driver_location map_input = info->inputs_linked ? NULL : radv_map_io_driver_location; ac_nir_map_io_driver_location map_output = info->outputs_linked ? NULL : radv_map_io_driver_location; @@ -144,35 +147,33 @@ radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *s info->vs.tcs_temp_only_input_mask); return true; } else if (info->vs.as_es) { - NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, device->physical_device->info.gfx_level, - info->esgs_itemsize); + NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize); return true; } } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) { NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, map_input, info->vs.tcs_in_out_eq); - NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, map_output, device->physical_device->info.gfx_level, - info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read, info->tcs.num_linked_outputs, - info->tcs.num_linked_patch_outputs, info->wave_size, false, false); + NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, map_output, pdev->info.gfx_level, info->tcs.tes_inputs_read, + info->tcs.tes_patch_inputs_read, info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs, + info->wave_size, false, false); return true; } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) { NIR_PASS_V(nir, ac_nir_lower_tes_inputs_to_mem, map_input); if (info->tes.as_es) { - NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, device->physical_device->info.gfx_level, - info->esgs_itemsize); + NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize); } return true; } else if (nir->info.stage == MESA_SHADER_GEOMETRY) { - NIR_PASS_V(nir, ac_nir_lower_gs_inputs_to_mem, map_input, device->physical_device->info.gfx_level, false); + NIR_PASS_V(nir, ac_nir_lower_gs_inputs_to_mem, map_input, pdev->info.gfx_level, false); return true; } else if (nir->info.stage == MESA_SHADER_TASK) { - ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, device->physical_device->task_info.num_entries, + ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, pdev->task_info.num_entries, info->cs.has_query); return true; } else if (nir->info.stage == MESA_SHADER_MESH) { - ac_nir_lower_mesh_inputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, device->physical_device->task_info.num_entries); + ac_nir_lower_mesh_inputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, pdev->task_info.num_entries); return true; } diff --git a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c index af871fa234a..4f36f25c1a9 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c +++ b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c @@ -648,6 +648,7 @@ lower_rq_terminate(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr, s bool radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device) { + const struct radv_physical_device *pdev = radv_device_physical(device); bool progress = false; struct hash_table *query_ht = _mesa_pointer_hash_table_create(NULL); @@ -655,7 +656,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device if (!var->data.ray_query) continue; - lower_ray_query(shader, var, query_ht, device->physical_device->max_shared_size); + lower_ray_query(shader, var, query_ht, pdev->max_shared_size); progress = true; } @@ -670,7 +671,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device if (!var->data.ray_query) continue; - lower_ray_query(shader, var, query_ht, device->physical_device->max_shared_size); + lower_ray_query(shader, var, query_ht, pdev->max_shared_size); progress = true; } diff --git a/src/amd/vulkan/nir/radv_nir_rt_common.c b/src/amd/vulkan/nir/radv_nir_rt_common.c index 5b9a09d814a..df78243970a 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_common.c +++ b/src/amd/vulkan/nir/radv_nir_rt_common.c @@ -297,11 +297,12 @@ build_addr_to_node(nir_builder *b, nir_def *addr) static nir_def * build_node_to_addr(struct radv_device *device, nir_builder *b, nir_def *node, bool skip_type_and) { + const struct radv_physical_device *pdev = radv_device_physical(device); nir_def *addr = skip_type_and ? node : nir_iand_imm(b, node, ~7ull); addr = nir_ishl_imm(b, addr, 3); /* Assumes everything is in the top half of address space, which is true in * GFX9+ for now. */ - return device->physical_device->info.gfx_level >= GFX9 ? nir_ior_imm(b, addr, 0xffffull << 48) : addr; + return pdev->info.gfx_level >= GFX9 ? nir_ior_imm(b, addr, 0xffffull << 48) : addr; } nir_def * @@ -477,6 +478,7 @@ radv_test_flag(nir_builder *b, const struct radv_ray_traversal_args *args, uint3 nir_def * radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args) { + const struct radv_physical_device *pdev = radv_device_physical(device); nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete"); nir_store_var(b, incomplete, nir_imm_true(b), 0x1); @@ -568,7 +570,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc nir_def *global_bvh_node = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node)); nir_def *intrinsic_result = NULL; - if (!radv_emulate_rt(device->physical_device)) { + if (!radv_emulate_rt(pdev)) { intrinsic_result = nir_bvh64_intersect_ray_amd(b, 32, desc, nir_unpack_64_2x32(b, global_bvh_node), nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin), diff --git a/src/amd/vulkan/nir/radv_nir_rt_shader.c b/src/amd/vulkan/nir/radv_nir_rt_shader.c index f21cbfb5091..d590f96bc22 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_shader.c +++ b/src/amd/vulkan/nir/radv_nir_rt_shader.c @@ -1524,6 +1524,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, bool monolithic, nir_builder *b, struct rt_variables *vars, bool ignore_cull_mask, struct radv_ray_tracing_stage_info *info) { + const struct radv_physical_device *pdev = radv_device_physical(device); nir_variable *barycentrics = nir_variable_create(b->shader, nir_var_ray_hit_attrib, glsl_vector_type(GLSL_TYPE_FLOAT, 2), "barycentrics"); barycentrics->data.driver_location = 0; @@ -1602,7 +1603,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin .tmin = nir_load_var(b, vars->tmin), .dir = nir_load_var(b, vars->direction), .vars = trav_vars_args, - .stack_stride = device->physical_device->rt_wave_size * sizeof(uint32_t), + .stack_stride = pdev->rt_wave_size * sizeof(uint32_t), .stack_entries = MAX_STACK_ENTRY_COUNT, .stack_base = 0, .ignore_cull_mask = ignore_cull_mask, @@ -1638,7 +1639,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin hit_attribs[i] = nir_local_variable_create(nir_shader_get_entrypoint(b->shader), glsl_uint_type(), "ahit_attrib"); - lower_hit_attribs(b->shader, hit_attribs, device->physical_device->rt_wave_size); + lower_hit_attribs(b->shader, hit_attribs, pdev->rt_wave_size); } /* Initialize follow-up shader. */ @@ -1702,6 +1703,7 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_ const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, struct radv_ray_tracing_stage_info *info) { + const struct radv_physical_device *pdev = radv_device_physical(device); const VkPipelineCreateFlagBits2KHR create_flags = vk_rt_pipeline_create_flags(pCreateInfo); /* Create the traversal shader as an intersection shader to prevent validation failures due to @@ -1709,8 +1711,8 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_ nir_builder b = radv_meta_init_shader(device, MESA_SHADER_INTERSECTION, "rt_traversal"); b.shader->info.internal = false; b.shader->info.workgroup_size[0] = 8; - b.shader->info.workgroup_size[1] = device->physical_device->rt_wave_size == 64 ? 8 : 4; - b.shader->info.shared_size = device->physical_device->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t); + b.shader->info.workgroup_size[1] = pdev->rt_wave_size == 64 ? 8 : 4; + b.shader->info.shared_size = pdev->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t); struct rt_variables vars = create_rt_variables(b.shader, device, create_flags, false); if (info->tmin.state == RADV_RT_CONST_ARG_STATE_VALID) @@ -1773,6 +1775,7 @@ lower_rt_instruction_monolithic(nir_builder *b, nir_instr *instr, void *data) nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); struct lower_rt_instruction_monolithic_state *state = data; + const struct radv_physical_device *pdev = radv_device_physical(state->device); struct rt_variables *vars = state->vars; switch (intr->intrinsic) { @@ -1800,8 +1803,8 @@ lower_rt_instruction_monolithic(nir_builder *b, nir_instr *instr, void *data) nir_store_var(b, vars->stack_ptr, nir_iadd_imm(b, stack_ptr, b->shader->scratch_size), 0x1); radv_build_traversal(state->device, state->pipeline, state->pCreateInfo, true, b, vars, ignore_cull_mask, NULL); - b->shader->info.shared_size = MAX2(b->shader->info.shared_size, state->device->physical_device->rt_wave_size * - MAX_STACK_ENTRY_COUNT * sizeof(uint32_t)); + b->shader->info.shared_size = + MAX2(b->shader->info.shared_size, pdev->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t)); nir_store_var(b, vars->stack_ptr, stack_ptr, 0x1); diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c index ba33a4c1ec3..0925ea5d9b5 100644 --- a/src/amd/vulkan/radv_acceleration_structure.c +++ b/src/amd/vulkan/radv_acceleration_structure.c @@ -424,7 +424,9 @@ cleanup: VkResult radv_device_init_null_accel_struct(struct radv_device *device) { - if (device->physical_device->memory_properties.memoryTypeCount == 0) + const struct radv_physical_device *pdev = radv_device_physical(device); + + if (pdev->memory_properties.memoryTypeCount == 0) return VK_SUCCESS; /* Exit in the case of null winsys. */ VkDevice _device = radv_device_to_handle(device); @@ -465,9 +467,9 @@ radv_device_init_null_accel_struct(struct radv_device *device) VkMemoryAllocateInfo alloc_info = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = mem_req.memoryRequirements.size, - .memoryTypeIndex = radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT), + .memoryTypeIndex = + radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT), }; result = radv_AllocateMemory(_device, &alloc_info, &device->meta_state.alloc, &memory); @@ -1537,9 +1539,9 @@ radv_GetDeviceAccelerationStructureCompatibilityKHR(VkDevice _device, VkAccelerationStructureCompatibilityKHR *pCompatibility) { RADV_FROM_HANDLE(radv_device, device, _device); - bool compat = - memcmp(pVersionInfo->pVersionData, device->physical_device->driver_uuid, VK_UUID_SIZE) == 0 && - memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, device->physical_device->cache_uuid, VK_UUID_SIZE) == 0; + const struct radv_physical_device *pdev = radv_device_physical(device); + bool compat = memcmp(pVersionInfo->pVersionData, pdev->driver_uuid, VK_UUID_SIZE) == 0 && + memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, pdev->cache_uuid, VK_UUID_SIZE) == 0; *pCompatibility = compat ? VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR : VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR; } @@ -1601,6 +1603,7 @@ radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer, RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src); RADV_FROM_HANDLE(radv_buffer, src_buffer, src->buffer); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radv_meta_saved_state saved_state; VkResult result = radv_device_init_accel_struct_copy_state(cmd_buffer->device); @@ -1634,8 +1637,8 @@ radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer, /* Set the header of the serialized data. */ uint8_t header_data[2 * VK_UUID_SIZE]; - memcpy(header_data, cmd_buffer->device->physical_device->driver_uuid, VK_UUID_SIZE); - memcpy(header_data + VK_UUID_SIZE, cmd_buffer->device->physical_device->cache_uuid, VK_UUID_SIZE); + memcpy(header_data, pdev->driver_uuid, VK_UUID_SIZE); + memcpy(header_data + VK_UUID_SIZE, pdev->cache_uuid, VK_UUID_SIZE); radv_update_buffer_cp(cmd_buffer, pInfo->dst.deviceAddress, header_data, sizeof(header_data)); } diff --git a/src/amd/vulkan/radv_android.c b/src/amd/vulkan/radv_android.c index 9ff5f5d4958..d0efd44c168 100644 --- a/src/amd/vulkan/radv_android.c +++ b/src/amd/vulkan/radv_android.c @@ -114,6 +114,7 @@ radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info, { RADV_FROM_HANDLE(radv_device, device, device_h); + const struct radv_physical_device *pdev = radv_device_physical(device); VkImage image_h = VK_NULL_HANDLE; struct radv_image *image = NULL; VkResult result; @@ -141,10 +142,9 @@ radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info, /* Find the first VRAM memory type, or GART for PRIME images. */ int memory_type_index = -1; - for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) { - bool is_local = !!(device->physical_device->memory_properties.memoryTypes[i].propertyFlags & - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - bool is_32bit = !!(device->physical_device->memory_types_32bit & (1u << i)); + for (int i = 0; i < pdev->memory_properties.memoryTypeCount; ++i) { + bool is_local = !!(pdev->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + bool is_32bit = !!(pdev->memory_types_32bit & (1u << i)); if (is_local && !is_32bit) { memory_type_index = i; break; @@ -217,7 +217,7 @@ radv_GetSwapchainGrallocUsageANDROID(VkDevice device_h, VkFormat format, VkImage int *grallocUsage) { RADV_FROM_HANDLE(radv_device, device, device_h); - struct radv_physical_device *pdev = device->physical_device; + struct radv_physical_device *pdev = radv_device_physical(device); VkPhysicalDevice pdev_h = radv_physical_device_to_handle(pdev); VkResult result; @@ -298,7 +298,7 @@ radv_GetSwapchainGrallocUsage2ANDROID(VkDevice device_h, VkFormat format, VkImag * vkGetSwapchainGrallocUsageANDROID. */ #if ANDROID_API_LEVEL >= 26 RADV_FROM_HANDLE(radv_device, device, device_h); - struct radv_physical_device *pdev = device->physical_device; + struct radv_physical_device *pdev = radv_device_physical(device); VkPhysicalDevice pdev_h = radv_physical_device_to_handle(pdev); VkResult result; @@ -408,6 +408,7 @@ get_ahb_buffer_format_properties(VkDevice device_h, const struct AHardwareBuffer VkAndroidHardwareBufferFormatPropertiesANDROID *pProperties) { RADV_FROM_HANDLE(radv_device, device, device_h); + struct radv_physical_device *pdev = radv_device_physical(device); /* Get a description of buffer contents . */ AHardwareBuffer_Desc desc; @@ -431,8 +432,7 @@ get_ahb_buffer_format_properties(VkDevice device_h, const struct AHardwareBuffer VkFormatProperties2 format_properties = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2}; - radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(device->physical_device), p->format, - &format_properties); + radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(pdev), p->format, &format_properties); if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER) p->formatFeatures = format_properties.formatProperties.linearTilingFeatures; @@ -481,6 +481,7 @@ get_ahb_buffer_format_properties2(VkDevice device_h, const struct AHardwareBuffe VkAndroidHardwareBufferFormatProperties2ANDROID *pProperties) { RADV_FROM_HANDLE(radv_device, device, device_h); + struct radv_physical_device *pdev = radv_device_physical(device); /* Get a description of buffer contents . */ AHardwareBuffer_Desc desc; @@ -504,8 +505,7 @@ get_ahb_buffer_format_properties2(VkDevice device_h, const struct AHardwareBuffe VkFormatProperties2 format_properties = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2}; - radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(device->physical_device), p->format, - &format_properties); + radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(pdev), p->format, &format_properties); if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER) p->formatFeatures = format_properties.formatProperties.linearTilingFeatures; @@ -554,7 +554,7 @@ radv_GetAndroidHardwareBufferPropertiesANDROID(VkDevice device_h, const struct A VkAndroidHardwareBufferPropertiesANDROID *pProperties) { RADV_FROM_HANDLE(radv_device, dev, device_h); - struct radv_physical_device *pdev = dev->physical_device; + struct radv_physical_device *pdev = radv_device_physical(dev); VkAndroidHardwareBufferFormatPropertiesANDROID *format_prop = vk_find_struct(pProperties->pNext, ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_ANDROID); diff --git a/src/amd/vulkan/radv_buffer.c b/src/amd/vulkan/radv_buffer.c index e27d06b031e..01deed7a403 100644 --- a/src/amd/vulkan/radv_buffer.c +++ b/src/amd/vulkan/radv_buffer.c @@ -176,9 +176,10 @@ static void radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize size, VkBufferCreateFlags flags, VkBufferUsageFlags2KHR usage, VkMemoryRequirements2 *pMemoryRequirements) { + const struct radv_physical_device *pdev = radv_device_physical(device); + pMemoryRequirements->memoryRequirements.memoryTypeBits = - ((1u << device->physical_device->memory_properties.memoryTypeCount) - 1u) & - ~device->physical_device->memory_types_32bit; + ((1u << pdev->memory_properties.memoryTypeCount) - 1u) & ~pdev->memory_types_32bit; /* Allow 32-bit address-space for DGC usage, as this buffer will contain * cmd buffer upload buffers, and those get passed to shaders through 32-bit @@ -190,14 +191,14 @@ radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize siz * intersection is non-zero at least) */ if ((usage & VK_BUFFER_USAGE_2_INDIRECT_BUFFER_BIT_KHR) && radv_uses_device_generated_commands(device)) - pMemoryRequirements->memoryRequirements.memoryTypeBits |= device->physical_device->memory_types_32bit; + pMemoryRequirements->memoryRequirements.memoryTypeBits |= pdev->memory_types_32bit; /* Force 32-bit address-space for descriptor buffers usage because they are passed to shaders * through 32-bit pointers. */ if (usage & (VK_BUFFER_USAGE_2_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | VK_BUFFER_USAGE_2_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT)) - pMemoryRequirements->memoryRequirements.memoryTypeBits = device->physical_device->memory_types_32bit; + pMemoryRequirements->memoryRequirements.memoryTypeBits = pdev->memory_types_32bit; if (flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) pMemoryRequirements->memoryRequirements.alignment = 4096; diff --git a/src/amd/vulkan/radv_buffer_view.c b/src/amd/vulkan/radv_buffer_view.c index 66927bc2929..7717ecdc376 100644 --- a/src/amd/vulkan/radv_buffer_view.c +++ b/src/amd/vulkan/radv_buffer_view.c @@ -34,6 +34,7 @@ void radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFormat vk_format, unsigned offset, unsigned range, uint32_t *state) { + const struct radv_physical_device *pdev = radv_device_physical(device); const struct util_format_description *desc; unsigned stride; unsigned num_format, data_format; @@ -49,16 +50,15 @@ radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFor va += offset; - if (device->physical_device->info.gfx_level != GFX8 && stride) { + if (pdev->info.gfx_level != GFX8 && stride) { range /= stride; } rsrc_word3 = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) | S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3])); - if (device->physical_device->info.gfx_level >= GFX10) { - const struct gfx10_format *fmt = - &ac_get_gfx10_format_table(&device->physical_device->info)[vk_format_to_pipe_format(vk_format)]; + if (pdev->info.gfx_level >= GFX10) { + const struct gfx10_format *fmt = &ac_get_gfx10_format_table(&pdev->info)[vk_format_to_pipe_format(vk_format)]; /* OOB_SELECT chooses the out-of-bounds check. * @@ -81,7 +81,7 @@ radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFor * offset+payload > NUM_RECORDS */ rsrc_word3 |= S_008F0C_FORMAT(fmt->img_format) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) | - S_008F0C_RESOURCE_LEVEL(device->physical_device->info.gfx_level < GFX11); + S_008F0C_RESOURCE_LEVEL(pdev->info.gfx_level < GFX11); } else { num_format = radv_translate_buffer_numformat(desc, first_non_void); data_format = radv_translate_buffer_dataformat(desc, first_non_void); diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 7b95db72d6c..9a1e9b392e1 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -66,6 +66,7 @@ static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, str static void radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dynamic_state *src) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radv_dynamic_state *dest = &cmd_buffer->state.dynamic; uint64_t copy_mask = src->mask; uint64_t dest_mask = 0; @@ -249,7 +250,7 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dy cmd_buffer->state.dirty |= RADV_CMD_DIRTY_GUARDBAND; } - if (cmd_buffer->device->physical_device->info.rbplus_allowed && (dest_mask & RADV_DYNAMIC_COLOR_WRITE_MASK)) { + if (pdev->info.rbplus_allowed && (dest_mask & RADV_DYNAMIC_COLOR_WRITE_MASK)) { cmd_buffer->state.dirty |= RADV_CMD_DIRTY_RBPLUS; } } @@ -257,7 +258,8 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dy bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer) { - return cmd_buffer->qf == RADV_QUEUE_COMPUTE && cmd_buffer->device->physical_device->info.gfx_level >= GFX7; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + return cmd_buffer->qf == RADV_QUEUE_COMPUTE && pdev->info.gfx_level >= GFX7; } enum amd_ip_type @@ -363,7 +365,7 @@ static VkResult radv_create_cmd_buffer(struct vk_command_pool *pool, struct vk_command_buffer **cmd_buffer_out) { struct radv_device *device = container_of(pool->base.device, struct radv_device, vk); - + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_cmd_buffer *cmd_buffer; unsigned ring; cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); @@ -378,7 +380,7 @@ radv_create_cmd_buffer(struct vk_command_pool *pool, struct vk_command_buffer ** cmd_buffer->device = device; - cmd_buffer->qf = vk_queue_to_radv(device->physical_device, pool->queue_family_index); + cmd_buffer->qf = vk_queue_to_radv(pdev, pool->queue_family_index); if (cmd_buffer->qf != RADV_QUEUE_SPARSE) { list_inithead(&cmd_buffer->upload.list); @@ -388,7 +390,7 @@ radv_create_cmd_buffer(struct vk_command_pool *pool, struct vk_command_buffer ** return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); } - ring = radv_queue_family_to_ring(device->physical_device, cmd_buffer->qf); + ring = radv_queue_family_to_ring(pdev, cmd_buffer->qf); cmd_buffer->cs = device->ws->cs_create(device->ws, ring, cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); @@ -534,7 +536,8 @@ radv_cmd_buffer_upload_alloc_aligned(struct radv_cmd_buffer *cmd_buffer, unsigne { assert(size % 4 == 0); - const struct radeon_info *gpu_info = &cmd_buffer->device->physical_device->info; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + const struct radeon_info *gpu_info = &pdev->info; /* Align to the scalar cache line size if it results in this allocation * being placed in less of them. @@ -636,12 +639,13 @@ radv_gang_barrier(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_ void radv_gang_cache_flush(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radeon_cmdbuf *ace_cs = cmd_buffer->gang.cs; const uint32_t flush_bits = cmd_buffer->gang.flush_bits; enum rgp_flush_bits sqtt_flush_bits = 0; - radv_cs_emit_cache_flush(cmd_buffer->device->ws, ace_cs, cmd_buffer->device->physical_device->info.gfx_level, NULL, - 0, RADV_QUEUE_COMPUTE, flush_bits, &sqtt_flush_bits, 0); + radv_cs_emit_cache_flush(cmd_buffer->device->ws, ace_cs, pdev->info.gfx_level, NULL, 0, RADV_QUEUE_COMPUTE, + flush_bits, &sqtt_flush_bits, 0); cmd_buffer->gang.flush_bits = 0; } @@ -682,13 +686,15 @@ ALWAYS_INLINE static bool radv_flush_gang_semaphore(struct radv_cmd_buffer *cmd_buffer, struct radeon_cmdbuf *cs, const enum radv_queue_family qf, const uint32_t va_off, const uint32_t value) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + if (!radv_gang_sem_init(cmd_buffer)) return false; ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 12); - radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->info.gfx_level, qf, V_028A90_BOTTOM_OF_PIPE_TS, - 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, cmd_buffer->gang.sem.va + va_off, value, + radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, qf, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, + EOP_DATA_SEL_VALUE_32BIT, cmd_buffer->gang.sem.va + va_off, value, cmd_buffer->gfx9_eop_bug_va); assert(cmd_buffer->cs->cdw <= cdw_max); @@ -792,6 +798,8 @@ static void radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flush_bits flags, bool dgc) { const struct radv_device *device = cmd_buffer->device; + const struct radv_physical_device *pdev = radv_device_physical(device); + if (unlikely(device->sqtt.bo) && !dgc) { radeon_check_space(device->ws, cmd_buffer->cs, 2); @@ -804,14 +812,14 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flu assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)); /* Force wait for graphics or compute engines to be idle. */ - radv_cs_emit_cache_flush(device->ws, cmd_buffer->cs, device->physical_device->info.gfx_level, - &cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va, cmd_buffer->qf, flags, - &sqtt_flush_bits, cmd_buffer->gfx9_eop_bug_va); + radv_cs_emit_cache_flush(device->ws, cmd_buffer->cs, pdev->info.gfx_level, &cmd_buffer->gfx9_fence_idx, + cmd_buffer->gfx9_fence_va, cmd_buffer->qf, flags, &sqtt_flush_bits, + cmd_buffer->gfx9_eop_bug_va); if ((flags & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) && radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) { /* Force wait for compute engines to be idle on the internal cmdbuf. */ - radv_cs_emit_cache_flush(device->ws, cmd_buffer->gang.cs, device->physical_device->info.gfx_level, NULL, 0, - RADV_QUEUE_COMPUTE, RADV_CMD_FLAG_CS_PARTIAL_FLUSH, &sqtt_flush_bits, 0); + radv_cs_emit_cache_flush(device->ws, cmd_buffer->gang.cs, pdev->info.gfx_level, NULL, 0, RADV_QUEUE_COMPUTE, + RADV_CMD_FLAG_CS_PARTIAL_FLUSH, &sqtt_flush_bits, 0); } } @@ -823,13 +831,14 @@ static void radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline) { struct radv_device *device = cmd_buffer->device; + const struct radv_physical_device *pdev = radv_device_physical(device); enum amd_ip_type ring; uint32_t data[2]; uint64_t va; va = radv_buffer_get_va(device->trace_bo); - ring = radv_queue_family_to_ring(device->physical_device, cmd_buffer->qf); + ring = radv_queue_family_to_ring(pdev, cmd_buffer->qf); switch (ring) { case AMD_IP_GFX: @@ -1203,7 +1212,7 @@ struct radv_bin_size_entry { static VkExtent2D radv_gfx10_compute_bin_size(struct radv_cmd_buffer *cmd_buffer) { - const struct radv_physical_device *pdev = cmd_buffer->device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const struct radv_rendering_state *render = &cmd_buffer->state.render; const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; VkExtent2D extent = {512, 512}; @@ -1287,7 +1296,7 @@ radv_gfx10_compute_bin_size(struct radv_cmd_buffer *cmd_buffer) static VkExtent2D radv_gfx9_compute_bin_size(struct radv_cmd_buffer *cmd_buffer) { - const struct radv_physical_device *pdev = cmd_buffer->device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const struct radv_rendering_state *render = &cmd_buffer->state.render; const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; static const struct radv_bin_size_entry color_size_table[][3][9] = { @@ -1553,7 +1562,7 @@ radv_gfx9_compute_bin_size(struct radv_cmd_buffer *cmd_buffer) static unsigned radv_get_disabled_binning_state(struct radv_cmd_buffer *cmd_buffer) { - const struct radv_physical_device *pdev = cmd_buffer->device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const struct radv_rendering_state *render = &cmd_buffer->state.render; const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; uint32_t pa_sc_binner_cntl_0; @@ -1595,30 +1604,31 @@ static unsigned radv_get_binning_state(struct radv_cmd_buffer *cmd_buffer) { const struct radv_device *device = cmd_buffer->device; + const struct radv_physical_device *pdev = radv_device_physical(device); unsigned pa_sc_binner_cntl_0; VkExtent2D bin_size; - if (device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level >= GFX10) { bin_size = radv_gfx10_compute_bin_size(cmd_buffer); } else { - assert(device->physical_device->info.gfx_level == GFX9); + assert(pdev->info.gfx_level == GFX9); bin_size = radv_gfx9_compute_bin_size(cmd_buffer); } if (device->pbb_allowed && bin_size.width && bin_size.height) { - struct radv_binning_settings *settings = &device->physical_device->binning_settings; + const struct radv_binning_settings *settings = &pdev->binning_settings; - pa_sc_binner_cntl_0 = S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) | - S_028C44_BIN_SIZE_X(bin_size.width == 16) | S_028C44_BIN_SIZE_Y(bin_size.height == 16) | - S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(MAX2(bin_size.width, 32)) - 5) | - S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) | - S_028C44_CONTEXT_STATES_PER_BIN(settings->context_states_per_bin - 1) | - S_028C44_PERSISTENT_STATES_PER_BIN(settings->persistent_states_per_bin - 1) | - S_028C44_DISABLE_START_OF_PRIM(1) | S_028C44_FPOVS_PER_BATCH(settings->fpovs_per_batch) | - S_028C44_OPTIMAL_BIN_SELECTION(1) | - S_028C44_FLUSH_ON_BINNING_TRANSITION(device->physical_device->info.family == CHIP_VEGA12 || - device->physical_device->info.family == CHIP_VEGA20 || - device->physical_device->info.family >= CHIP_RAVEN2); + pa_sc_binner_cntl_0 = + S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) | S_028C44_BIN_SIZE_X(bin_size.width == 16) | + S_028C44_BIN_SIZE_Y(bin_size.height == 16) | + S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(MAX2(bin_size.width, 32)) - 5) | + S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) | + S_028C44_CONTEXT_STATES_PER_BIN(settings->context_states_per_bin - 1) | + S_028C44_PERSISTENT_STATES_PER_BIN(settings->persistent_states_per_bin - 1) | + S_028C44_DISABLE_START_OF_PRIM(1) | S_028C44_FPOVS_PER_BATCH(settings->fpovs_per_batch) | + S_028C44_OPTIMAL_BIN_SELECTION(1) | + S_028C44_FLUSH_ON_BINNING_TRANSITION(pdev->info.family == CHIP_VEGA12 || pdev->info.family == CHIP_VEGA20 || + pdev->info.family >= CHIP_RAVEN2); } else { pa_sc_binner_cntl_0 = radv_get_disabled_binning_state(cmd_buffer); } @@ -1629,9 +1639,10 @@ radv_get_binning_state(struct radv_cmd_buffer *cmd_buffer) static void radv_emit_binning_state(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); unsigned pa_sc_binner_cntl_0; - if (cmd_buffer->device->physical_device->info.gfx_level < GFX9) + if (pdev->info.gfx_level < GFX9) return; pa_sc_binner_cntl_0 = radv_get_binning_state(cmd_buffer); @@ -1698,7 +1709,9 @@ radv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer, bool first_stage_only) static void radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer) { - assert(cmd_buffer->device->physical_device->info.rbplus_allowed); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + + assert(pdev->info.rbplus_allowed); const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; struct radv_rendering_state *render = &cmd_buffer->state.render; @@ -1719,12 +1732,11 @@ radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer) struct radv_color_buffer_info *cb = &render->color_att[i].cb; - format = cmd_buffer->device->physical_device->info.gfx_level >= GFX11 ? G_028C70_FORMAT_GFX11(cb->cb_color_info) - : G_028C70_FORMAT_GFX6(cb->cb_color_info); + format = pdev->info.gfx_level >= GFX11 ? G_028C70_FORMAT_GFX11(cb->cb_color_info) + : G_028C70_FORMAT_GFX6(cb->cb_color_info); swap = G_028C70_COMP_SWAP(cb->cb_color_info); - has_alpha = cmd_buffer->device->physical_device->info.gfx_level >= GFX11 - ? !G_028C74_FORCE_DST_ALPHA_1_GFX11(cb->cb_color_attrib) - : !G_028C74_FORCE_DST_ALPHA_1_GFX6(cb->cb_color_attrib); + has_alpha = pdev->info.gfx_level >= GFX11 ? !G_028C74_FORCE_DST_ALPHA_1_GFX11(cb->cb_color_attrib) + : !G_028C74_FORCE_DST_ALPHA_1_GFX6(cb->cb_color_attrib); uint32_t spi_format = (cmd_buffer->state.col_format_non_compacted >> (i * 4)) & 0xf; uint32_t colormask = d->vk.cb.attachments[i].write_mask; @@ -1854,11 +1866,12 @@ radv_emit_epilog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *s const struct radv_shader_part *epilog) { const struct radv_device *device = cmd_buffer->device; + const struct radv_physical_device *pdev = radv_device_physical(device); struct radeon_cmdbuf *cs = cmd_buffer->cs; radv_cs_add_buffer(device->ws, cs, epilog->bo); - assert((epilog->va >> 32) == device->physical_device->info.address32_hi); + assert((epilog->va >> 32) == pdev->info.address32_hi); const struct radv_userdata_info *loc = &shader->info.user_sgprs_locs.shader_data[AC_UD_EPILOG_PC]; const uint32_t base_reg = shader->info.user_data_0; @@ -1906,6 +1919,7 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer) { struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; const struct radv_device *device = cmd_buffer->device; + const struct radv_physical_device *pdev = radv_device_physical(device); if (cmd_buffer->state.emitted_graphics_pipeline == pipeline) return; @@ -1926,7 +1940,7 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.emitted_graphics_pipeline->ms.sample_shading_enable != pipeline->ms.sample_shading_enable) { cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES; - if (device->physical_device->info.gfx_level >= GFX10_3) + if (pdev->info.gfx_level >= GFX10_3) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE; } @@ -1945,7 +1959,7 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer) } if (device->pbb_allowed) { - struct radv_binning_settings *settings = &device->physical_device->binning_settings; + const struct radv_binning_settings *settings = &pdev->binning_settings; if ((!cmd_buffer->state.emitted_graphics_pipeline || cmd_buffer->state.emitted_graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT] != @@ -1977,7 +1991,7 @@ radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer) struct radv_shader *task_shader = cmd_buffer->state.shaders[MESA_SHADER_TASK]; if (task_shader) { - radv_emit_compute_shader(device->physical_device, cmd_buffer->gang.cs, task_shader); + radv_emit_compute_shader(pdev, cmd_buffer->gang.cs, task_shader); /* Relocate the task shader because RGP requires shaders to be contiguous in memory. */ if (pipeline->sqtt_shaders_reloc) { @@ -2207,8 +2221,9 @@ radv_emit_depth_bias(struct radv_cmd_buffer *cmd_buffer) static void radv_emit_line_stipple(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; - enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level; + enum amd_gfx_level gfx_level = pdev->info.gfx_level; /* GFX9 chips fail linestrip CTS tests unless this is set to 0 = no reset */ uint32_t auto_reset_cntl = (gfx_level == GFX9) ? 0 : 2; @@ -2224,7 +2239,8 @@ radv_emit_line_stipple(struct radv_cmd_buffer *cmd_buffer) static uint32_t radv_get_pa_su_sc_mode_cntl(const struct radv_cmd_buffer *cmd_buffer) { - enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + enum amd_gfx_level gfx_level = pdev->info.gfx_level; const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; unsigned pa_su_sc_mode_cntl; @@ -2287,6 +2303,7 @@ radv_emit_provoking_vertex_mode(struct radv_cmd_buffer *cmd_buffer) static void radv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader; const struct radv_userdata_info *loc = radv_get_user_sgpr(last_vgt_shader, AC_UD_NUM_VERTS_PER_PRIM); const uint32_t vgt_gs_out_prim_type = radv_get_rasterization_prim(cmd_buffer); @@ -2295,9 +2312,8 @@ radv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer) assert(!cmd_buffer->state.mesh_shading); - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX7) { - radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cmd_buffer->cs, R_030908_VGT_PRIMITIVE_TYPE, 1, - d->vk.ia.primitive_topology); + if (pdev->info.gfx_level >= GFX7) { + radeon_set_uconfig_reg_idx(pdev, cmd_buffer->cs, R_030908_VGT_PRIMITIVE_TYPE, 1, d->vk.ia.primitive_topology); } else { radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE, d->vk.ia.primitive_topology); } @@ -2347,7 +2363,7 @@ radv_emit_stencil_control(struct radv_cmd_buffer *cmd_buffer) static bool radv_should_force_vrs1x1(struct radv_cmd_buffer *cmd_buffer) { - const struct radv_physical_device *pdev = cmd_buffer->device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT]; return pdev->info.gfx_level >= GFX10_3 && @@ -2357,6 +2373,7 @@ radv_should_force_vrs1x1(struct radv_cmd_buffer *cmd_buffer) static void radv_emit_fragment_shading_rate(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; /* When per-vertex VRS is forced and the dynamic fragment shading rate is a no-op, ignore @@ -2374,7 +2391,7 @@ radv_emit_fragment_shading_rate(struct radv_cmd_buffer *cmd_buffer) uint32_t htile_comb_mode = d->vk.fsr.combiner_ops[1]; uint32_t pa_cl_vrs_cntl = 0; - assert(cmd_buffer->device->physical_device->info.gfx_level >= GFX10_3); + assert(pdev->info.gfx_level >= GFX10_3); if (!cmd_buffer->state.render.vrs_att.iview) { /* When the current subpass has no VRS attachment, the VRS rates are expected to be 1x1, so we @@ -2454,7 +2471,8 @@ radv_get_primitive_reset_index(const struct radv_cmd_buffer *cmd_buffer) static void radv_emit_primitive_restart_enable(struct radv_cmd_buffer *cmd_buffer) { - const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; const struct radv_dynamic_state *const d = &cmd_buffer->state.dynamic; struct radeon_cmdbuf *cs = cmd_buffer->cs; const bool en = d->vk.ia.primitive_restart_enable; @@ -2513,6 +2531,7 @@ radv_is_mrt0_dual_src(struct radv_cmd_buffer *cmd_buffer) static void radv_emit_logic_op(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; unsigned cb_color_control = 0; @@ -2522,7 +2541,7 @@ radv_emit_logic_op(struct radv_cmd_buffer *cmd_buffer) cb_color_control |= S_028808_ROP3(V_028808_ROP3_COPY); } - if (cmd_buffer->device->physical_device->info.has_rbplus) { + if (pdev->info.has_rbplus) { /* RB+ doesn't work with dual source blending, logic op and CB_RESOLVE. */ bool mrt0_is_dual_src = radv_is_mrt0_dual_src(cmd_buffer); @@ -2556,7 +2575,8 @@ static void radv_emit_color_write(struct radv_cmd_buffer *cmd_buffer) { const struct radv_device *device = cmd_buffer->device; - const struct radv_binning_settings *settings = &device->physical_device->binning_settings; + const struct radv_physical_device *pdev = radv_device_physical(device); + const struct radv_binning_settings *settings = &pdev->binning_settings; const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; uint32_t color_write_enable = 0, color_write_mask = 0; @@ -2580,7 +2600,7 @@ radv_emit_color_write(struct radv_cmd_buffer *cmd_buffer) static void radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer) { - const struct radv_physical_device *pdev = cmd_buffer->device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const struct radv_shader *vs = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_VERTEX); const struct radv_shader *tcs = cmd_buffer->state.shaders[MESA_SHADER_TESS_CTRL]; const struct radv_shader *tes = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_TESS_EVAL); @@ -2667,7 +2687,7 @@ radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer) static void radv_emit_conservative_rast_mode(struct radv_cmd_buffer *cmd_buffer) { - const struct radv_physical_device *pdev = cmd_buffer->device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; if (pdev->info.gfx_level >= GFX9) { @@ -2710,7 +2730,7 @@ radv_emit_depth_clamp_enable(struct radv_cmd_buffer *cmd_buffer) static void radv_emit_rasterization_samples(struct radv_cmd_buffer *cmd_buffer) { - const struct radv_physical_device *pdev = cmd_buffer->device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); unsigned rasterization_samples = radv_get_rasterization_samples(cmd_buffer); unsigned ps_iter_samples = radv_get_ps_iter_samples(cmd_buffer); const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; @@ -2756,14 +2776,15 @@ static void radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, struct radv_color_buffer_info *cb, struct radv_image_view *iview, VkImageLayout layout) { - bool is_vi = cmd_buffer->device->physical_device->info.gfx_level >= GFX8; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + bool is_vi = pdev->info.gfx_level >= GFX8; uint32_t cb_fdcc_control = cb->cb_dcc_control; uint32_t cb_color_info = cb->cb_color_info; struct radv_image *image = iview->image; if (!radv_layout_dcc_compressed(cmd_buffer->device, image, iview->vk.base_mip_level, layout, radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf))) { - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { cb_fdcc_control &= C_028C78_FDCC_ENABLE; } else { cb_color_info &= C_028C70_DCC_ENABLE; @@ -2776,7 +2797,7 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, struct r cb_color_info &= C_028C70_COMPRESSION; } - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { radeon_set_context_reg_seq(cmd_buffer->cs, R_028C6C_CB_COLOR0_VIEW + index * 0x3c, 4); radeon_emit(cmd_buffer->cs, cb->cb_color_view); /* CB_COLOR0_VIEW */ radeon_emit(cmd_buffer->cs, cb->cb_color_info); /* CB_COLOR0_INFO */ @@ -2789,7 +2810,7 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, struct r radeon_set_context_reg(cmd_buffer->cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + index * 4, cb->cb_dcc_base >> 32); radeon_set_context_reg(cmd_buffer->cs, R_028EC0_CB_COLOR0_ATTRIB2 + index * 4, cb->cb_color_attrib2); radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4, cb->cb_color_attrib3); - } else if (cmd_buffer->device->physical_device->info.gfx_level >= GFX10) { + } else if (pdev->info.gfx_level >= GFX10) { radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11); radeon_emit(cmd_buffer->cs, cb->cb_color_base); radeon_emit(cmd_buffer->cs, 0); @@ -2811,7 +2832,7 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, struct r radeon_set_context_reg(cmd_buffer->cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + index * 4, cb->cb_dcc_base >> 32); radeon_set_context_reg(cmd_buffer->cs, R_028EC0_CB_COLOR0_ATTRIB2 + index * 4, cb->cb_color_attrib2); radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4, cb->cb_color_attrib3); - } else if (cmd_buffer->device->physical_device->info.gfx_level == GFX9) { + } else if (pdev->info.gfx_level == GFX9) { radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11); radeon_emit(cmd_buffer->cs, cb->cb_color_base); radeon_emit(cmd_buffer->cs, S_028C64_BASE_256B(cb->cb_color_base >> 32)); @@ -2849,8 +2870,7 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, struct r } } - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11 ? G_028C78_FDCC_ENABLE(cb_fdcc_control) - : G_028C70_DCC_ENABLE(cb_color_info)) { + if (pdev->info.gfx_level >= GFX11 ? G_028C78_FDCC_ENABLE(cb_fdcc_control) : G_028C70_DCC_ENABLE(cb_color_info)) { /* Drawing with DCC enabled also compresses colorbuffers. */ VkImageSubresourceRange range = { .aspectMask = iview->vk.aspects, @@ -2868,16 +2888,17 @@ static void radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds, const struct radv_image_view *iview, bool requires_cond_exec) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const struct radv_image *image = iview->image; uint32_t db_z_info = ds->db_z_info; uint32_t db_z_info_reg; - if (!cmd_buffer->device->physical_device->info.has_tc_compat_zrange_bug || !radv_image_is_tc_compat_htile(image)) + if (!pdev->info.has_tc_compat_zrange_bug || !radv_image_is_tc_compat_htile(image)) return; db_z_info &= C_028040_ZRANGE_PRECISION; - if (cmd_buffer->device->physical_device->info.gfx_level == GFX9) { + if (pdev->info.gfx_level == GFX9) { db_z_info_reg = R_028038_DB_Z_INFO; } else { db_z_info_reg = R_028040_DB_Z_INFO; @@ -2919,6 +2940,7 @@ static void radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds, struct radv_image_view *iview, bool depth_compressed, bool stencil_compressed) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); uint64_t db_htile_data_base = ds->db_htile_data_base; uint32_t db_htile_surface = ds->db_htile_surface; uint32_t db_render_control = ds->db_render_control | cmd_buffer->state.db_render_control; @@ -2929,7 +2951,7 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_ if (!stencil_compressed) db_render_control |= S_028000_STENCIL_COMPRESS_DISABLE(1); - if (cmd_buffer->device->physical_device->info.gfx_level == GFX10_3) { + if (pdev->info.gfx_level == GFX10_3) { if (!cmd_buffer->state.render.vrs_att.iview) { db_htile_surface &= C_028ABC_VRS_HTILE_ENCODING; } else { @@ -2953,11 +2975,11 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_ radeon_set_context_reg(cmd_buffer->cs, R_028010_DB_RENDER_OVERRIDE2, ds->db_render_override2); radeon_set_context_reg(cmd_buffer->cs, R_028ABC_DB_HTILE_SURFACE, db_htile_surface); - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level >= GFX10) { radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, db_htile_data_base); radeon_set_context_reg(cmd_buffer->cs, R_02801C_DB_DEPTH_SIZE_XY, ds->db_depth_size); - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 6); } else { radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 7); @@ -2976,7 +2998,7 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_ radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32); radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32); radeon_emit(cmd_buffer->cs, db_htile_data_base >> 32); - } else if (cmd_buffer->device->physical_device->info.gfx_level == GFX9) { + } else if (pdev->info.gfx_level == GFX9) { radeon_set_context_reg_seq(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, 3); radeon_emit(cmd_buffer->cs, db_htile_data_base); radeon_emit(cmd_buffer->cs, S_028018_BASE_HI(db_htile_data_base >> 32)); @@ -3019,7 +3041,8 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_ static void radv_emit_null_ds_state(struct radv_cmd_buffer *cmd_buffer) { - const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; unsigned db_render_control = 0; unsigned num_samples = 0; @@ -3129,9 +3152,10 @@ static void radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range, uint32_t value) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radeon_cmdbuf *cs = cmd_buffer->cs; - if (!cmd_buffer->device->physical_device->info.has_tc_compat_zrange_bug) + if (!pdev->info.has_tc_compat_zrange_bug) return; uint64_t va = radv_get_tc_compat_zrange_va(image, range->baseMipLevel); @@ -3200,6 +3224,7 @@ radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct r static void radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radeon_cmdbuf *cs = cmd_buffer->cs; const struct radv_image *image = iview->image; VkImageAspectFlags aspects = vk_format_aspects(image->vk.format); @@ -3219,7 +3244,7 @@ radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct rad uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset; - if (cmd_buffer->device->physical_device->info.has_load_ctx_reg_pkt) { + if (pdev->info.has_load_ctx_reg_pkt) { radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, 0)); radeon_emit(cs, va); radeon_emit(cs, va >> 32); @@ -3382,6 +3407,7 @@ radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struc static void radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *iview, int cb_idx) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radv_image *image = iview->image; @@ -3400,7 +3426,7 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_i uint64_t va = radv_image_get_fast_clear_va(image, iview->vk.base_mip_level); uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c; - if (cmd_buffer->device->physical_device->info.has_load_ctx_reg_pkt) { + if (pdev->info.has_load_ctx_reg_pkt) { radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, cmd_buffer->state.predicating)); radeon_emit(cs, va); radeon_emit(cs, va >> 32); @@ -3428,11 +3454,12 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_i static void radv_emit_fb_mip_change_flush(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radv_rendering_state *render = &cmd_buffer->state.render; bool color_mip_changed = false; /* Entire workaround is not applicable before GFX9 */ - if (cmd_buffer->device->physical_device->info.gfx_level < GFX9) + if (pdev->info.gfx_level < GFX9) return; for (int i = 0; i < render->color_att_count; ++i) { @@ -3471,8 +3498,10 @@ radv_emit_fb_mip_change_flush(struct radv_cmd_buffer *cmd_buffer) static void radv_emit_mip_change_flush_default(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + /* Entire workaround is not applicable before GFX9 */ - if (cmd_buffer->device->physical_device->info.gfx_level < GFX9) + if (pdev->info.gfx_level < GFX9) return; bool need_color_mip_flush = false; @@ -3498,12 +3527,12 @@ radv_emit_mip_change_flush_default(struct radv_cmd_buffer *cmd_buffer) static void radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radv_rendering_state *render = &cmd_buffer->state.render; int i; bool disable_constant_encode_ac01 = false; - unsigned color_invalid = cmd_buffer->device->physical_device->info.gfx_level >= GFX11 - ? S_028C70_FORMAT_GFX11(V_028C70_COLOR_INVALID) - : S_028C70_FORMAT_GFX6(V_028C70_COLOR_INVALID); + unsigned color_invalid = pdev->info.gfx_level >= GFX11 ? S_028C70_FORMAT_GFX11(V_028C70_COLOR_INVALID) + : S_028C70_FORMAT_GFX6(V_028C70_COLOR_INVALID); VkExtent2D extent = {MAX_FRAMEBUFFER_WIDTH, MAX_FRAMEBUFFER_HEIGHT}; ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 51 + MAX_RTS * 70); @@ -3535,7 +3564,7 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) radv_load_color_clear_metadata(cmd_buffer, iview, i); - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9 && iview->image->dcc_sign_reinterpret) { + if (pdev->info.gfx_level >= GFX9 && iview->image->dcc_sign_reinterpret) { /* Disable constant encoding with the clear value of "1" with different DCC signedness * because the hardware will fill "1" instead of the clear value. */ @@ -3572,8 +3601,7 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) extent.width = MIN2(extent.width, iview->vk.extent.width); extent.height = MIN2(extent.height, iview->vk.extent.height); - } else if (cmd_buffer->device->physical_device->info.gfx_level == GFX10_3 && render->vrs_att.iview && - radv_cmd_buffer_get_vrs_image(cmd_buffer)) { + } else if (pdev->info.gfx_level == GFX10_3 && render->vrs_att.iview && radv_cmd_buffer_get_vrs_image(cmd_buffer)) { /* When a subpass uses a VRS attachment without binding a depth/stencil attachment, we have to * bind our internal depth buffer that contains the VRS data as part of HTILE. */ @@ -3613,7 +3641,7 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) radv_emit_null_ds_state(cmd_buffer); } - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { bool vrs_surface_enable = render->vrs_att.iview != NULL; unsigned xmax = 0, ymax = 0; uint64_t va = 0; @@ -3637,12 +3665,12 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) S_0283D0_VRS_SURFACE_ENABLE(vrs_surface_enable)); } - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX8) { - bool disable_constant_encode = cmd_buffer->device->physical_device->info.has_dcc_constant_encode; - enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level; + if (pdev->info.gfx_level >= GFX8) { + bool disable_constant_encode = pdev->info.has_dcc_constant_encode; + enum amd_gfx_level gfx_level = pdev->info.gfx_level; - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { - const bool has_dedicated_vram = cmd_buffer->device->physical_device->info.has_dedicated_vram; + if (pdev->info.gfx_level >= GFX11) { + const bool has_dedicated_vram = pdev->info.has_dedicated_vram; radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_FDCC_CONTROL, S_028424_SAMPLE_MASK_TRACKER_WATERMARK(has_dedicated_vram ? 0 : 15)); @@ -3698,6 +3726,7 @@ radv_handle_zero_index_buffer_bug(struct radv_cmd_buffer *cmd_buffer, uint64_t * static void radv_emit_index_buffer(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radv_cmd_state *state = &cmd_buffer->state; uint32_t max_index_count = state->max_index_count; @@ -3709,7 +3738,7 @@ radv_emit_index_buffer(struct radv_cmd_buffer *cmd_buffer) return; /* Handle indirect draw calls with NULL index buffer if the GPU doesn't support them. */ - if (!max_index_count && cmd_buffer->device->physical_device->info.has_zero_index_buffer_bug) { + if (!max_index_count && pdev->info.has_zero_index_buffer_bug) { radv_handle_zero_index_buffer_bug(cmd_buffer, &index_va, &max_index_count); } @@ -3726,7 +3755,8 @@ radv_emit_index_buffer(struct radv_cmd_buffer *cmd_buffer) static void radv_flush_occlusion_query_state(struct radv_cmd_buffer *cmd_buffer) { - const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; const bool enable_occlusion_queries = cmd_buffer->state.active_occlusion_queries || cmd_buffer->state.inherited_occlusion_queries; uint32_t db_count_control; @@ -3798,6 +3828,7 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v const struct radv_vs_input_state *state = &cmd_buffer->state.dynamic_vs_input; struct radv_device *device = cmd_buffer->device; + const struct radv_physical_device *pdev = radv_device_physical(device); unsigned num_attributes = util_last_bit(vs_shader->info.vs.vb_desc_usage_mask); uint32_t attribute_mask = BITFIELD_MASK(num_attributes); @@ -3807,7 +3838,7 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v *nontrivial_divisors = state->nontrivial_divisors & attribute_mask; uint32_t misaligned_mask = cmd_buffer->state.vbo_misaligned_mask; if (cmd_buffer->state.vbo_misaligned_mask_invalid) { - assert(device->physical_device->info.gfx_level == GFX6 || device->physical_device->info.gfx_level >= GFX10); + assert(pdev->info.gfx_level == GFX6 || pdev->info.gfx_level >= GFX10); u_foreach_bit (index, cmd_buffer->state.vbo_misaligned_mask_invalid & attribute_mask) { uint8_t binding = state->bindings[index]; @@ -3837,14 +3868,13 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v const bool can_use_simple_input = cmd_buffer->state.shaders[MESA_SHADER_VERTEX] && !cmd_buffer->state.shaders[MESA_SHADER_VERTEX]->info.merged_shader_compiled_separately && - cmd_buffer->state.shaders[MESA_SHADER_VERTEX]->info.is_ngg == device->physical_device->use_ngg && - cmd_buffer->state.shaders[MESA_SHADER_VERTEX]->info.wave_size == device->physical_device->ge_wave_size; + cmd_buffer->state.shaders[MESA_SHADER_VERTEX]->info.is_ngg == pdev->use_ngg && + cmd_buffer->state.shaders[MESA_SHADER_VERTEX]->info.wave_size == pdev->ge_wave_size; /* The instance ID input VGPR is placed differently when as_ls=true. as_ls is also needed to * workaround the LS VGPR initialization bug. */ - bool as_ls = - vs_shader->info.vs.as_ls && (instance_rate_inputs || device->physical_device->info.has_ls_vgpr_init_bug); + bool as_ls = vs_shader->info.vs.as_ls && (instance_rate_inputs || pdev->info.has_ls_vgpr_init_bug); /* try to use a pre-compiled prolog first */ struct radv_shader_part *prolog = NULL; @@ -3892,13 +3922,14 @@ static void emit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs_shader, const struct radv_shader_part *prolog) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); uint32_t rsrc1, rsrc2; /* no need to re-emit anything in this case */ if (cmd_buffer->state.emitted_vs_prolog == prolog) return; - enum amd_gfx_level chip = cmd_buffer->device->physical_device->info.gfx_level; + enum amd_gfx_level chip = pdev->info.gfx_level; assert(cmd_buffer->state.emitted_graphics_pipeline == cmd_buffer->state.graphics_pipeline); @@ -3949,8 +3980,7 @@ emit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v unsigned lds_size; if (gs->info.is_ngg) { - lds_size = DIV_ROUND_UP(gs->info.ngg_info.lds_size, - cmd_buffer->device->physical_device->info.lds_encode_granularity); + lds_size = DIV_ROUND_UP(gs->info.ngg_info.lds_size, pdev->info.lds_encode_granularity); } else { lds_size = gs->info.gs_ring_info.lds_size; } @@ -4041,7 +4071,7 @@ radv_emit_vertex_input(struct radv_cmd_buffer *cmd_buffer) static void radv_emit_tess_domain_origin(struct radv_cmd_buffer *cmd_buffer) { - const struct radv_physical_device *pdev = cmd_buffer->device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const struct radv_shader *tes = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_TESS_EVAL); const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; unsigned type = 0, partitioning = 0, distribution_mode = 0; @@ -4137,7 +4167,7 @@ radv_emit_sample_mask(struct radv_cmd_buffer *cmd_buffer) static void radv_emit_color_blend(struct radv_cmd_buffer *cmd_buffer) { - const struct radv_physical_device *pdev = cmd_buffer->device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const enum amd_gfx_level gfx_level = pdev->info.gfx_level; const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; unsigned cb_blend_control[MAX_RTS], sx_mrt_blend_opt[MAX_RTS]; @@ -4249,6 +4279,7 @@ lookup_ps_epilog(struct radv_cmd_buffer *cmd_buffer) const struct radv_rendering_state *render = &cmd_buffer->state.render; const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; struct radv_device *device = cmd_buffer->device; + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_ps_epilog_state state = {0}; state.color_attachment_count = render->color_att_count; @@ -4285,7 +4316,7 @@ lookup_ps_epilog(struct radv_cmd_buffer *cmd_buffer) state.colors_written = ps->info.ps.colors_written; if (ps->info.ps.exports_mrtz_via_epilog) { - assert(device->physical_device->info.gfx_level >= GFX11); + assert(pdev->info.gfx_level >= GFX11); state.export_depth = ps->info.ps.writes_z; state.export_stencil = ps->info.ps.writes_stencil; state.export_sample_mask = ps->info.ps.writes_sample_mask; @@ -4304,7 +4335,7 @@ lookup_ps_epilog(struct radv_cmd_buffer *cmd_buffer) static void radv_emit_msaa_state(struct radv_cmd_buffer *cmd_buffer) { - const struct radv_physical_device *pdev = cmd_buffer->device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT]; unsigned rasterization_samples = radv_get_rasterization_samples(cmd_buffer); const struct radv_rendering_state *render = &cmd_buffer->state.render; @@ -4401,12 +4432,13 @@ radv_emit_line_rasterization_mode(struct radv_cmd_buffer *cmd_buffer) static void radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const uint64_t states) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + if (states & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT | RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_CLAMP_ENABLE)) radv_emit_viewport(cmd_buffer); - if (states & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT) && - !cmd_buffer->device->physical_device->info.has_gfx9_scissor_bug) + if (states & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT) && !pdev->info.has_gfx9_scissor_bug) radv_emit_scissor(cmd_buffer); if (states & RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) @@ -4778,9 +4810,10 @@ void radv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, const struct radv_graphics_pipeline *pipeline, bool full_null_descriptors, void *vb_ptr) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radv_shader *vs_shader = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_VERTEX); - enum amd_gfx_level chip = cmd_buffer->device->physical_device->info.gfx_level; - enum radeon_family family = cmd_buffer->device->physical_device->info.family; + enum amd_gfx_level chip = pdev->info.gfx_level; + enum radeon_family family = pdev->info.family; unsigned desc_index = 0; uint32_t mask = vs_shader->info.vs.vb_desc_usage_mask; uint64_t va; @@ -5003,6 +5036,8 @@ radv_emit_streamout_buffers(struct radv_cmd_buffer *cmd_buffer, uint64_t va) static void radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_STREAMOUT_BUFFER) { struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; struct radv_streamout_state *so = &cmd_buffer->state.streamout; @@ -5033,7 +5068,7 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer) */ size = 0xffffffff; - if (cmd_buffer->device->physical_device->use_ngg_streamout) { + if (pdev->use_ngg_streamout) { /* With NGG streamout, the buffer size is used to determine the max emit per buffer * and also acts as a disable bit when it's 0. */ @@ -5044,10 +5079,10 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer) uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); - } else if (cmd_buffer->device->physical_device->info.gfx_level >= GFX10) { + } else if (pdev->info.gfx_level >= GFX10) { rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1); } else { @@ -5072,6 +5107,7 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer) static void radv_flush_shader_query_state_gfx(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader; const struct radv_userdata_info *loc = radv_get_user_sgpr(last_vgt_shader, AC_UD_SHADER_QUERY_STATE); enum radv_shader_query_state shader_query_state = radv_shader_query_none; @@ -5090,8 +5126,8 @@ radv_flush_shader_query_state_gfx(struct radv_cmd_buffer *cmd_buffer) (cmd_buffer->state.inherited_pipeline_statistics & (VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT)) || - (cmd_buffer->device->physical_device->emulate_mesh_shader_queries && - (cmd_buffer->state.inherited_pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_MESH_SHADER_INVOCATIONS_BIT_EXT))) + (pdev->emulate_mesh_shader_queries && (cmd_buffer->state.inherited_pipeline_statistics & + VK_QUERY_PIPELINE_STATISTIC_MESH_SHADER_INVOCATIONS_BIT_EXT))) shader_query_state |= radv_shader_query_pipeline_stat; if (cmd_buffer->state.active_prims_gen_gds_queries) @@ -5134,10 +5170,11 @@ radv_flush_shader_query_state_ace(struct radv_cmd_buffer *cmd_buffer, struct rad static void radv_flush_shader_query_state(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + radv_flush_shader_query_state_gfx(cmd_buffer); - if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK) && - cmd_buffer->device->physical_device->emulate_mesh_shader_queries) + if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK) && pdev->emulate_mesh_shader_queries) radv_flush_shader_query_state_ace(cmd_buffer, cmd_buffer->state.shaders[MESA_SHADER_TASK]); cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_SHADER_QUERY; @@ -5146,6 +5183,7 @@ radv_flush_shader_query_state(struct radv_cmd_buffer *cmd_buffer) static void radv_flush_force_vrs_state(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const struct radv_shader *last_vgt_shader = cmd_buffer->state.last_vgt_shader; if (!last_vgt_shader->info.force_vrs_per_vertex) { @@ -5167,7 +5205,7 @@ radv_flush_force_vrs_state(struct radv_cmd_buffer *cmd_buffer) assert(loc->sgpr_idx != -1); - enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level; + enum amd_gfx_level gfx_level = pdev->info.gfx_level; uint32_t vrs_rates = 0; switch (cmd_buffer->device->force_vrs) { @@ -5255,7 +5293,8 @@ static void radv_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw, bool indirect_draw, bool count_from_stream_output, uint32_t draw_vertex_count) { - const struct radeon_info *gpu_info = &cmd_buffer->device->physical_device->info; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + const struct radeon_info *gpu_info = &pdev->info; struct radv_cmd_state *state = &cmd_buffer->state; const unsigned patch_control_points = state->dynamic.vk.ts.patch_control_points; const unsigned topology = state->dynamic.vk.ia.primitive_topology; @@ -5269,8 +5308,7 @@ radv_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_ if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) { if (gpu_info->gfx_level == GFX9) { - radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs, R_030960_IA_MULTI_VGT_PARAM, 4, - ia_multi_vgt_param); + radeon_set_uconfig_reg_idx(pdev, cs, R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param); } else if (gpu_info->gfx_level >= GFX7) { radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param); } else { @@ -5322,7 +5360,8 @@ gfx10_emit_ge_cntl(struct radv_cmd_buffer *cmd_buffer) static void radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *draw_info) { - const struct radeon_info *gpu_info = &cmd_buffer->device->physical_device->info; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + const struct radeon_info *gpu_info = &pdev->info; struct radv_cmd_state *state = &cmd_buffer->state; struct radeon_cmdbuf *cs = cmd_buffer->cs; uint32_t topology = state->dynamic.vk.ia.primitive_topology; @@ -5353,8 +5392,8 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_d disable_instance_packing != G_028A7C_DISABLE_INSTANCE_PACKING(state->last_index_type)))) { uint32_t index_type = state->index_type | S_028A7C_DISABLE_INSTANCE_PACKING(disable_instance_packing); - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) { - radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs, R_03090C_VGT_INDEX_TYPE, 2, index_type); + if (pdev->info.gfx_level >= GFX9) { + radeon_set_uconfig_reg_idx(pdev, cs, R_03090C_VGT_INDEX_TYPE, 2, index_type); } else { radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0)); radeon_emit(cs, index_type); @@ -5405,8 +5444,8 @@ radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_s static bool can_skip_buffer_l2_flushes(struct radv_device *device) { - return device->physical_device->info.gfx_level == GFX9 || - (device->physical_device->info.gfx_level >= GFX10 && !device->physical_device->info.tcc_rb_non_coherent); + const struct radv_physical_device *pdev = radv_device_physical(device); + return pdev->info.gfx_level == GFX9 || (pdev->info.gfx_level >= GFX10 && !pdev->info.tcc_rb_non_coherent); } /* @@ -5519,6 +5558,7 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 src_fla enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 dst_flags, const struct radv_image *image) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); bool has_CB_meta = true, has_DB_meta = true; enum radv_cmd_flush_bits flush_bits = 0; bool flush_CB = true, flush_DB = true; @@ -5551,7 +5591,7 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 dst_fla if (radv_uses_device_generated_commands(cmd_buffer->device)) { flush_bits |= RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE; - if (cmd_buffer->device->physical_device->info.gfx_level < GFX9) + if (pdev->info.gfx_level < GFX9) flush_bits |= RADV_CMD_FLAG_INV_L2; } @@ -5581,7 +5621,7 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 dst_fla case VK_ACCESS_2_SHADER_STORAGE_READ_BIT: /* Unlike LLVM, ACO uses SMEM for SSBOs and we have to * invalidate the scalar cache. */ - if (!cmd_buffer->device->physical_device->use_llvm && !image) + if (!pdev->use_llvm && !image) flush_bits |= RADV_CMD_FLAG_INV_SCACHE; FALLTHROUGH; case VK_ACCESS_2_SHADER_SAMPLED_READ_BIT: @@ -5594,7 +5634,7 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 dst_fla case VK_ACCESS_2_COMMAND_PREPROCESS_READ_BIT_NV: case VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR: flush_bits |= RADV_CMD_FLAG_INV_VCACHE; - if (cmd_buffer->device->physical_device->info.gfx_level < GFX9) + if (pdev->info.gfx_level < GFX9) flush_bits |= RADV_CMD_FLAG_INV_L2; break; case VK_ACCESS_2_SHADER_WRITE_BIT: @@ -5739,6 +5779,7 @@ VKAPI_ATTR VkResult VKAPI_CALL radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo *pBeginInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); VkResult result = VK_SUCCESS; vk_command_buffer_begin(&cmd_buffer->vk, pBeginInfo); @@ -5781,8 +5822,8 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi cmd_buffer->state.mec_inv_pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset; } - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9 && cmd_buffer->qf == RADV_QUEUE_GENERAL) { - unsigned num_db = cmd_buffer->device->physical_device->info.max_render_backends; + if (pdev->info.gfx_level >= GFX9 && cmd_buffer->qf == RADV_QUEUE_GENERAL) { + unsigned num_db = pdev->info.max_render_backends; unsigned fence_offset, eop_bug_offset; void *fence_ptr; @@ -5794,7 +5835,7 @@ radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBegi radv_emit_clear_data(cmd_buffer, V_370_PFP, cmd_buffer->gfx9_fence_va, 8); - if (cmd_buffer->device->physical_device->info.gfx_level == GFX9) { + if (pdev->info.gfx_level == GFX9) { /* Allocate a buffer for the EOP bug on GFX9. */ radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, &eop_bug_offset, &fence_ptr); memset(fence_ptr, 0, 16 * num_db); @@ -5870,6 +5911,7 @@ radv_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, const VkDeviceSize *pStrides) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radv_vertex_binding *vb = cmd_buffer->vertex_bindings; const struct radv_vs_input_state *state = &cmd_buffer->state.dynamic_vs_input; @@ -5877,7 +5919,7 @@ radv_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, * stride from the pipeline. */ assert(firstBinding + bindingCount <= MAX_VBS); - enum amd_gfx_level chip = cmd_buffer->device->physical_device->info.gfx_level; + enum amd_gfx_level chip = pdev->info.gfx_level; if (firstBinding + bindingCount > cmd_buffer->used_vertex_bindings) cmd_buffer->used_vertex_bindings = firstBinding + bindingCount; @@ -5955,6 +5997,7 @@ radv_CmdBindIndexBuffer2KHR(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDe { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_buffer, index_buffer, buffer); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); cmd_buffer->state.index_type = vk_to_index_type(indexType); @@ -5969,7 +6012,7 @@ radv_CmdBindIndexBuffer2KHR(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDe cmd_buffer->state.index_va = 0; cmd_buffer->state.max_index_count = 0; - if (cmd_buffer->device->physical_device->info.has_null_index_buffer_clamping_bug) + if (pdev->info.has_null_index_buffer_clamping_bug) cmd_buffer->state.index_va = 0x2; } @@ -6006,6 +6049,7 @@ radv_bind_descriptor_sets(struct radv_cmd_buffer *cmd_buffer, const VkBindDescriptorSetsInfoKHR *pBindDescriptorSetsInfo, VkPipelineBindPoint bind_point) { RADV_FROM_HANDLE(radv_pipeline_layout, layout, pBindDescriptorSetsInfo->layout); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const bool no_dynamic_bounds = cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_DYNAMIC_BOUNDS; struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point); unsigned dyn_idx = 0; @@ -6040,9 +6084,9 @@ radv_bind_descriptor_sets(struct radv_cmd_buffer *cmd_buffer, dst[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { dst[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); - } else if (cmd_buffer->device->physical_device->info.gfx_level >= GFX10) { + } else if (pdev->info.gfx_level >= GFX10) { dst[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1); } else { @@ -6227,6 +6271,7 @@ VKAPI_ATTR VkResult VKAPI_CALL radv_EndCommandBuffer(VkCommandBuffer commandBuffer) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); if (cmd_buffer->qf == RADV_QUEUE_SPARSE) return vk_command_buffer_end(&cmd_buffer->vk); @@ -6236,7 +6281,7 @@ radv_EndCommandBuffer(VkCommandBuffer commandBuffer) const bool is_gfx_or_ace = cmd_buffer->qf == RADV_QUEUE_GENERAL || cmd_buffer->qf == RADV_QUEUE_COMPUTE; if (is_gfx_or_ace) { - if (cmd_buffer->device->physical_device->info.gfx_level == GFX6) + if (pdev->info.gfx_level == GFX6) cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2; @@ -6330,6 +6375,7 @@ radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer, VkPipelineBi static void radv_bind_vs_input_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_graphics_pipeline *pipeline) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const struct radv_shader *vs_shader = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_VERTEX); const struct radv_vs_input_state *src = &pipeline->vs_input_state; @@ -6343,8 +6389,7 @@ radv_bind_vs_input_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_g cmd_buffer->state.dynamic_vs_input = *src; - if (cmd_buffer->device->physical_device->info.gfx_level == GFX6 || - cmd_buffer->device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level == GFX6 || pdev->info.gfx_level >= GFX10) { cmd_buffer->state.vbo_misaligned_mask = 0; cmd_buffer->state.vbo_misaligned_mask_invalid = src->attribute_mask; } @@ -6374,6 +6419,7 @@ radv_bind_custom_blend_mode(struct radv_cmd_buffer *cmd_buffer, unsigned custom_ static void radv_bind_pre_rast_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *shader) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); bool mesh_shading = shader->info.stage == MESA_SHADER_MESH; const struct radv_userdata_info *loc; @@ -6392,7 +6438,7 @@ radv_bind_pre_rast_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_ */ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER; - if (cmd_buffer->device->physical_device->use_ngg_streamout) { + if (pdev->use_ngg_streamout) { /* GFX11 needs GDS OA for streamout. */ cmd_buffer->gds_oa_needed = true; } @@ -6522,7 +6568,8 @@ radv_bind_mesh_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_shad static void radv_bind_fragment_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *ps) { - const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; const struct radv_shader *previous_ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT]; const float min_sample_shading = 1.0f; @@ -6692,6 +6739,7 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); radv_reset_shader_object_state(cmd_buffer, pipelineBindPoint); @@ -6759,7 +6807,7 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline /* Prefetch all pipeline shaders at first draw time. */ cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_SHADERS; - if (cmd_buffer->device->physical_device->info.has_vgt_flush_ngg_legacy_bug && + if (pdev->info.has_vgt_flush_ngg_legacy_bug && (!cmd_buffer->state.emitted_graphics_pipeline || (cmd_buffer->state.emitted_graphics_pipeline->is_ngg && !cmd_buffer->state.graphics_pipeline->is_ngg))) { /* Transitioning from NGG to legacy GS requires @@ -6797,7 +6845,7 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline } } - if (cmd_buffer->device->physical_device->info.rbplus_allowed && + if (pdev->info.rbplus_allowed && (!cmd_buffer->state.emitted_graphics_pipeline || cmd_buffer->state.col_format_non_compacted != graphics_pipeline->col_format_non_compacted)) { cmd_buffer->state.col_format_non_compacted = graphics_pipeline->col_format_non_compacted; @@ -7229,6 +7277,7 @@ radv_CmdSetVertexInputEXT(VkCommandBuffer commandBuffer, uint32_t vertexBindingD const VkVertexInputAttributeDescription2EXT *pVertexAttributeDescriptions) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radv_cmd_state *state = &cmd_buffer->state; struct radv_vs_input_state *vs_state = &state->dynamic_vs_input; @@ -7249,8 +7298,8 @@ radv_CmdSetVertexInputEXT(VkCommandBuffer commandBuffer, uint32_t vertexBindingD vs_state->nontrivial_formats = 0; vs_state->bindings_match_attrib = true; - enum amd_gfx_level chip = cmd_buffer->device->physical_device->info.gfx_level; - enum radeon_family family = cmd_buffer->device->physical_device->info.family; + enum amd_gfx_level chip = pdev->info.gfx_level; + enum radeon_family family = pdev->info.family; const struct ac_vtx_format_info *vtx_info_table = ac_get_vtx_format_info_table(chip, family); for (unsigned i = 0; i < vertexAttributeDescriptionCount; i++) { @@ -7445,6 +7494,7 @@ radv_CmdSetColorWriteMaskEXT(VkCommandBuffer commandBuffer, uint32_t firstAttach const VkColorComponentFlags *pColorWriteMasks) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radv_cmd_state *state = &cmd_buffer->state; assert(firstAttachment + attachmentCount <= MAX_RTS); @@ -7457,7 +7507,7 @@ radv_CmdSetColorWriteMaskEXT(VkCommandBuffer commandBuffer, uint32_t firstAttach state->dirty |= RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_MASK; - if (cmd_buffer->device->physical_device->info.rbplus_allowed) + if (pdev->info.rbplus_allowed) state->dirty |= RADV_CMD_DIRTY_RBPLUS; } @@ -7590,6 +7640,7 @@ VKAPI_ATTR void VKAPI_CALL radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, const VkCommandBuffer *pCmdBuffers) { RADV_FROM_HANDLE(radv_cmd_buffer, primary, commandBuffer); + const struct radv_physical_device *pdev = radv_device_physical(primary->device); assert(commandBufferCount > 0); @@ -7607,8 +7658,7 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou /* Do not launch an IB2 for secondary command buffers that contain * DRAW_{INDEX}_INDIRECT_{MULTI} on GFX6-7 because it's illegal and hangs the GPU. */ - const bool allow_ib2 = - !secondary->state.uses_draw_indirect || secondary->device->physical_device->info.gfx_level >= GFX8; + const bool allow_ib2 = !secondary->state.uses_draw_indirect || pdev->info.gfx_level >= GFX8; primary->scratch_size_per_wave_needed = MAX2(primary->scratch_size_per_wave_needed, secondary->scratch_size_per_wave_needed); @@ -7779,6 +7829,7 @@ VKAPI_ATTR void VKAPI_CALL radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRenderingInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const struct VkSampleLocationsInfoEXT *sample_locs_info = vk_find_struct_const(pRenderingInfo->pNext, SAMPLE_LOCATIONS_INFO_EXT); @@ -7936,12 +7987,12 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe render->vrs_texel_size = vrs_texel_size; cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER; - if (cmd_buffer->device->physical_device->info.rbplus_allowed) + if (pdev->info.rbplus_allowed) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_RBPLUS; cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS | RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE; - if (render->vrs_att.iview && cmd_buffer->device->physical_device->info.gfx_level == GFX10_3) { + if (render->vrs_att.iview && pdev->info.gfx_level == GFX10_3) { if (render->ds_att.iview && radv_htile_enabled(render->ds_att.iview->image, render->ds_att.iview->vk.base_mip_level)) { /* When we have a VRS attachment and a depth/stencil attachment, we just need to copy the @@ -8049,6 +8100,8 @@ static void radv_cs_emit_compute_predication(const struct radv_device *device, struct radv_cmd_state *state, struct radeon_cmdbuf *cs, uint64_t inv_va, bool *inv_emitted, unsigned dwords) { + const struct radv_physical_device *pdev = radv_device_physical(device); + if (!state->predicating) return; @@ -8057,7 +8110,7 @@ radv_cs_emit_compute_predication(const struct radv_device *device, struct radv_c if (!state->predication_type) { /* Invert the condition the first time it is needed. */ if (!*inv_emitted) { - const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; *inv_emitted = true; @@ -8172,6 +8225,7 @@ ALWAYS_INLINE static void radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t draw_count, uint64_t count_va, uint32_t stride) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const struct radv_shader *mesh_shader = cmd_buffer->state.shaders[MESA_SHADER_MESH]; struct radeon_cmdbuf *cs = cmd_buffer->cs; uint32_t base_reg = cmd_buffer->state.vtx_base_sgpr; @@ -8189,12 +8243,12 @@ radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint3 uint32_t draw_id_enable = !!cmd_buffer->state.uses_drawid; uint32_t draw_id_reg = !draw_id_enable ? 0 : (base_reg + (xyz_dim_enable ? 12 : 0) - SI_SH_REG_OFFSET) >> 2; - uint32_t mode1_enable = !cmd_buffer->device->physical_device->mesh_fast_launch_2; + uint32_t mode1_enable = !pdev->mesh_fast_launch_2; radeon_emit(cs, PKT3(PKT3_DISPATCH_MESH_INDIRECT_MULTI, 7, predicating) | PKT3_RESET_FILTER_CAM_S(1)); radeon_emit(cs, 0); /* data_offset */ radeon_emit(cs, S_4C1_XYZ_DIM_REG(xyz_dim_reg) | S_4C1_DRAW_INDEX_REG(draw_id_reg)); - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) + if (pdev->info.gfx_level >= GFX11) radeon_emit(cs, S_4C2_DRAW_INDEX_ENABLE(draw_id_enable) | S_4C2_COUNT_INDIRECT_ENABLE(!!count_va) | S_4C2_XYZ_DIM_ENABLE(xyz_dim_enable) | S_4C2_MODE1_ENABLE(mode1_enable)); else @@ -8278,6 +8332,7 @@ ALWAYS_INLINE static void radv_cs_emit_dispatch_taskmesh_gfx_packet(const struct radv_device *device, const struct radv_cmd_state *cmd_state, struct radeon_cmdbuf *cs) { + const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_shader *mesh_shader = cmd_state->shaders[MESA_SHADER_MESH]; const bool predicating = cmd_state->predicating; @@ -8289,13 +8344,13 @@ radv_cs_emit_dispatch_taskmesh_gfx_packet(const struct radv_device *device, cons uint32_t xyz_dim_en = mesh_shader->info.cs.uses_grid_size; uint32_t xyz_dim_reg = !xyz_dim_en ? 0 : (cmd_state->vtx_base_sgpr - SI_SH_REG_OFFSET) >> 2; uint32_t ring_entry_reg = ((mesh_shader->info.user_data_0 - SI_SH_REG_OFFSET) >> 2) + ring_entry_loc->sgpr_idx; - uint32_t mode1_en = !device->physical_device->mesh_fast_launch_2; + uint32_t mode1_en = !pdev->mesh_fast_launch_2; uint32_t linear_dispatch_en = cmd_state->shaders[MESA_SHADER_TASK]->info.cs.linear_taskmesh_dispatch; const bool sqtt_en = !!device->sqtt.bo; radeon_emit(cs, PKT3(PKT3_DISPATCH_TASKMESH_GFX, 2, predicating) | PKT3_RESET_FILTER_CAM_S(1)); radeon_emit(cs, S_4D0_RING_ENTRY_REG(ring_entry_reg) | S_4D0_XYZ_DIM_REG(xyz_dim_reg)); - if (device->physical_device->info.gfx_level >= GFX11) + if (pdev->info.gfx_level >= GFX11) radeon_emit(cs, S_4D1_XYZ_DIM_ENABLE(xyz_dim_en) | S_4D1_MODE1_ENABLE(mode1_en) | S_4D1_LINEAR_DISPATCH_ENABLE(linear_dispatch_en) | S_4D1_THREAD_TRACE_MARKER_ENABLE(sqtt_en)); else @@ -8412,12 +8467,13 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, const struct const int32_t *vertexOffset) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radv_cmd_state *state = &cmd_buffer->state; struct radeon_cmdbuf *cs = cmd_buffer->cs; const int index_size = radv_get_vgt_index_size(state->index_type); unsigned i = 0; const bool uses_drawid = state->uses_drawid; - const bool can_eop = !uses_drawid && cmd_buffer->device->physical_device->info.gfx_level >= GFX10; + const bool can_eop = !uses_drawid && pdev->info.gfx_level >= GFX10; if (uses_drawid) { if (vertexOffset) { @@ -8427,7 +8483,7 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, const struct uint64_t index_va = state->index_va + draw->firstIndex * index_size; /* Handle draw calls with 0-sized index buffers if the GPU can't support them. */ - if (!remaining_indexes && cmd_buffer->device->physical_device->info.has_zero_index_buffer_bug) + if (!remaining_indexes && pdev->info.has_zero_index_buffer_bug) radv_handle_zero_index_buffer_bug(cmd_buffer, &index_va, &remaining_indexes); if (i > 0) @@ -8449,7 +8505,7 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, const struct uint64_t index_va = state->index_va + draw->firstIndex * index_size; /* Handle draw calls with 0-sized index buffers if the GPU can't support them. */ - if (!remaining_indexes && cmd_buffer->device->physical_device->info.has_zero_index_buffer_bug) + if (!remaining_indexes && pdev->info.has_zero_index_buffer_bug) radv_handle_zero_index_buffer_bug(cmd_buffer, &index_va, &remaining_indexes); if (i > 0) { @@ -8477,7 +8533,7 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, const struct } } else { if (vertexOffset) { - if (cmd_buffer->device->physical_device->info.gfx_level == GFX10) { + if (pdev->info.gfx_level == GFX10) { /* GFX10 has a bug that consecutive draw packets with NOT_EOP must not have * count == 0 for the last draw that doesn't have NOT_EOP. */ @@ -8496,7 +8552,7 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, const struct uint64_t index_va = state->index_va + draw->firstIndex * index_size; /* Handle draw calls with 0-sized index buffers if the GPU can't support them. */ - if (!remaining_indexes && cmd_buffer->device->physical_device->info.has_zero_index_buffer_bug) + if (!remaining_indexes && pdev->info.has_zero_index_buffer_bug) radv_handle_zero_index_buffer_bug(cmd_buffer, &index_va, &remaining_indexes); if (!state->render.view_mask) { @@ -8516,7 +8572,7 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, const struct uint64_t index_va = state->index_va + draw->firstIndex * index_size; /* Handle draw calls with 0-sized index buffers if the GPU can't support them. */ - if (!remaining_indexes && cmd_buffer->device->physical_device->info.has_zero_index_buffer_bug) + if (!remaining_indexes && pdev->info.has_zero_index_buffer_bug) radv_handle_zero_index_buffer_bug(cmd_buffer, &index_va, &remaining_indexes); const VkMultiDrawIndexedInfoEXT *next = @@ -8589,11 +8645,12 @@ radv_cs_emit_mesh_dispatch_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t x ALWAYS_INLINE static void radv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const uint32_t view_mask = cmd_buffer->state.render.view_mask; radv_emit_userdata_mesh(cmd_buffer, x, y, z); - if (cmd_buffer->device->physical_device->mesh_fast_launch_2) { + if (pdev->mesh_fast_launch_2) { if (!view_mask) { radv_cs_emit_mesh_dispatch_packet(cmd_buffer, x, y, z); } else { @@ -8685,6 +8742,7 @@ radv_emit_indirect_taskmesh_draw_packets(const struct radv_device *device, struc struct radeon_cmdbuf *cs, struct radeon_cmdbuf *ace_cs, const struct radv_draw_info *info, uint64_t workaround_cond_va) { + const struct radv_physical_device *pdev = radv_device_physical(device); const uint32_t view_mask = cmd_state->render.view_mask; struct radeon_winsys *ws = device->ws; const unsigned num_views = MAX2(1, util_bitcount(view_mask)); @@ -8698,7 +8756,7 @@ radv_emit_indirect_taskmesh_draw_packets(const struct radv_device *device, struc if (count_va) radv_cs_add_buffer(ws, ace_cs, info->count_buffer->bo); - if (device->physical_device->info.has_taskmesh_indirect0_bug && count_va) { + if (pdev->info.has_taskmesh_indirect0_bug && count_va) { /* MEC firmware bug workaround. * When the count buffer contains zero, DISPATCH_TASKMESH_INDIRECT_MULTI_ACE hangs. * - We must ensure that DISPATCH_TASKMESH_INDIRECT_MULTI_ACE @@ -8799,6 +8857,7 @@ radv_emit_indirect_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct static uint64_t radv_get_needed_dynamic_states(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); uint64_t dynamic_states = RADV_DYNAMIC_ALL; if (cmd_buffer->state.graphics_pipeline) @@ -8808,7 +8867,7 @@ radv_get_needed_dynamic_states(struct radv_cmd_buffer *cmd_buffer) if (!cmd_buffer->state.shaders[MESA_SHADER_TESS_CTRL]) dynamic_states &= ~(RADV_DYNAMIC_PATCH_CONTROL_POINTS | RADV_DYNAMIC_TESS_DOMAIN_ORIGIN); - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX10_3) { + if (pdev->info.gfx_level >= GFX10_3) { if (cmd_buffer->state.shaders[MESA_SHADER_MESH]) dynamic_states &= ~(RADV_DYNAMIC_VERTEX_INPUT | RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE | RADV_DYNAMIC_PRIMITIVE_TOPOLOGY); @@ -8984,7 +9043,8 @@ radv_emit_fs_state(struct radv_cmd_buffer *cmd_buffer) static void radv_emit_db_shader_control(struct radv_cmd_buffer *cmd_buffer) { - const struct radeon_info *gpu_info = &cmd_buffer->device->physical_device->info; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + const struct radeon_info *gpu_info = &pdev->info; const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT]; const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; const bool uses_ds_feedback_loop = @@ -9050,6 +9110,7 @@ radv_emit_db_shader_control(struct radv_cmd_buffer *cmd_buffer) static void radv_emit_streamout_enable_state(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const struct radv_streamout_state *so = &cmd_buffer->state.streamout; const bool streamout_enabled = radv_is_streamout_enabled(cmd_buffer); uint32_t enabled_stream_buffers_mask = 0; @@ -9059,7 +9120,7 @@ radv_emit_streamout_enable_state(struct radv_cmd_buffer *cmd_buffer) enabled_stream_buffers_mask = info->so.enabled_stream_buffers_mask; - if (!cmd_buffer->device->physical_device->use_ngg_streamout) { + if (!pdev->use_ngg_streamout) { u_foreach_bit (i, so->enabled_mask) { radeon_set_context_reg(cmd_buffer->cs, R_028AD4_VGT_STRMOUT_VTX_STRIDE_0 + 16 * i, info->so.strides[i]); } @@ -9088,6 +9149,7 @@ radv_cmdbuf_get_last_vgt_api_stage(const struct radv_cmd_buffer *cmd_buffer) static void radv_emit_graphics_shaders(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const gl_shader_stage last_vgt_api_stage = radv_cmdbuf_get_last_vgt_api_stage(cmd_buffer); const struct radv_shader *last_vgt_shader = cmd_buffer->state.shaders[last_vgt_api_stage]; struct radv_device *device = cmd_buffer->device; @@ -9142,8 +9204,7 @@ radv_emit_graphics_shaders(struct radv_cmd_buffer *cmd_buffer) radv_emit_mesh_shader(device, cs, cs, cmd_buffer->state.shaders[MESA_SHADER_MESH]); break; case MESA_SHADER_TASK: - radv_emit_compute_shader(device->physical_device, cmd_buffer->gang.cs, - cmd_buffer->state.shaders[MESA_SHADER_TASK]); + radv_emit_compute_shader(pdev, cmd_buffer->gang.cs, cmd_buffer->state.shaders[MESA_SHADER_TASK]); break; default: unreachable("invalid bind stage"); @@ -9159,7 +9220,7 @@ radv_emit_graphics_shaders(struct radv_cmd_buffer *cmd_buffer) &vgt_shader_cfg_key); radv_emit_vgt_shader_config(device, cs, &vgt_shader_cfg_key); - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX10_3) { + if (pdev->info.gfx_level >= GFX10_3) { gfx103_emit_vgt_draw_payload_cntl(cs, cmd_buffer->state.shaders[MESA_SHADER_MESH], false); gfx103_emit_vrs_state(device, cs, NULL, false, false, false); } @@ -9176,6 +9237,7 @@ static void radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info) { const struct radv_device *device = cmd_buffer->device; + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_shader_part *ps_epilog = NULL; if (cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT] && @@ -9200,8 +9262,7 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r !col_format_non_compacted) col_format_non_compacted = V_028714_SPI_SHADER_32_R; - if (device->physical_device->info.rbplus_allowed && - cmd_buffer->state.col_format_non_compacted != col_format_non_compacted) { + if (pdev->info.rbplus_allowed && cmd_buffer->state.col_format_non_compacted != col_format_non_compacted) { cmd_buffer->state.col_format_non_compacted = col_format_non_compacted; cmd_buffer->state.dirty |= RADV_CMD_DIRTY_RBPLUS; } @@ -9212,9 +9273,8 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r * 1. radv_need_late_scissor_emission * 2. any dirty dynamic flags that may cause context rolls */ - const bool late_scissor_emission = cmd_buffer->device->physical_device->info.has_gfx9_scissor_bug - ? radv_need_late_scissor_emission(cmd_buffer, info) - : false; + const bool late_scissor_emission = + pdev->info.has_gfx9_scissor_bug ? radv_need_late_scissor_emission(cmd_buffer, info) : false; if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_RBPLUS) radv_emit_rbplus_state(cmd_buffer); @@ -9288,6 +9348,7 @@ static void radv_bind_graphics_shaders(struct radv_cmd_buffer *cmd_buffer) { const struct radv_device *device = cmd_buffer->device; + const struct radv_physical_device *pdev = radv_device_physical(device); uint32_t push_constant_size = 0, dynamic_offset_count = 0; bool need_indirect_descriptor_sets = false; @@ -9333,7 +9394,7 @@ radv_bind_graphics_shaders(struct radv_cmd_buffer *cmd_buffer) const gl_shader_stage last_vgt_api_stage = radv_cmdbuf_get_last_vgt_api_stage(cmd_buffer); assume(last_vgt_api_stage != MESA_SHADER_NONE); - if (device->physical_device->info.has_vgt_flush_ngg_legacy_bug && + if (pdev->info.has_vgt_flush_ngg_legacy_bug && (!cmd_buffer->state.last_vgt_shader || (cmd_buffer->state.last_vgt_shader->info.is_ngg && !cmd_buffer->state.shaders[last_vgt_api_stage]->info.is_ngg))) { /* Transitioning from NGG to legacy GS requires VGT_FLUSH on GFX10 and Navi21. VGT_FLUSH is @@ -9393,8 +9454,7 @@ radv_bind_graphics_shaders(struct radv_cmd_buffer *cmd_buffer) if (radv_needs_null_export_workaround(device, ps, 0)) col_format_non_compacted = V_028714_SPI_SHADER_32_R; - if (device->physical_device->info.rbplus_allowed && - cmd_buffer->state.col_format_non_compacted != col_format_non_compacted) { + if (pdev->info.rbplus_allowed && cmd_buffer->state.col_format_non_compacted != col_format_non_compacted) { cmd_buffer->state.col_format_non_compacted = col_format_non_compacted; cmd_buffer->state.dirty |= RADV_CMD_DIRTY_RBPLUS; } @@ -9409,7 +9469,7 @@ radv_bind_graphics_shaders(struct radv_cmd_buffer *cmd_buffer) pc_state->size = push_constant_size; pc_state->dynamic_offset_count = dynamic_offset_count; - if (device->physical_device->info.gfx_level <= GFX9) { + if (pdev->info.gfx_level <= GFX9) { cmd_buffer->state.ia_multi_vgt_param = radv_compute_ia_multi_vgt_param(device, cmd_buffer->state.shaders); } @@ -9425,7 +9485,8 @@ radv_bind_graphics_shaders(struct radv_cmd_buffer *cmd_buffer) ALWAYS_INLINE static bool radv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, uint32_t drawCount, bool dgc) { - const bool has_prefetch = cmd_buffer->device->physical_device->info.gfx_level >= GFX7; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + const bool has_prefetch = pdev->info.gfx_level >= GFX7; ASSERTED const unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4096 + 128 * (drawCount - 1)); @@ -9443,7 +9504,7 @@ radv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info return false; } - if (!info->indexed && cmd_buffer->device->physical_device->info.gfx_level >= GFX7) { + if (!info->indexed && pdev->info.gfx_level >= GFX7) { /* On GFX7 and later, non-indexed draws overwrite VGT_INDEX_TYPE, * so the state must be re-emitted before the next indexed * draw. @@ -9578,8 +9639,9 @@ radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_ ALWAYS_INLINE static void radv_after_draw(struct radv_cmd_buffer *cmd_buffer, bool dgc) { - const struct radeon_info *gpu_info = &cmd_buffer->device->physical_device->info; - bool has_prefetch = cmd_buffer->device->physical_device->info.gfx_level >= GFX7; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + const struct radeon_info *gpu_info = &pdev->info; + bool has_prefetch = pdev->info.gfx_level >= GFX7; /* Start prefetches after the draw has been started. Both will * run in parallel, but starting the draw first is more * important. @@ -9857,7 +9919,7 @@ radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _b RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer); - + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radv_draw_info info; info.indirect = buffer; @@ -9876,7 +9938,7 @@ radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _b if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) { uint64_t workaround_cond_va = 0; - if (cmd_buffer->device->physical_device->info.has_taskmesh_indirect0_bug && info.count_buffer) { + if (pdev->info.has_taskmesh_indirect0_bug && info.count_buffer) { /* Allocate a 32-bit value for the MEC firmware bug workaround. */ uint32_t workaround_cond_init = 0; uint32_t workaround_cond_off; @@ -9911,6 +9973,7 @@ radv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPre const bool compute = layout->pipeline_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE; const bool use_predication = radv_use_dgc_predication(cmd_buffer, pGeneratedCommandsInfo); const struct radv_device *device = cmd_buffer->device; + const struct radv_physical_device *pdev = radv_device_physical(device); /* Secondary command buffers are needed for the full extension but can't use * PKT3_INDIRECT_BUFFER. @@ -10010,7 +10073,7 @@ radv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPre cmd_buffer->push_constant_stages |= graphics_pipeline->active_stages; - if (!layout->indexed && cmd_buffer->device->physical_device->info.gfx_level >= GFX7) { + if (!layout->indexed && pdev->info.gfx_level >= GFX7) { /* On GFX7 and later, non-indexed draws overwrite VGT_INDEX_TYPE, so the state must be * re-emitted before the next indexed draw. */ @@ -10034,6 +10097,7 @@ static void radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *compute_shader, const struct radv_dispatch_info *info) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); unsigned dispatch_initiator = cmd_buffer->device->dispatch_initiator; struct radeon_winsys *ws = cmd_buffer->device->ws; bool predicating = cmd_buffer->state.predicating; @@ -10045,7 +10109,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 30); if (compute_shader->info.wave_size == 32) { - assert(cmd_buffer->device->physical_device->info.gfx_level >= GFX10); + assert(pdev->info.gfx_level >= GFX10); dispatch_initiator |= S_00B800_CS_W32_EN(1); } @@ -10069,7 +10133,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv unsigned reg = R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4; if (cmd_buffer->device->load_grid_size_from_user_sgpr) { - assert(cmd_buffer->device->physical_device->info.gfx_level >= GFX10_3); + assert(pdev->info.gfx_level >= GFX10_3); radeon_emit(cs, PKT3(PKT3_LOAD_SH_REG_INDEX, 3, 0)); radeon_emit(cs, info->va); radeon_emit(cs, info->va >> 32); @@ -10082,9 +10146,9 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv if (radv_cmd_buffer_uses_mec(cmd_buffer)) { uint64_t indirect_va = info->va; - const bool needs_align32_workaround = - cmd_buffer->device->physical_device->info.has_async_compute_align32_bug && - cmd_buffer->qf == RADV_QUEUE_COMPUTE && !radv_is_aligned(indirect_va, 32); + const bool needs_align32_workaround = pdev->info.has_async_compute_align32_bug && + cmd_buffer->qf == RADV_QUEUE_COMPUTE && + !radv_is_aligned(indirect_va, 32); const unsigned ace_predication_size = 4 /* DISPATCH_INDIRECT */ + (needs_align32_workaround ? 6 * 3 /* 3x COPY_DATA */ : 0); @@ -10206,8 +10270,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv predicating = false; } - if (cmd_buffer->device->physical_device->info.has_async_compute_threadgroup_bug && - cmd_buffer->qf == RADV_QUEUE_COMPUTE) { + if (pdev->info.has_async_compute_threadgroup_bug && cmd_buffer->qf == RADV_QUEUE_COMPUTE) { for (unsigned i = 0; i < 3; i++) { if (info->unaligned) { /* info->blocks is already in thread dimensions for unaligned dispatches. */ @@ -10258,7 +10321,8 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_inf struct radv_compute_pipeline *pipeline, struct radv_shader *compute_shader, VkPipelineBindPoint bind_point) { - bool has_prefetch = cmd_buffer->device->physical_device->info.gfx_level >= GFX7; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + bool has_prefetch = pdev->info.gfx_level >= GFX7; bool pipeline_is_dirty = pipeline != cmd_buffer->state.emitted_compute_pipeline; if (compute_shader->info.cs.regalloc_hang_bug) @@ -10331,6 +10395,7 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_inf static void radv_dgc_before_dispatch(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radv_compute_pipeline *pipeline = cmd_buffer->state.compute_pipeline; struct radv_shader *compute_shader = cmd_buffer->state.shaders[MESA_SHADER_COMPUTE]; bool pipeline_is_dirty = pipeline != cmd_buffer->state.emitted_compute_pipeline; @@ -10349,7 +10414,7 @@ radv_dgc_before_dispatch(struct radv_cmd_buffer *cmd_buffer) radv_upload_compute_shader_descriptors(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE); if (pipeline_is_dirty) { - const bool has_prefetch = cmd_buffer->device->physical_device->info.gfx_level >= GFX7; + const bool has_prefetch = pdev->info.gfx_level >= GFX7; if (has_prefetch) radv_emit_shader_prefetch(cmd_buffer, compute_shader); @@ -10531,6 +10596,8 @@ static void radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, VkTraceRaysIndirectCommand2KHR *tables, uint64_t indirect_va, enum radv_rt_mode mode) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_RT) return; @@ -10546,7 +10613,7 @@ radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, VkTraceRaysIndirectCommand2K uint32_t wave_size = rt_prolog->info.wave_size; /* The hardware register is specified as a multiple of 64 or 256 DWORDS. */ - unsigned scratch_alloc_granule = cmd_buffer->device->physical_device->info.gfx_level >= GFX11 ? 256 : 1024; + unsigned scratch_alloc_granule = pdev->info.gfx_level >= GFX11 ? 256 : 1024; scratch_bytes_per_wave += align(cmd_buffer->state.rt_stack_size * wave_size, scratch_alloc_granule); cmd_buffer->compute_scratch_size_per_wave_needed = @@ -10817,6 +10884,7 @@ uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range, uint32_t value) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radv_barrier_data barrier = {0}; uint32_t flush_bits = 0; unsigned size = 0; @@ -10826,7 +10894,7 @@ radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, cons flush_bits |= radv_clear_dcc(cmd_buffer, image, range, value); - if (cmd_buffer->device->physical_device->info.gfx_level == GFX8) { + if (pdev->info.gfx_level == GFX8) { /* When DCC is enabled with mipmaps, some levels might not * support fast clears and we have to initialize them as "fully * expanded". @@ -11007,8 +11075,9 @@ radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_ima VkImageLayout dst_layout, uint32_t src_family_index, uint32_t dst_family_index, const VkImageSubresourceRange *range, struct radv_sample_locations_state *sample_locs) { - enum radv_queue_family src_qf = vk_queue_to_radv(cmd_buffer->device->physical_device, src_family_index); - enum radv_queue_family dst_qf = vk_queue_to_radv(cmd_buffer->device->physical_device, dst_family_index); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + enum radv_queue_family src_qf = vk_queue_to_radv(pdev, src_family_index); + enum radv_queue_family dst_qf = vk_queue_to_radv(pdev, dst_family_index); if (image->exclusive && src_family_index != dst_family_index) { /* This is an acquire or a release operation and there will be * a corresponding release/acquire. Do the transition in the @@ -11169,6 +11238,7 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, VkPipelineStageFlags2 stageMask, unsigned value) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radeon_cmdbuf *cs = cmd_buffer->cs; uint64_t va = radv_buffer_get_va(event->bo); @@ -11228,9 +11298,8 @@ write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, VkPipe event_type = V_028A90_BOTTOM_OF_PIPE_TS; } - radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->info.gfx_level, cmd_buffer->qf, event_type, - 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, value, - cmd_buffer->gfx9_eop_bug_va); + radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, event_type, 0, EOP_DST_SEL_MEM, + EOP_DATA_SEL_VALUE_32BIT, va, value, cmd_buffer->gfx9_eop_bug_va); } assert(cmd_buffer->cs->cdw <= cdw_max); @@ -11290,13 +11359,14 @@ radv_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const Vk void radv_begin_conditional_rendering(struct radv_cmd_buffer *cmd_buffer, uint64_t va, bool draw_visible) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radeon_cmdbuf *cs = cmd_buffer->cs; unsigned pred_op = PREDICATION_OP_BOOL32; radv_emit_cache_flush(cmd_buffer); if (cmd_buffer->qf == RADV_QUEUE_GENERAL) { - if (!cmd_buffer->device->physical_device->info.has_32bit_predication) { + if (!pdev->info.has_32bit_predication) { uint64_t pred_value = 0, pred_va; unsigned pred_offset; @@ -11445,6 +11515,7 @@ radv_CmdBindTransformFeedbackBuffersEXT(VkCommandBuffer commandBuffer, uint32_t static void radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radv_streamout_state *so = &cmd_buffer->state.streamout; bool old_streamout_enabled = radv_is_streamout_enabled(cmd_buffer); uint32_t old_hw_enabled_mask = so->hw_enabled_mask; @@ -11454,12 +11525,11 @@ radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable) so->hw_enabled_mask = so->enabled_mask | (so->enabled_mask << 4) | (so->enabled_mask << 8) | (so->enabled_mask << 12); - if (!cmd_buffer->device->physical_device->use_ngg_streamout && - ((old_streamout_enabled != radv_is_streamout_enabled(cmd_buffer)) || - (old_hw_enabled_mask != so->hw_enabled_mask))) + if (!pdev->use_ngg_streamout && ((old_streamout_enabled != radv_is_streamout_enabled(cmd_buffer)) || + (old_hw_enabled_mask != so->hw_enabled_mask))) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_ENABLE; - if (cmd_buffer->device->physical_device->use_ngg_streamout) { + if (pdev->use_ngg_streamout) { /* Re-emit streamout desciptors because with NGG streamout, a buffer size of 0 acts like a * disable bit and this is needed when streamout needs to be ignored in shaders. */ @@ -11470,20 +11540,21 @@ radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable) static void radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radeon_cmdbuf *cs = cmd_buffer->cs; unsigned reg_strmout_cntl; ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 14); /* The register is at different places on different ASICs. */ - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); radeon_emit(cs, S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | S_370_ENGINE_SEL(V_370_ME)); radeon_emit(cs, R_0300FC_CP_STRMOUT_CNTL >> 2); radeon_emit(cs, 0); radeon_emit(cs, 0); - } else if (cmd_buffer->device->physical_device->info.gfx_level >= GFX7) { + } else if (pdev->info.gfx_level >= GFX7) { reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0); } else { @@ -11511,12 +11582,13 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC const VkDeviceSize *pCounterBufferOffsets) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; struct radv_streamout_state *so = &cmd_buffer->state.streamout; struct radeon_cmdbuf *cs = cmd_buffer->cs; assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); - if (!cmd_buffer->device->physical_device->use_ngg_streamout) + if (!pdev->use_ngg_streamout) radv_flush_vgt_streamout(cmd_buffer); ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, MAX_SO_BUFFERS * 10); @@ -11542,7 +11614,7 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo); } - if (cmd_buffer->device->physical_device->use_ngg_streamout) { + if (pdev->use_ngg_streamout) { if (append) { radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); radeon_emit(cs, @@ -11553,7 +11625,7 @@ radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstC radeon_emit(cs, 0); } else { /* The PKT3 CAM bit workaround seems needed for initializing this GDS register to zero. */ - radeon_set_perfctr_reg(cmd_buffer->device->physical_device->info.gfx_level, cmd_buffer->qf, cs, + radeon_set_perfctr_reg(pdev->info.gfx_level, cmd_buffer->qf, cs, R_031088_GDS_STRMOUT_DWORDS_WRITTEN_0 + i * 4, 0); } } else { @@ -11598,12 +11670,13 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou const VkBuffer *pCounterBuffers, const VkDeviceSize *pCounterBufferOffsets) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radv_streamout_state *so = &cmd_buffer->state.streamout; struct radeon_cmdbuf *cs = cmd_buffer->cs; assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); - if (cmd_buffer->device->physical_device->use_ngg_streamout) { + if (pdev->use_ngg_streamout) { /* Wait for streamout to finish before reading GDS_STRMOUT registers. */ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH; radv_emit_cache_flush(cmd_buffer); @@ -11634,7 +11707,7 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo); } - if (cmd_buffer->device->physical_device->use_ngg_streamout) { + if (pdev->use_ngg_streamout) { if (append) { radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); radeon_emit(cs, @@ -11675,7 +11748,8 @@ radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCou static void radv_emit_strmout_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *draw_info) { - const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; uint64_t va = radv_buffer_get_va(draw_info->strmout_buffer->bo); struct radeon_cmdbuf *cs = cmd_buffer->cs; @@ -11740,6 +11814,7 @@ radv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlag { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_buffer, buffer, dstBuffer); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radeon_cmdbuf *cs = cmd_buffer->cs; const uint64_t va = radv_buffer_get_va(buffer->bo) + buffer->offset + dstOffset; @@ -11764,9 +11839,8 @@ radv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlag radeon_emit(cs, va); radeon_emit(cs, va >> 32); } else { - radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->info.gfx_level, cmd_buffer->qf, - V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, marker, - cmd_buffer->gfx9_eop_bug_va); + radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0, + EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, marker, cmd_buffer->gfx9_eop_bug_va); } assert(cmd_buffer->cs->cdw <= cdw_max); @@ -11912,6 +11986,7 @@ radv_bind_compute_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_shader_ { struct radv_shader *shader = shader_obj ? shader_obj->shader : NULL; const struct radv_device *device = cmd_buffer->device; + const struct radv_physical_device *pdev = radv_device_physical(device); struct radeon_cmdbuf *cs = cmd_buffer->cs; radv_bind_shader(cmd_buffer, shader, MESA_SHADER_COMPUTE); @@ -11923,7 +11998,7 @@ radv_bind_compute_shader(struct radv_cmd_buffer *cmd_buffer, struct radv_shader_ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, shader->bo); - radv_emit_compute_shader(device->physical_device, cs, shader); + radv_emit_compute_shader(pdev, cs, shader); /* Update push constants/indirect descriptors state. */ struct radv_descriptor_state *descriptors_state = diff --git a/src/amd/vulkan/radv_cp_reg_shadowing.c b/src/amd/vulkan/radv_cp_reg_shadowing.c index 528995e885a..7c65d119d05 100644 --- a/src/amd/vulkan/radv_cp_reg_shadowing.c +++ b/src/amd/vulkan/radv_cp_reg_shadowing.c @@ -38,8 +38,9 @@ radv_set_context_reg_array(struct radeon_cmdbuf *cs, unsigned reg, unsigned num, VkResult radv_create_shadow_regs_preamble(struct radv_device *device, struct radv_queue_state *queue_state) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct radeon_winsys *ws = device->ws; - const struct radeon_info *gpu_info = &device->physical_device->info; + const struct radeon_info *gpu_info = &pdev->info; VkResult result; struct radeon_cmdbuf *cs = ws->cs_create(ws, AMD_IP_GFX, false); @@ -125,7 +126,8 @@ radv_emit_shadow_regs_preamble(struct radeon_cmdbuf *cs, const struct radv_devic VkResult radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct radv_queue *queue) { - const struct radeon_info *gpu_info = &device->physical_device->info; + const struct radv_physical_device *pdev = radv_device_physical(device); + const struct radeon_info *gpu_info = &pdev->info; struct radeon_winsys *ws = device->ws; struct radeon_cmdbuf *cs; VkResult result; diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c index 144526851cf..01a27011b54 100644 --- a/src/amd/vulkan/radv_debug.c +++ b/src/amd/vulkan/radv_debug.c @@ -108,17 +108,19 @@ radv_dump_trace(const struct radv_device *device, struct radeon_cmdbuf *cs, FILE static void radv_dump_mmapped_reg(const struct radv_device *device, FILE *f, unsigned offset) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct radeon_winsys *ws = device->ws; uint32_t value; if (ws->read_registers(ws, offset, 1, &value)) - ac_dump_reg(f, device->physical_device->info.gfx_level, device->physical_device->info.family, offset, value, ~0); + ac_dump_reg(f, pdev->info.gfx_level, pdev->info.family, offset, value, ~0); } static void radv_dump_debug_registers(const struct radv_device *device, FILE *f) { - const struct radeon_info *gpu_info = &device->physical_device->info; + const struct radv_physical_device *pdev = radv_device_physical(device); + const struct radeon_info *gpu_info = &pdev->info; fprintf(f, "Memory-mapped registers:\n"); radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS); @@ -190,8 +192,9 @@ radv_dump_combined_image_sampler_descriptor(enum amd_gfx_level gfx_level, enum r static void radv_dump_descriptor_set(const struct radv_device *device, const struct radv_descriptor_set *set, unsigned id, FILE *f) { - enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; - enum radeon_family family = device->physical_device->info.family; + const struct radv_physical_device *pdev = radv_device_physical(device); + enum amd_gfx_level gfx_level = pdev->info.gfx_level; + enum radeon_family family = pdev->info.family; const struct radv_descriptor_set_layout *layout; int i; @@ -376,6 +379,8 @@ static void radv_dump_shader(struct radv_device *device, struct radv_pipeline *pipeline, struct radv_shader *shader, gl_shader_stage stage, const char *dump_dir, FILE *f) { + const struct radv_physical_device *pdev = radv_device_physical(device); + if (!shader) return; @@ -400,7 +405,7 @@ radv_dump_shader(struct radv_device *device, struct radv_pipeline *pipeline, str fprintf(f, "NIR:\n%s\n", shader->nir_string); } - fprintf(f, "%s IR:\n%s\n", device->physical_device->use_llvm ? "LLVM" : "ACO", shader->ir_string); + fprintf(f, "%s IR:\n%s\n", pdev->use_llvm ? "LLVM" : "ACO", shader->ir_string); fprintf(f, "DISASM:\n%s\n", shader->disasm_string); radv_dump_shader_stats(device, pipeline, shader, stage, f); @@ -504,9 +509,10 @@ radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f) } if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR)) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]; - enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; - unsigned num_waves = ac_get_wave_info(gfx_level, &device->physical_device->info, waves); + enum amd_gfx_level gfx_level = pdev->info.gfx_level; + unsigned num_waves = ac_get_wave_info(gfx_level, &pdev->info, waves); fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves); @@ -633,21 +639,22 @@ radv_dump_app_info(const struct radv_device *device, FILE *f) static void radv_dump_device_name(const struct radv_device *device, FILE *f) { - const struct radeon_info *gpu_info = &device->physical_device->info; + const struct radv_physical_device *pdev = radv_device_physical(device); + const struct radeon_info *gpu_info = &pdev->info; #ifndef _WIN32 char kernel_version[128] = {0}; struct utsname uname_data; #endif #ifdef _WIN32 - fprintf(f, "Device name: %s (DRM %i.%i.%i)\n\n", device->physical_device->marketing_name, gpu_info->drm_major, - gpu_info->drm_minor, gpu_info->drm_patchlevel); + fprintf(f, "Device name: %s (DRM %i.%i.%i)\n\n", pdev->marketing_name, gpu_info->drm_major, gpu_info->drm_minor, + gpu_info->drm_patchlevel); #else if (uname(&uname_data) == 0) snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release); - fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", device->physical_device->marketing_name, gpu_info->drm_major, - gpu_info->drm_minor, gpu_info->drm_patchlevel, kernel_version); + fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", pdev->marketing_name, gpu_info->drm_major, gpu_info->drm_minor, + gpu_info->drm_patchlevel, kernel_version); #endif } @@ -655,18 +662,16 @@ static void radv_dump_umr_ring(const struct radv_queue *queue, FILE *f) { #ifndef _WIN32 + const struct radv_physical_device *pdev = radv_device_physical(queue->device); const enum amd_ip_type ring = radv_queue_ring(queue); - const struct radv_device *device = queue->device; char cmd[256]; /* TODO: Dump compute ring. */ if (ring != AMD_IP_GFX) return; - sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -RS %s 2>&1", device->physical_device->bus_info.domain, - device->physical_device->bus_info.bus, device->physical_device->bus_info.dev, - device->physical_device->bus_info.func, - device->physical_device->info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx"); + sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -RS %s 2>&1", pdev->bus_info.domain, pdev->bus_info.bus, + pdev->bus_info.dev, pdev->bus_info.func, pdev->info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx"); fprintf(f, "\nUMR GFX ring:\n\n"); radv_dump_cmd(cmd, f); #endif @@ -676,18 +681,17 @@ static void radv_dump_umr_waves(struct radv_queue *queue, FILE *f) { #ifndef _WIN32 + const struct radv_physical_device *pdev = radv_device_physical(queue->device); enum amd_ip_type ring = radv_queue_ring(queue); - struct radv_device *device = queue->device; char cmd[256]; /* TODO: Dump compute ring. */ if (ring != AMD_IP_GFX) return; - sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -O bits,halt_waves -go 0 -wa %s -go 1 2>&1", - device->physical_device->bus_info.domain, device->physical_device->bus_info.bus, - device->physical_device->bus_info.dev, device->physical_device->bus_info.func, - device->physical_device->info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx"); + sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -O bits,halt_waves -go 0 -wa %s -go 1 2>&1", pdev->bus_info.domain, + pdev->bus_info.bus, pdev->bus_info.dev, pdev->bus_info.func, + pdev->info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx"); fprintf(f, "\nUMR GFX waves:\n\n"); radv_dump_cmd(cmd, f); #endif @@ -707,7 +711,9 @@ radv_gpu_hang_occurred(struct radv_queue *queue, enum amd_ip_type ring) bool radv_vm_fault_occurred(struct radv_device *device, struct radv_winsys_gpuvm_fault_info *fault_info) { - if (!device->physical_device->info.has_gpuvm_fault_query) + const struct radv_physical_device *pdev = radv_device_physical(device); + + if (!pdev->info.has_gpuvm_fault_query) return false; return device->ws->query_gpuvm_fault(device->ws, fault_info); @@ -742,6 +748,7 @@ radv_check_gpu_hangs(struct radv_queue *queue, const struct radv_winsys_submit_i fprintf(stderr, "radv: GPU hang detected...\n"); #ifndef _WIN32 + const struct radv_physical_device *pdev = radv_device_physical(queue->device); const bool save_hang_report = !queue->device->vk.enabled_features.deviceFaultVendorBinary; struct radv_winsys_gpuvm_fault_info fault_info = {0}; struct radv_device *device = queue->device; @@ -822,7 +829,7 @@ radv_check_gpu_hangs(struct radv_queue *queue, const struct radv_winsys_submit_i if (vm_fault_occurred) { fprintf(f, "VM fault report.\n\n"); fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n", fault_info.addr); - ac_print_gpuvm_fault_status(f, device->physical_device->info.gfx_level, fault_info.status); + ac_print_gpuvm_fault_status(f, pdev->info.gfx_level, fault_info.status); } break; case RADV_DEVICE_FAULT_CHUNK_APP_INFO: @@ -830,7 +837,7 @@ radv_check_gpu_hangs(struct radv_queue *queue, const struct radv_winsys_submit_i break; case RADV_DEVICE_FAULT_CHUNK_GPU_INFO: radv_dump_device_name(device, f); - ac_print_gpu_info(&device->physical_device->info, f); + ac_print_gpu_info(&pdev->info, f); break; case RADV_DEVICE_FAULT_CHUNK_DMESG: radv_dump_dmesg(f); @@ -1010,12 +1017,13 @@ struct radv_sq_hw_reg { static void radv_dump_sq_hw_regs(struct radv_device *device) { - enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; - enum radeon_family family = device->physical_device->info.family; + const struct radv_physical_device *pdev = radv_device_physical(device); + enum amd_gfx_level gfx_level = pdev->info.gfx_level; + enum radeon_family family = pdev->info.family; struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6]; fprintf(stderr, "\nHardware registers:\n"); - if (device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level >= GFX10) { ac_dump_reg(stderr, gfx_level, family, R_000408_SQ_WAVE_STATUS, regs->status, ~0); ac_dump_reg(stderr, gfx_level, family, R_00040C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0); ac_dump_reg(stderr, gfx_level, family, R_00045C_SQ_WAVE_HW_ID1, regs->hw_id, ~0); @@ -1084,6 +1092,7 @@ radv_GetDeviceFaultInfoEXT(VkDevice _device, VkDeviceFaultCountsEXT *pFaultCount &pFaultCounts->addressInfoCount); struct radv_winsys_gpuvm_fault_info fault_info = {0}; RADV_FROM_HANDLE(radv_device, device, _device); + const struct radv_physical_device *pdev = radv_device_physical(device); bool vm_fault_occurred = false; /* Query if a GPUVM fault happened. */ @@ -1094,8 +1103,6 @@ radv_GetDeviceFaultInfoEXT(VkDevice _device, VkDeviceFaultCountsEXT *pFaultCount pFaultCounts->vendorBinarySize = 0; if (device->gpu_hang_report) { - const struct radv_physical_device *pdev = device->physical_device; - VkDeviceFaultVendorBinaryHeaderVersionOneEXT hdr; hdr.headerSize = sizeof(VkDeviceFaultVendorBinaryHeaderVersionOneEXT); @@ -1127,7 +1134,7 @@ radv_GetDeviceFaultInfoEXT(VkDevice _device, VkDeviceFaultCountsEXT *pFaultCount if (pFaultInfo) strncpy(pFaultInfo->description, "A GPUVM fault has been detected", sizeof(pFaultInfo->description)); - if (device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level >= GFX10) { addr_fault_info.addressType = G_00A130_RW(fault_info.status) ? VK_DEVICE_FAULT_ADDRESS_TYPE_WRITE_INVALID_EXT : VK_DEVICE_FAULT_ADDRESS_TYPE_READ_INVALID_EXT; } else { diff --git a/src/amd/vulkan/radv_descriptor_set.c b/src/amd/vulkan/radv_descriptor_set.c index d175be6b29f..7485bdc7164 100644 --- a/src/amd/vulkan/radv_descriptor_set.c +++ b/src/amd/vulkan/radv_descriptor_set.c @@ -1074,6 +1074,8 @@ write_texel_buffer_descriptor(struct radv_device *device, struct radv_cmd_buffer static ALWAYS_INLINE void write_buffer_descriptor(struct radv_device *device, unsigned *dst, uint64_t va, uint64_t range) { + const struct radv_physical_device *pdev = radv_device_physical(device); + if (!va) { memset(dst, 0, 4 * 4); return; @@ -1082,9 +1084,9 @@ write_buffer_descriptor(struct radv_device *device, unsigned *dst, uint64_t va, uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); - } else if (device->physical_device->info.gfx_level >= GFX10) { + } else if (pdev->info.gfx_level >= GFX10) { rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1); } else { diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 8112da8afdd..38ab62963f7 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -97,10 +97,10 @@ radv_GetMemoryHostPointerPropertiesEXT(VkDevice _device, VkExternalMemoryHandleT VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties) { RADV_FROM_HANDLE(radv_device, device, _device); + const struct radv_physical_device *pdev = radv_device_physical(device); switch (handleType) { case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: { - const struct radv_physical_device *pdev = device->physical_device; uint32_t memoryTypeBits = 0; for (int i = 0; i < pdev->memory_properties.memoryTypeCount; i++) { if (pdev->memory_domains[i] == RADEON_DOMAIN_GTT && !(pdev->memory_flags[i] & RADEON_FLAG_GTT_WC)) { @@ -186,8 +186,10 @@ static struct radv_shader_part_cache_ops vs_prolog_ops = { static VkResult radv_device_init_vs_prologs(struct radv_device *device) { + const struct radv_physical_device *pdev = radv_device_physical(device); + if (!radv_shader_part_cache_init(&device->vs_prologs, &vs_prolog_ops)) - return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return vk_error(pdev->instance, VK_ERROR_OUT_OF_HOST_MEMORY); /* don't pre-compile prologs if we want to print them */ if (device->instance->debug_flags & RADV_DEBUG_DUMP_PROLOGS) @@ -196,9 +198,9 @@ radv_device_init_vs_prologs(struct radv_device *device) struct radv_vs_prolog_key key; memset(&key, 0, sizeof(key)); key.as_ls = false; - key.is_ngg = device->physical_device->use_ngg; + key.is_ngg = pdev->use_ngg; key.next_stage = MESA_SHADER_VERTEX; - key.wave32 = device->physical_device->ge_wave_size == 32; + key.wave32 = pdev->ge_wave_size == 32; for (unsigned i = 1; i <= MAX_VERTEX_ATTRIBS; i++) { key.instance_rate_inputs = 0; @@ -206,7 +208,7 @@ radv_device_init_vs_prologs(struct radv_device *device) device->simple_vs_prologs[i - 1] = radv_create_vs_prolog(device, &key); if (!device->simple_vs_prologs[i - 1]) - return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); + return vk_error(pdev->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); } unsigned idx = 0; @@ -218,7 +220,7 @@ radv_device_init_vs_prologs(struct radv_device *device) struct radv_shader_part *prolog = radv_create_vs_prolog(device, &key); if (!prolog) - return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); + return vk_error(pdev->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); assert(idx == radv_instance_rate_prolog_index(num_attributes, key.instance_rate_inputs)); device->instance_rate_vs_prologs[idx++] = prolog; @@ -638,11 +640,11 @@ capture_trace(VkQueue _queue) static void radv_device_init_cache_key(struct radv_device *device) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_device_cache_key *key = &device->cache_key; key->disable_trunc_coord = device->disable_trunc_coord; - key->image_2d_view_of_3d = - device->vk.enabled_features.image2DViewOf3D && device->physical_device->info.gfx_level == GFX9; + key->image_2d_view_of_3d = device->vk.enabled_features.image2DViewOf3D && pdev->info.gfx_level == GFX9; key->mesh_shader_queries = device->vk.enabled_features.meshShaderQueries; key->primitives_generated_query = radv_uses_primitives_generated_query(device); @@ -655,7 +657,7 @@ radv_device_init_cache_key(struct radv_device *device) * enabled, regardless of what features are actually enabled on the logical device. */ if (device->vk.enabled_features.shaderObject) { - key->image_2d_view_of_3d = device->physical_device->info.gfx_level == GFX9; + key->image_2d_view_of_3d = pdev->info.gfx_level == GFX9; key->primitives_generated_query = true; } @@ -701,7 +703,6 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr device->vk.command_buffer_ops = &radv_cmd_buffer_ops; device->instance = pdev->instance; - device->physical_device = pdev; init_dispatch_tables(device, pdev); @@ -782,13 +783,12 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr device->shader_use_invisible_vram = (device->instance->perftest_flags & RADV_PERFTEST_DMA_SHADERS) && /* SDMA buffer copy is only implemented for GFX7+. */ - device->physical_device->info.gfx_level >= GFX7; + pdev->info.gfx_level >= GFX7; result = radv_init_shader_upload_queue(device); if (result != VK_SUCCESS) goto fail; - device->pbb_allowed = - device->physical_device->info.gfx_level >= GFX9 && !(device->instance->debug_flags & RADV_DEBUG_NOBINNING); + device->pbb_allowed = pdev->info.gfx_level >= GFX9 && !(device->instance->debug_flags & RADV_DEBUG_NOBINNING); device->disable_trunc_coord = device->instance->drirc.disable_trunc_coord; @@ -818,13 +818,13 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1); - if (device->physical_device->info.gfx_level >= GFX7) { + if (pdev->info.gfx_level >= GFX7) { /* If the KMD allows it (there is a KMD hw register for it), * allow launching waves out-of-order. */ device->dispatch_initiator |= S_00B800_ORDER_MODE(1); } - if (device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level >= GFX10) { /* Enable asynchronous compute tunneling. The KMD restricts this feature * to high-priority compute queues, so setting the bit on any other queue * is a no-op. PAL always sets this bit as well. @@ -862,7 +862,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr } if (device->instance->vk.trace_mode & RADV_TRACE_MODE_RGP) { - if (device->physical_device->info.gfx_level < GFX8 || device->physical_device->info.gfx_level > GFX11) { + if (pdev->info.gfx_level < GFX8 || pdev->info.gfx_level > GFX11) { fprintf(stderr, "GPU hardware not supported: refer to " "the RGP documentation for the list of " "supported GPUs!\n"); @@ -882,13 +882,13 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr radv_sqtt_queue_events_enabled() ? "enabled" : "disabled"); if (radv_spm_trace_enabled(device->instance)) { - if (device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level >= GFX10) { if (!radv_spm_init(device)) { result = VK_ERROR_INITIALIZATION_FAILED; goto fail; } } else { - fprintf(stderr, "radv: SPM isn't supported for this GPU (%s)!\n", device->physical_device->name); + fprintf(stderr, "radv: SPM isn't supported for this GPU (%s)!\n", pdev->name); } } } @@ -905,7 +905,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr if (getenv("RADV_TRAP_HANDLER")) { /* TODO: Add support for more hardware. */ - assert(device->physical_device->info.gfx_level == GFX8); + assert(pdev->info.gfx_level == GFX8); fprintf(stderr, "**********************************************************************\n"); fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n"); @@ -922,7 +922,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr } } - if (device->physical_device->info.gfx_level == GFX10_3) { + if (pdev->info.gfx_level == GFX10_3) { if (getenv("RADV_FORCE_VRS_CONFIG_FILE")) { const char *file = radv_get_force_vrs_config_file(); @@ -942,7 +942,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr } /* PKT3_LOAD_SH_REG_INDEX is supported on GFX8+, but it hangs with compute queues until GFX10.3. */ - device->load_grid_size_from_user_sgpr = device->physical_device->info.gfx_level >= GFX10_3; + device->load_grid_size_from_user_sgpr = pdev->info.gfx_level >= GFX10_3; device->keep_shader_info = keep_shader_info; @@ -1009,7 +1009,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr goto fail_cache; } - if (!device->physical_device->ac_perfcounters.blocks) { + if (!pdev->ac_perfcounters.blocks) { result = VK_ERROR_INITIALIZATION_FAILED; goto fail_cache; } @@ -1029,7 +1029,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr if (result != VK_SUCCESS) goto fail_cache; - if (device->physical_device->info.gfx_level == GFX11 && device->physical_device->info.has_dedicated_vram && + if (pdev->info.gfx_level == GFX11 && pdev->info.has_dedicated_vram && device->instance->drirc.force_pstate_peak_gfx11_dgpu) { if (!radv_device_acquire_performance_counters(device)) fprintf(stderr, "radv: failed to set pstate to profile_peak.\n"); @@ -1197,10 +1197,10 @@ radv_GetImageMemoryRequirements2(VkDevice _device, const VkImageMemoryRequiremen { RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_image, image, pInfo->image); + const struct radv_physical_device *pdev = radv_device_physical(device); pMemoryRequirements->memoryRequirements.memoryTypeBits = - ((1u << device->physical_device->memory_properties.memoryTypeCount) - 1u) & - ~device->physical_device->memory_types_32bit; + ((1u << pdev->memory_properties.memoryTypeCount) - 1u) & ~pdev->memory_types_32bit; pMemoryRequirements->memoryRequirements.size = image->size; pMemoryRequirements->memoryRequirements.alignment = image->alignment; @@ -1254,7 +1254,9 @@ radv_surface_max_layer_count(struct radv_image_view *iview) unsigned radv_get_dcc_max_uncompressed_block_size(const struct radv_device *device, const struct radv_image *image) { - if (device->physical_device->info.gfx_level < GFX10 && image->vk.samples > 1) { + const struct radv_physical_device *pdev = radv_device_physical(device); + + if (pdev->info.gfx_level < GFX10 && image->vk.samples > 1) { if (image->planes[0].surface.bpe == 1) return V_028C78_MAX_BLOCK_SIZE_64B; else if (image->planes[0].surface.bpe == 2) @@ -1267,7 +1269,9 @@ radv_get_dcc_max_uncompressed_block_size(const struct radv_device *device, const static unsigned get_dcc_min_compressed_block_size(const struct radv_device *device) { - if (!device->physical_device->info.has_dedicated_vram) { + const struct radv_physical_device *pdev = radv_device_physical(device); + + if (!pdev->info.has_dedicated_vram) { /* amdvlk: [min-compressed-block-size] should be set to 32 for * dGPU and 64 for APU because all of our APUs to date use * DIMMs which have a request granularity size of 64B while all @@ -1282,6 +1286,7 @@ get_dcc_min_compressed_block_size(const struct radv_device *device) static uint32_t radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iview) { + const struct radv_physical_device *pdev = radv_device_physical(device); unsigned max_uncompressed_block_size = radv_get_dcc_max_uncompressed_block_size(device, iview->image); unsigned min_compressed_block_size = get_dcc_min_compressed_block_size(device); unsigned max_compressed_block_size; @@ -1293,7 +1298,7 @@ radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iv /* For GFX9+ ac_surface computes values for us (except min_compressed * and max_uncompressed) */ - if (device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { max_compressed_block_size = iview->image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size; independent_128b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_128B_blocks; independent_64b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_64B_blocks; @@ -1322,12 +1327,12 @@ radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iv S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) | S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks); - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { result |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX11(independent_128b_blocks) | S_028C78_DISABLE_CONSTANT_ENCODE_REG(1) | S_028C78_FDCC_ENABLE(radv_dcc_enabled(iview->image, iview->vk.base_mip_level)); - if (device->physical_device->info.family >= CHIP_GFX1103_R2) { + if (pdev->info.family >= CHIP_GFX1103_R2) { result |= S_028C78_ENABLE_MAX_COMP_FRAG_OVERRIDE(1) | S_028C78_MAX_COMP_FRAGS(iview->image->vk.samples >= 4); } } else { @@ -1341,6 +1346,7 @@ void radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb, struct radv_image_view *iview) { + const struct radv_physical_device *pdev = radv_device_physical(device); const struct util_format_description *desc; unsigned ntype, format, swap, endian; unsigned blend_clamp = 0, blend_bypass = 0; @@ -1354,7 +1360,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff memset(cb, 0, sizeof(*cb)); /* Intensity is implemented as Red, so treat it that way. */ - if (device->physical_device->info.gfx_level >= GFX11) + if (pdev->info.gfx_level >= GFX11) cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX11(desc->swizzle[3] == PIPE_SWIZZLE_1); else cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX6(desc->swizzle[3] == PIPE_SWIZZLE_1); @@ -1369,11 +1375,11 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff cb->cb_color_base = va >> 8; - if (device->physical_device->info.gfx_level >= GFX9) { - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX11) { cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) | S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned); - } else if (device->physical_device->info.gfx_level >= GFX10) { + } else if (pdev->info.gfx_level >= GFX10) { cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) | S_028EE0_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) | S_028EE0_CMASK_PIPE_ALIGNED(1) | @@ -1414,13 +1420,13 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index); if (radv_image_has_fmask(iview->image)) { - if (device->physical_device->info.gfx_level >= GFX7) + if (pdev->info.gfx_level >= GFX7) cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1); cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.color.fmask.tiling_index); cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.color.fmask.slice_tile_max); } else { /* This must be set for fast clear to work without FMASK. */ - if (device->physical_device->info.gfx_level >= GFX7) + if (pdev->info.gfx_level >= GFX7) cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max); cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max); @@ -1435,7 +1441,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset; va += surf->meta_offset; - if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && device->physical_device->info.gfx_level <= GFX8) + if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && pdev->info.gfx_level <= GFX8) va += plane->surface.u.legacy.color.dcc_level[iview->vk.base_mip_level].dcc_offset; unsigned dcc_tile_swizzle = tile_swizzle; @@ -1452,7 +1458,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff if (iview->image->vk.samples > 1) { unsigned log_samples = util_logbase2(iview->image->vk.samples); - if (device->physical_device->info.gfx_level >= GFX11) + if (pdev->info.gfx_level >= GFX11) cb->cb_color_attrib |= S_028C74_NUM_FRAGMENTS_GFX11(log_samples); else cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS_GFX6(log_samples); @@ -1467,7 +1473,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff } ntype = ac_get_cb_number_type(desc->format); - format = ac_get_cb_format(device->physical_device->info.gfx_level, desc->format); + format = ac_get_cb_format(pdev->info.gfx_level, desc->format); assert(format != V_028C70_COLOR_INVALID); swap = radv_translate_colorswap(iview->vk.format, false); @@ -1498,14 +1504,14 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff format != V_028C70_COLOR_24_8) | S_028C70_NUMBER_TYPE(ntype); - if (device->physical_device->info.gfx_level >= GFX11) + if (pdev->info.gfx_level >= GFX11) cb->cb_color_info |= S_028C70_FORMAT_GFX11(format); else cb->cb_color_info |= S_028C70_FORMAT_GFX6(format) | S_028C70_ENDIAN(endian); if (radv_image_has_fmask(iview->image)) { cb->cb_color_info |= S_028C70_COMPRESSION(1); - if (device->physical_device->info.gfx_level == GFX6) { + if (pdev->info.gfx_level == GFX6) { unsigned fmask_bankh = util_logbase2(surf->u.legacy.color.fmask.bankh); cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); } @@ -1514,7 +1520,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff /* Allow the texture block to read FMASK directly without decompressing it. */ cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1); - if (device->physical_device->info.gfx_level == GFX8) { + if (pdev->info.gfx_level == GFX8) { /* Set CMASK into a tiling format that allows * the texture block to read it. */ @@ -1527,25 +1533,25 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff cb->cb_color_info |= S_028C70_FAST_CLEAR(1); if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && !iview->disable_dcc_mrt && - device->physical_device->info.gfx_level < GFX11) + pdev->info.gfx_level < GFX11) cb->cb_color_info |= S_028C70_DCC_ENABLE(1); cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview); /* This must be set for fast clear to work without FMASK. */ - if (!radv_image_has_fmask(iview->image) && device->physical_device->info.gfx_level == GFX6) { + if (!radv_image_has_fmask(iview->image) && pdev->info.gfx_level == GFX6) { unsigned bankh = util_logbase2(surf->u.legacy.bankh); cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); } - if (device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { unsigned mip0_depth = iview->image->vk.image_type == VK_IMAGE_TYPE_3D ? (iview->extent.depth - 1) : (iview->image->vk.array_layers - 1); unsigned width = vk_format_get_plane_width(iview->image->vk.format, iview->plane_id, iview->extent.width); unsigned height = vk_format_get_plane_height(iview->image->vk.format, iview->plane_id, iview->extent.height); unsigned max_mip = iview->image->vk.mip_levels - 1; - if (device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level >= GFX10) { unsigned base_level = iview->vk.base_mip_level; if (iview->nbc_view.valid) { @@ -1556,7 +1562,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(base_level); cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) | S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) | - S_028EE0_RESOURCE_LEVEL(device->physical_device->info.gfx_level >= GFX11 ? 0 : 1); + S_028EE0_RESOURCE_LEVEL(pdev->info.gfx_level >= GFX11 ? 0 : 1); } else { cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->vk.base_mip_level); cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type); @@ -1567,7 +1573,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff * * We set the pitch in MIP0_WIDTH. */ - if (device->physical_device->info.gfx_level && iview->image->vk.image_type == VK_IMAGE_TYPE_2D && + if (pdev->info.gfx_level && iview->image->vk.image_type == VK_IMAGE_TYPE_2D && iview->image->vk.array_layers == 1 && plane->surface.is_linear) { assert((plane->surface.u.gfx9.surf_pitch * plane->surface.bpe) % 256 == 0); @@ -1586,11 +1592,12 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff static unsigned radv_calc_decompress_on_z_planes(const struct radv_device *device, struct radv_image_view *iview) { + const struct radv_physical_device *pdev = radv_device_physical(device); unsigned max_zplanes = 0; assert(radv_image_is_tc_compat_htile(iview->image)); - if (device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { /* Default value for 32-bit depth surfaces. */ max_zplanes = 4; @@ -1598,9 +1605,8 @@ radv_calc_decompress_on_z_planes(const struct radv_device *device, struct radv_i max_zplanes = 2; /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */ - if (device->physical_device->info.has_two_planes_iterate256_bug && - radv_image_get_iterate256(device, iview->image) && !radv_image_tile_stencil_disabled(device, iview->image) && - iview->image->vk.samples == 4) { + if (pdev->info.has_two_planes_iterate256_bug && radv_image_get_iterate256(device, iview->image) && + !radv_image_tile_stencil_disabled(device, iview->image) && iview->image->vk.samples == 4) { max_zplanes = 1; } @@ -1650,6 +1656,7 @@ void radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buffer_info *ds, struct radv_image_view *iview, VkImageAspectFlags ds_aspects) { + const struct radv_physical_device *pdev = radv_device_physical(device); unsigned level = iview->vk.base_mip_level; unsigned format, stencil_format; uint64_t va, s_offs, z_offs; @@ -1668,7 +1675,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff ds->db_depth_view = S_028008_SLICE_START(iview->vk.base_array_layer) | S_028008_SLICE_MAX(max_slice) | S_028008_Z_READ_ONLY(!(ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) | S_028008_STENCIL_READ_ONLY(!(ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT)); - if (device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level >= GFX10) { ds->db_depth_view |= S_028008_SLICE_START_HI(iview->vk.base_array_layer >> 11) | S_028008_SLICE_MAX_HI(max_slice >> 11); } @@ -1681,20 +1688,19 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff /* Recommended value for better performance with 4x and 8x. */ ds->db_render_override2 = S_028010_DECOMPRESS_Z_ON_FLUSH(iview->image->vk.samples >= 4) | - S_028010_CENTROID_COMPUTATION_MODE(device->physical_device->info.gfx_level >= GFX10_3); + S_028010_CENTROID_COMPUTATION_MODE(pdev->info.gfx_level >= GFX10_3); - if (device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { assert(surf->u.gfx9.surf_offset == 0); s_offs += surf->u.gfx9.zs.stencil_offset; ds->db_z_info = S_028038_FORMAT(format) | S_028038_NUM_SAMPLES(util_logbase2(iview->image->vk.samples)) | S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) | S_028038_MAXMIP(iview->image->vk.mip_levels - 1) | - S_028038_ZRANGE_PRECISION(1) | - S_028040_ITERATE_256(device->physical_device->info.gfx_level >= GFX11); + S_028038_ZRANGE_PRECISION(1) | S_028040_ITERATE_256(pdev->info.gfx_level >= GFX11); ds->db_stencil_info = S_02803C_FORMAT(stencil_format) | S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode) | - S_028044_ITERATE_256(device->physical_device->info.gfx_level >= GFX11); + S_028044_ITERATE_256(pdev->info.gfx_level >= GFX11); - if (device->physical_device->info.gfx_level == GFX9) { + if (pdev->info.gfx_level == GFX9) { ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.epitch); ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.zs.stencil_epitch); } @@ -1711,7 +1717,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes); - if (device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level >= GFX10) { bool iterate256 = radv_image_get_iterate256(device, iview->image); ds->db_z_info |= S_028040_ITERATE_FLUSH(1); @@ -1732,7 +1738,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff ds->db_htile_data_base = va >> 8; ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1); - if (device->physical_device->info.gfx_level == GFX9) { + if (pdev->info.gfx_level == GFX9) { ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1); } @@ -1741,7 +1747,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff } } - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { radv_gfx11_set_db_render_control(device, iview->image->vk.samples, &ds->db_render_control); } } else { @@ -1760,8 +1766,8 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff if (iview->image->vk.samples > 1) ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->vk.samples)); - if (device->physical_device->info.gfx_level >= GFX7) { - const struct radeon_info *gpu_info = &device->physical_device->info; + if (pdev->info.gfx_level >= GFX7) { + const struct radeon_info *gpu_info = &pdev->info; unsigned tiling_index = surf->u.legacy.tiling_index[level]; unsigned stencil_index = surf->u.legacy.zs.stencil_tiling_index[level]; unsigned macro_index = surf->u.legacy.macro_tile_index; @@ -1820,7 +1826,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff void radv_gfx11_set_db_render_control(const struct radv_device *device, unsigned num_samples, unsigned *db_render_control) { - const struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); unsigned max_allowed_tiles_in_wave = 0; if (pdev->info.has_dedicated_vram) { @@ -1911,6 +1917,7 @@ radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBi VkMemoryFdPropertiesKHR *pMemoryFdProperties) { RADV_FROM_HANDLE(radv_device, device, _device); + struct radv_physical_device *pdev = radv_device_physical(device); switch (handleType) { case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: { @@ -1919,7 +1926,7 @@ radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBi if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags)) return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); - pMemoryFdProperties->memoryTypeBits = radv_compute_valid_memory_types(device->physical_device, domains, flags); + pMemoryFdProperties->memoryTypeBits = radv_compute_valid_memory_types(pdev, domains, flags); return VK_SUCCESS; } default: @@ -1941,7 +1948,8 @@ radv_GetCalibratedTimestampsKHR(VkDevice _device, uint32_t timestampCount, { #ifndef _WIN32 RADV_FROM_HANDLE(radv_device, device, _device); - uint32_t clock_crystal_freq = device->physical_device->info.clock_crystal_freq; + const struct radv_physical_device *pdev = radv_device_physical(device); + uint32_t clock_crystal_freq = pdev->info.clock_crystal_freq; int d; uint64_t begin, end; uint64_t max_clock_period = 0; @@ -1992,10 +2000,11 @@ radv_GetCalibratedTimestampsKHR(VkDevice _device, uint32_t timestampCount, bool radv_device_set_pstate(struct radv_device *device, bool enable) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct radeon_winsys *ws = device->ws; enum radeon_ctx_pstate pstate = enable ? RADEON_CTX_PSTATE_PEAK : RADEON_CTX_PSTATE_NONE; - if (device->physical_device->info.has_stable_pstate) { + if (pdev->info.has_stable_pstate) { /* pstate is per-device; setting it for one ctx is sufficient. * We pick the first initialized one below. */ for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) diff --git a/src/amd/vulkan/radv_device_generated_commands.c b/src/amd/vulkan/radv_device_generated_commands.c index 61ff027c317..a0131106f15 100644 --- a/src/amd/vulkan/radv_device_generated_commands.c +++ b/src/amd/vulkan/radv_device_generated_commands.c @@ -35,6 +35,7 @@ radv_get_sequence_size_compute(const struct radv_indirect_command_layout *layout const struct radv_compute_pipeline *pipeline, uint32_t *cmd_size) { const struct radv_device *device = container_of(layout->base.device, struct radv_device, vk); + const struct radv_physical_device *pdev = radv_device_physical(device); /* dispatch */ *cmd_size += 5 * 4; @@ -55,7 +56,7 @@ radv_get_sequence_size_compute(const struct radv_indirect_command_layout *layout /* COMPUTE_PGM_{LO,RSRC1,RSRC2} */ *cmd_size += 7 * 4; - if (device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level >= GFX10) { /* COMPUTE_PGM_RSRC3 */ *cmd_size += 3 * 4; } @@ -87,6 +88,7 @@ radv_get_sequence_size_graphics(const struct radv_indirect_command_layout *layou uint32_t *upload_size) { const struct radv_device *device = container_of(layout->base.device, struct radv_device, vk); + const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_shader *vs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX); if (layout->bind_vbo_mask) { @@ -115,7 +117,7 @@ radv_get_sequence_size_graphics(const struct radv_indirect_command_layout *layou } else { if (layout->draw_mesh_tasks) { /* userdata writes + instance count + non-indexed draw */ - *cmd_size += (6 + 2 + (device->physical_device->mesh_fast_launch_2 ? 5 : 3)) * 4; + *cmd_size += (6 + 2 + (pdev->mesh_fast_launch_2 ? 5 : 3)) * 4; } else { /* userdata writes + instance count + non-indexed draw */ *cmd_size += (5 + 2 + 3) * 4; @@ -189,7 +191,8 @@ radv_get_sequence_size(const struct radv_indirect_command_layout *layout, struct static uint32_t radv_align_cmdbuf_size(const struct radv_device *device, uint32_t size, enum amd_ip_type ip_type) { - const uint32_t ib_alignment = device->physical_device->info.ip[ip_type].ib_alignment; + const struct radv_physical_device *pdev = radv_device_physical(device); + const uint32_t ib_alignment = pdev->info.ip[ip_type].ib_alignment; return align(size, ib_alignment); } @@ -365,7 +368,9 @@ nir_pkt3(nir_builder *b, unsigned op, nir_def *len) static nir_def * dgc_get_nop_packet(nir_builder *b, const struct radv_device *device) { - if (device->physical_device->info.gfx_ib_pad_with_type2) { + const struct radv_physical_device *pdev = radv_device_physical(device); + + if (pdev->info.gfx_ib_pad_with_type2) { return nir_imm_int(b, PKT2_NOP_PAD); } else { return nir_imm_int(b, PKT3_NOP_PAD); @@ -691,6 +696,8 @@ dgc_main_cmd_buf_offset(nir_builder *b, const struct radv_device *device) static void build_dgc_buffer_tail(nir_builder *b, nir_def *sequence_count, const struct radv_device *device) { + const struct radv_physical_device *pdev = radv_device_physical(device); + nir_def *global_id = get_global_ids(b, 1); nir_def *cmd_buf_stride = load_param32(b, cmd_buf_stride); @@ -718,7 +725,7 @@ build_dgc_buffer_tail(nir_builder *b, nir_def *sequence_count, const struct radv nir_def *packet, *packet_size; - if (device->physical_device->info.gfx_ib_pad_with_type2) { + if (pdev->info.gfx_ib_pad_with_type2) { packet_size = nir_imm_int(b, 4); packet = nir_imm_int(b, PKT2_NOP_PAD); } else { @@ -741,6 +748,8 @@ build_dgc_buffer_tail(nir_builder *b, nir_def *sequence_count, const struct radv static void build_dgc_buffer_preamble(nir_builder *b, nir_def *sequence_count, const struct radv_device *device) { + const struct radv_physical_device *pdev = radv_device_physical(device); + nir_def *global_id = get_global_ids(b, 1); nir_def *use_preamble = nir_ine_imm(b, load_param8(b, use_preamble), 0); @@ -778,7 +787,7 @@ build_dgc_buffer_preamble(nir_builder *b, nir_def *sequence_count, const struct nir_def *chain_packets[] = { nir_imm_int(b, PKT3(PKT3_INDIRECT_BUFFER, 2, 0)), addr, - nir_imm_int(b, device->physical_device->info.address32_hi), + nir_imm_int(b, pdev->info.address32_hi), nir_ior_imm(b, words, S_3F2_CHAIN(1) | S_3F2_VALID(1) | S_3F2_PRE_ENA(false)), }; @@ -861,6 +870,8 @@ dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf nir_def *index_buffer_offset, nir_def *ibo_type_32, nir_def *ibo_type_8, nir_variable *max_index_count_var, const struct radv_device *device) { + const struct radv_physical_device *pdev = radv_device_physical(device); + nir_def *index_stream_offset = nir_iadd(b, index_buffer_offset, stream_base); nir_def *data = nir_load_ssbo(b, 4, 32, stream_buf, index_stream_offset); @@ -876,10 +887,9 @@ dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf nir_def *cmd_values[3 + 2 + 3]; - if (device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX; - if (device->physical_device->info.gfx_level < GFX9 || - (device->physical_device->info.gfx_level == GFX9 && device->physical_device->info.me_fw_version < 26)) + if (pdev->info.gfx_level < GFX9 || (pdev->info.gfx_level == GFX9 && pdev->info.me_fw_version < 26)) opcode = PKT3_SET_UCONFIG_REG; cmd_values[0] = nir_imm_int(b, PKT3(opcode, 1, 0)); cmd_values[1] = nir_imm_int(b, (R_03090C_VGT_INDEX_TYPE - CIK_UCONFIG_REG_OFFSET) >> 2 | (2u << 28)); @@ -1186,6 +1196,8 @@ static void dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base, nir_def *vbo_bind_mask, nir_variable *upload_offset, const struct radv_device *device) { + const struct radv_physical_device *pdev = radv_device_physical(device); + nir_def *vbo_cnt = load_param8(b, vbo_cnt); nir_variable *vbo_idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "vbo_idx"); nir_store_var(b, vbo_idx, nir_imm_int(b, 0), 0x1); @@ -1252,9 +1264,9 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu nir_pop_if(b, NULL); nir_def *convert_cond = nir_ine_imm(b, nir_load_var(b, num_records), 0); - if (device->physical_device->info.gfx_level == GFX9) + if (pdev->info.gfx_level == GFX9) convert_cond = nir_imm_false(b); - else if (device->physical_device->info.gfx_level != GFX8) + else if (pdev->info.gfx_level != GFX8) convert_cond = nir_iand(b, convert_cond, nir_ieq_imm(b, stride, 0)); nir_def *new_records = @@ -1264,7 +1276,7 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu } nir_push_else(b, NULL); { - if (device->physical_device->info.gfx_level != GFX8) { + if (pdev->info.gfx_level != GFX8) { nir_push_if(b, nir_ine_imm(b, stride, 0)); { nir_def *r = nir_iadd(b, nir_load_var(b, num_records), nir_iadd_imm(b, stride, -1)); @@ -1276,7 +1288,7 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu nir_pop_if(b, NULL); nir_def *rsrc_word3 = nir_channel(b, nir_load_var(b, vbo_data), 3); - if (device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level >= GFX10) { nir_def *oob_select = nir_bcsel(b, nir_ieq_imm(b, stride, 0), nir_imm_int(b, V_008F0C_OOB_SELECT_RAW), nir_imm_int(b, V_008F0C_OOB_SELECT_STRUCTURED)); rsrc_word3 = nir_iand_imm(b, rsrc_word3, C_008F0C_OOB_SELECT); @@ -1408,6 +1420,8 @@ static void dgc_emit_draw_mesh_tasks(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base, nir_def *draw_params_offset, nir_def *sequence_id, const struct radv_device *device) { + const struct radv_physical_device *pdev = radv_device_physical(device); + nir_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr); nir_def *stream_offset = nir_iadd(b, draw_params_offset, stream_base); @@ -1424,7 +1438,7 @@ dgc_emit_draw_mesh_tasks(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_ dgc_emit_userdata_mesh(b, cs, vtx_base_sgpr, x, y, z, sequence_id, device); dgc_emit_instance_count(b, cs, nir_imm_int(b, 1)); - if (device->physical_device->mesh_fast_launch_2) { + if (pdev->mesh_fast_launch_2) { dgc_emit_dispatch_mesh_direct(b, cs, x, y, z); } else { nir_def *vertex_count = nir_imul(b, x, nir_imul(b, y, z)); @@ -1454,6 +1468,8 @@ static void dgc_emit_bind_pipeline(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base, nir_def *pipeline_params_offset, const struct radv_device *device) { + const struct radv_physical_device *pdev = radv_device_physical(device); + nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base); nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset); @@ -1465,7 +1481,7 @@ dgc_emit_bind_pipeline(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu dgc_emit1(b, cs, load_metadata32(b, rsrc1)); dgc_emit1(b, cs, load_metadata32(b, rsrc2)); - if (device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level >= GFX10) { dgc_emit_set_sh_reg_seq(b, cs, R_00B8A0_COMPUTE_PGM_RSRC3, 1); dgc_emit1(b, cs, load_metadata32(b, rsrc3)); } @@ -1504,6 +1520,7 @@ dgc_is_cond_render_enabled(nir_builder *b) static nir_shader * build_dgc_prepare_shader(struct radv_device *dev) { + const struct radv_physical_device *pdev = radv_device_physical(dev); nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_dgc_prepare"); b.shader->info.workgroup_size[0] = 64; @@ -1554,7 +1571,7 @@ build_dgc_prepare_shader(struct radv_device *dev) struct dgc_cmdbuf cmd_buf = { .descriptor = radv_meta_load_descriptor(&b, 0, DGC_DESC_PREPARE), .offset = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "cmd_buf_offset"), - .gfx_level = dev->physical_device->info.gfx_level, + .gfx_level = pdev->info.gfx_level, .sqtt_enabled = !!dev->sqtt.bo, }; nir_store_var(&b, cmd_buf.offset, nir_iadd(&b, nir_imul(&b, global_id, cmd_buf_stride), cmd_buf_base_offset), 1); @@ -1647,7 +1664,7 @@ build_dgc_prepare_shader(struct radv_device *dev) /* Pad the cmdbuffer if we did not use the whole stride */ nir_push_if(&b, nir_ine(&b, nir_load_var(&b, cmd_buf.offset), cmd_buf_end)); { - if (dev->physical_device->info.gfx_ib_pad_with_type2) { + if (pdev->info.gfx_ib_pad_with_type2) { nir_push_loop(&b); { nir_def *curr_offset = nir_load_var(&b, cmd_buf.offset); @@ -1872,6 +1889,7 @@ radv_GetGeneratedCommandsMemoryRequirementsNV(VkDevice _device, VkMemoryRequirements2 *pMemoryRequirements) { RADV_FROM_HANDLE(radv_device, device, _device); + const struct radv_physical_device *pdev = radv_device_physical(device); VK_FROM_HANDLE(radv_indirect_command_layout, layout, pInfo->indirectCommandsLayout); VK_FROM_HANDLE(radv_pipeline, pipeline, pInfo->pipeline); @@ -1882,10 +1900,9 @@ radv_GetGeneratedCommandsMemoryRequirementsNV(VkDevice _device, radv_dgc_preamble_cmdbuf_size(device); VkDeviceSize upload_buf_size = upload_stride * pInfo->maxSequencesCount; - pMemoryRequirements->memoryRequirements.memoryTypeBits = device->physical_device->memory_types_32bit; + pMemoryRequirements->memoryRequirements.memoryTypeBits = pdev->memory_types_32bit; pMemoryRequirements->memoryRequirements.alignment = - MAX2(device->physical_device->info.ip[AMD_IP_GFX].ib_alignment, - device->physical_device->info.ip[AMD_IP_COMPUTE].ib_alignment); + MAX2(pdev->info.ip[AMD_IP_GFX].ib_alignment, pdev->info.ip[AMD_IP_COMPUTE].ib_alignment); pMemoryRequirements->memoryRequirements.size = align(cmd_buf_size + upload_buf_size, pMemoryRequirements->memoryRequirements.alignment); } @@ -2051,6 +2068,7 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo { VK_FROM_HANDLE(radv_indirect_command_layout, layout, pGeneratedCommandsInfo->indirectCommandsLayout); VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); *upload_size = MAX2(*upload_size, 16); @@ -2074,7 +2092,7 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo struct radv_shader *cs = radv_get_shader(compute_pipeline->base.shaders, MESA_SHADER_COMPUTE); if (cs->info.wave_size == 32) { - assert(cmd_buffer->device->physical_device->info.gfx_level >= GFX10); + assert(pdev->info.gfx_level >= GFX10); params->dispatch_initiator |= S_00B800_CS_W32_EN(1); } @@ -2276,9 +2294,9 @@ radv_GetPipelineIndirectMemoryRequirementsNV(VkDevice _device, const VkComputePi VkMemoryRequirements *reqs = &pMemoryRequirements->memoryRequirements; const uint32_t size = sizeof(struct radv_compute_pipeline_metadata); RADV_FROM_HANDLE(radv_device, device, _device); + const struct radv_physical_device *pdev = radv_device_physical(device); - reqs->memoryTypeBits = ((1u << device->physical_device->memory_properties.memoryTypeCount) - 1u) & - ~device->physical_device->memory_types_32bit; + reqs->memoryTypeBits = ((1u << pdev->memory_properties.memoryTypeCount) - 1u) & ~pdev->memory_types_32bit; reqs->alignment = 4; reqs->size = align(size, reqs->alignment); } diff --git a/src/amd/vulkan/radv_device_memory.c b/src/amd/vulkan/radv_device_memory.c index 72d05c25181..7cbe7300511 100644 --- a/src/amd/vulkan/radv_device_memory.c +++ b/src/amd/vulkan/radv_device_memory.c @@ -195,12 +195,13 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc mem->user_ptr = host_ptr_info->pHostPointer; } } else { + const struct radv_physical_device *pdev = radv_device_physical(device); uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096); uint32_t heap_index; - heap_index = device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex].heapIndex; - domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex]; - flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex]; + heap_index = pdev->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex].heapIndex; + domain = pdev->memory_domains[pAllocateInfo->memoryTypeIndex]; + flags |= pdev->memory_flags[pAllocateInfo->memoryTypeIndex]; if (export_info && export_info->handleTypes) { /* Setting RADEON_FLAG_GTT_WC in case the bo is spilled to GTT. This is important when the @@ -226,7 +227,7 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc flags |= RADEON_FLAG_ZERO_VRAM; if (device->overallocation_disallowed) { - uint64_t total_size = device->physical_device->memory_properties.memoryHeaps[heap_index].size; + uint64_t total_size = pdev->memory_properties.memoryHeaps[heap_index].size; mtx_lock(&device->overallocation_mutex); if (device->allocated_memory_size[heap_index] + alloc_size > total_size) { @@ -238,8 +239,8 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc mtx_unlock(&device->overallocation_mutex); } - result = radv_bo_create(device, alloc_size, device->physical_device->info.max_alignment, domain, flags, priority, - replay_address, is_internal, &mem->bo); + result = radv_bo_create(device, alloc_size, pdev->info.max_alignment, domain, flags, priority, replay_address, + is_internal, &mem->bo); if (result != VK_SUCCESS) { if (device->overallocation_disallowed) { diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c index c680296aa31..fc84e1fc607 100644 --- a/src/amd/vulkan/radv_formats.c +++ b/src/amd/vulkan/radv_formats.c @@ -1881,6 +1881,7 @@ radv_GetImageSparseMemoryRequirements2(VkDevice _device, const VkImageSparseMemo { RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_image, image, pInfo->image); + struct radv_physical_device *pdev = radv_device_physical(device); if (!(image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)) { *pSparseMemoryRequirementCount = 0; @@ -1892,12 +1893,12 @@ radv_GetImageSparseMemoryRequirements2(VkDevice _device, const VkImageSparseMemo vk_outarray_append_typed(VkSparseImageMemoryRequirements2, &out, req) { - fill_sparse_image_format_properties(device->physical_device, image->vk.image_type, image->vk.format, + fill_sparse_image_format_properties(pdev, image->vk.image_type, image->vk.format, &req->memoryRequirements.formatProperties); req->memoryRequirements.imageMipTailFirstLod = image->planes[0].surface.first_mip_tail_level; if (req->memoryRequirements.imageMipTailFirstLod < image->vk.mip_levels) { - if (device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { /* The tail is always a single tile per layer. */ req->memoryRequirements.imageMipTailSize = 65536; req->memoryRequirements.imageMipTailOffset = diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c index b589311dc20..722fc7da2ff 100644 --- a/src/amd/vulkan/radv_image.c +++ b/src/amd/vulkan/radv_image.c @@ -42,6 +42,8 @@ static unsigned radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format) { + const struct radv_physical_device *pdev = radv_device_physical(device); + if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) { assert(pCreateInfo->samples <= 1); return RADEON_SURF_MODE_LINEAR_ALIGNED; @@ -54,8 +56,7 @@ radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateI if (pCreateInfo->samples > 1) return RADEON_SURF_MODE_2D; - if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) && - device->physical_device->info.gfx_level <= GFX8) { + if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) && pdev->info.gfx_level <= GFX8) { /* this causes hangs in some VK CTS tests on GFX9. */ /* Textures with a very small height are recommended to be linear. */ if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D || @@ -71,14 +72,16 @@ radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateI static bool radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format) { + const struct radv_physical_device *pdev = radv_device_physical(device); + /* TC-compat HTILE is only available for GFX8+. */ - if (device->physical_device->info.gfx_level < GFX8) + if (pdev->info.gfx_level < GFX8) return false; /* TC-compat HTILE looks broken on Tonga (and Iceland is the same design) and the documented bug * workarounds don't help. */ - if (device->physical_device->info.family == CHIP_TONGA || device->physical_device->info.family == CHIP_ICELAND) + if (pdev->info.family == CHIP_TONGA || pdev->info.family == CHIP_ICELAND) return false; if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) @@ -91,7 +94,7 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCrea (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT))) return false; - if (device->physical_device->info.gfx_level < GFX9) { + if (pdev->info.gfx_level < GFX9) { /* TC-compat HTILE for MSAA depth/stencil images is broken * on GFX8 because the tiling doesn't match. */ @@ -114,7 +117,7 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCrea } /* GFX9 has issues when the sample count is 4 and the format is D16 */ - if (device->physical_device->info.gfx_level == GFX9 && pCreateInfo->samples == 4 && format == VK_FORMAT_D16_UNORM) + if (pdev->info.gfx_level == GFX9 && pCreateInfo->samples == 4 && format == VK_FORMAT_D16_UNORM) return false; return true; @@ -123,8 +126,10 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCrea static bool radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info) { + const struct radv_physical_device *pdev = radv_device_physical(device); + if (info->bo_metadata) { - if (device->physical_device->info.gfx_level >= GFX9) + if (pdev->info.gfx_level >= GFX9) return info->bo_metadata->u.gfx9.scanout; else return info->bo_metadata->u.legacy.scanout; @@ -237,8 +242,10 @@ static bool radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image, const VkImageCreateInfo *pCreateInfo, VkFormat format, bool *sign_reinterpret) { + const struct radv_physical_device *pdev = radv_device_physical(device); + /* DCC (Delta Color Compression) is only available for GFX8+. */ - if (device->physical_device->info.gfx_level < GFX8) + if (pdev->info.gfx_level < GFX8) return false; const VkImageCompressionControlEXT *compression = @@ -260,7 +267,7 @@ radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *imag * decompressing a lot anyway we might as well not have DCC. */ if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) && - (device->physical_device->info.gfx_level < GFX10 || + (pdev->info.gfx_level < GFX10 || radv_formats_is_atomic_allowed(device, pCreateInfo->pNext, format, pCreateInfo->flags))) return false; @@ -278,24 +285,22 @@ radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *imag if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1) return false; - if (device->physical_device->info.gfx_level < GFX10) { + if (pdev->info.gfx_level < GFX10) { /* TODO: Add support for DCC MSAA on GFX8-9. */ - if (pCreateInfo->samples > 1 && !device->physical_device->dcc_msaa_allowed) + if (pCreateInfo->samples > 1 && !pdev->dcc_msaa_allowed) return false; /* TODO: Add support for DCC layers/mipmaps on GFX9. */ - if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) && - device->physical_device->info.gfx_level == GFX9) + if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) && pdev->info.gfx_level == GFX9) return false; } /* DCC MSAA can't work on GFX10.3 and earlier without FMASK. */ - if (pCreateInfo->samples > 1 && device->physical_device->info.gfx_level < GFX11 && + if (pCreateInfo->samples > 1 && pdev->info.gfx_level < GFX11 && (device->instance->debug_flags & RADV_DEBUG_NO_FMASK)) return false; - return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format, pCreateInfo->flags, - sign_reinterpret); + return radv_are_formats_dcc_compatible(pdev, pCreateInfo->pNext, format, pCreateInfo->flags, sign_reinterpret); } static bool @@ -331,7 +336,9 @@ radv_use_dcc_for_image_late(struct radv_device *device, struct radv_image *image bool radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image) { - return ac_surface_supports_dcc_image_stores(device->physical_device->info.gfx_level, &image->planes[0].surface); + const struct radv_physical_device *pdev = radv_device_physical(device); + + return ac_surface_supports_dcc_image_stores(pdev->info.gfx_level, &image->planes[0].surface); } /* @@ -347,12 +354,14 @@ radv_image_use_dcc_predication(const struct radv_device *device, const struct ra static inline bool radv_use_fmask_for_image(const struct radv_device *device, const struct radv_image *image) { - if (device->physical_device->info.gfx_level == GFX9 && image->vk.array_layers > 1) { + const struct radv_physical_device *pdev = radv_device_physical(device); + + if (pdev->info.gfx_level == GFX9 && image->vk.array_layers > 1) { /* On GFX9, FMASK can be interleaved with layers and this isn't properly supported. */ return false; } - return device->physical_device->use_fmask && image->vk.samples > 1 && + return pdev->use_fmask && image->vk.samples > 1 && ((image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) || (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)); } @@ -361,7 +370,8 @@ static inline bool radv_use_htile_for_image(const struct radv_device *device, const struct radv_image *image, const VkImageCreateInfo *pCreateInfo) { - const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; const VkImageCompressionControlEXT *compression = vk_find_struct_const(pCreateInfo->pNext, IMAGE_COMPRESSION_CONTROL_EXT); @@ -374,11 +384,10 @@ radv_use_htile_for_image(const struct radv_device *device, const struct radv_ima * - Investigate about mips+layers. * - Enable on other gens. */ - bool use_htile_for_mips = image->vk.array_layers == 1 && device->physical_device->info.gfx_level >= GFX10; + bool use_htile_for_mips = image->vk.array_layers == 1 && pdev->info.gfx_level >= GFX10; /* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */ - if (device->physical_device->info.gfx_level == GFX10 && image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT && - image->vk.mip_levels > 1) + if (pdev->info.gfx_level == GFX10 && image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT && image->vk.mip_levels > 1) return false; /* Do not enable HTILE for very small images because it seems less performant but make sure it's @@ -395,19 +404,21 @@ radv_use_htile_for_image(const struct radv_device *device, const struct radv_ima static bool radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image) { + const struct radv_physical_device *pdev = radv_device_physical(device); + /* TC-compat CMASK is only available for GFX8+. */ - if (device->physical_device->info.gfx_level < GFX8) + if (pdev->info.gfx_level < GFX8) return false; /* GFX9 has issues when sample count is greater than 2 */ - if (device->physical_device->info.gfx_level == GFX9 && image->vk.samples > 2) + if (pdev->info.gfx_level == GFX9 && image->vk.samples > 2) return false; if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK) return false; /* TC-compat CMASK with storage images is supported on GFX10+. */ - if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && device->physical_device->info.gfx_level < GFX10) + if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && pdev->info.gfx_level < GFX10) return false; /* Do not enable TC-compatible if the image isn't readable by a shader @@ -427,7 +438,9 @@ radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image static uint32_t radv_get_bo_metadata_word1(const struct radv_device *device) { - return (ATI_VENDOR_ID << 16) | device->physical_device->info.pci_id; + const struct radv_physical_device *pdev = radv_device_physical(device); + + return (ATI_VENDOR_ID << 16) | pdev->info.pci_id; } static bool @@ -446,9 +459,11 @@ static void radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf *surface, const struct radeon_bo_metadata *md) { + const struct radv_physical_device *pdev = radv_device_physical(device); + surface->flags = RADEON_SURF_CLR(surface->flags, MODE); - if (device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { if (md->u.gfx9.swizzle_mode > 0) surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE); else @@ -476,6 +491,7 @@ static VkResult radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image, const struct radv_image_create_info *create_info, struct ac_surf_info *image_info) { + const struct radv_physical_device *pdev = radv_device_physical(device); unsigned width = image->vk.extent.width; unsigned height = image->vk.extent.height; @@ -489,7 +505,7 @@ radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image if (create_info->bo_metadata && radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) { const struct radeon_bo_metadata *md = create_info->bo_metadata; - if (device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level >= GFX10) { width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1; height = G_00A008_HEIGHT(md->metadata[4]) + 1; } else { @@ -509,7 +525,7 @@ radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image "(internal dimensions: %d x %d, external dimensions: %d x %d)\n", image->vk.extent.width, image->vk.extent.height, width, height); return VK_ERROR_INVALID_EXTERNAL_HANDLE; - } else if (device->physical_device->info.gfx_level >= GFX10) { + } else if (pdev->info.gfx_level >= GFX10) { fprintf(stderr, "Tried to import an image with inconsistent width on GFX10.\n" "As GFX10 has no separate stride fields we cannot cope with\n" @@ -535,6 +551,8 @@ static VkResult radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image, const struct radv_image_create_info *create_info, struct ac_surf_info *image_info) { + const struct radv_physical_device *pdev = radv_device_physical(device); + VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info); if (result != VK_SUCCESS) return result; @@ -552,7 +570,7 @@ radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image * image_info->surf_index = NULL; } - if (create_info->prime_blit_src && !device->physical_device->info.sdma_supports_compression) { + if (create_info->prime_blit_src && !pdev->info.sdma_supports_compression) { /* Older SDMA hw can't handle DCC */ image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC; } @@ -579,9 +597,10 @@ static uint64_t radv_get_surface_flags(struct radv_device *device, struct radv_image *image, unsigned plane_id, const VkImageCreateInfo *pCreateInfo, VkFormat image_format) { + const struct radv_physical_device *pdev = radv_device_physical(device); uint64_t flags; unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format); - VkFormat format = radv_image_get_plane_format(device->physical_device, image, plane_id); + VkFormat format = radv_image_get_plane_format(pdev, image, plane_id); const struct util_format_description *desc = vk_format_description(format); bool is_depth, is_stencil; @@ -616,7 +635,7 @@ radv_get_surface_flags(struct radv_device *device, struct radv_image *image, uns if (is_depth) { flags |= RADEON_SURF_ZBUFFER; - if (is_depth && is_stencil && device->physical_device->info.gfx_level <= GFX8) { + if (is_depth && is_stencil && pdev->info.gfx_level <= GFX8) { if (!(pCreateInfo->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) flags |= RADEON_SURF_NO_RENDER_TARGET; @@ -641,7 +660,7 @@ radv_get_surface_flags(struct radv_device *device, struct radv_image *image, uns if (is_stencil) flags |= RADEON_SURF_SBUFFER; - if (device->physical_device->info.gfx_level >= GFX9 && pCreateInfo->imageType == VK_IMAGE_TYPE_3D && + if (pdev->info.gfx_level >= GFX9 && pCreateInfo->imageType == VK_IMAGE_TYPE_3D && vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format)) flags |= RADEON_SURF_NO_RENDER_TARGET; @@ -656,7 +675,7 @@ radv_get_surface_flags(struct radv_device *device, struct radv_image *image, uns } if (image->queue_family_mask & BITFIELD_BIT(RADV_QUEUE_TRANSFER)) { - if (!device->physical_device->info.sdma_supports_compression) + if (!pdev->info.sdma_supports_compression) flags |= RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_HTILE; } @@ -720,12 +739,14 @@ radv_compose_swizzle(const struct util_format_description *desc, const VkCompone bool vi_alpha_is_on_msb(const struct radv_device *device, const VkFormat format) { - if (device->physical_device->info.gfx_level >= GFX11) + const struct radv_physical_device *pdev = radv_device_physical(device); + + if (pdev->info.gfx_level >= GFX11) return false; const struct util_format_description *desc = vk_format_description(format); - if (device->physical_device->info.gfx_level >= GFX10 && desc->nr_channels == 1) + if (pdev->info.gfx_level >= GFX10 && desc->nr_channels == 1) return desc->swizzle[3] == PIPE_SWIZZLE_X; return radv_translate_colorswap(format, false) <= 1; @@ -735,13 +756,13 @@ static void radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image, unsigned plane_id, struct radeon_bo_metadata *md) { + const struct radv_physical_device *pdev = radv_device_physical(device); static const VkComponentMapping fixedmapping; - const VkFormat plane_format = radv_image_get_plane_format(device->physical_device, image, plane_id); + const VkFormat plane_format = radv_image_get_plane_format(pdev, image, plane_id); const unsigned plane_width = vk_format_get_plane_width(image->vk.format, plane_id, image->vk.extent.width); const unsigned plane_height = vk_format_get_plane_height(image->vk.format, plane_id, image->vk.extent.height); struct radeon_surf *surface = &image->planes[plane_id].surface; - const struct legacy_surf_level *base_level_info = - device->physical_device->info.gfx_level <= GFX8 ? &surface->u.legacy.level[0] : NULL; + const struct legacy_surf_level *base_level_info = pdev->info.gfx_level <= GFX8 ? &surface->u.legacy.level[0] : NULL; uint32_t desc[8]; radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->vk.image_type, plane_format, @@ -751,21 +772,22 @@ radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image, radv_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, 0, 0, surface->blk_w, false, false, false, false, desc, NULL); - ac_surface_compute_umd_metadata(&device->physical_device->info, surface, image->vk.mip_levels, desc, - &md->size_metadata, md->metadata, + ac_surface_compute_umd_metadata(&pdev->info, surface, image->vk.mip_levels, desc, &md->size_metadata, md->metadata, device->instance->debug_flags & RADV_DEBUG_EXTRA_MD); } void radv_init_metadata(struct radv_device *device, struct radv_image *image, struct radeon_bo_metadata *metadata) { + const struct radv_physical_device *pdev = radv_device_physical(device); + /* use plane 0, even when there are multiple planes, to follow radeonsi */ const unsigned plane_id = 0; struct radeon_surf *surface = &image->planes[plane_id].surface; memset(metadata, 0, sizeof(*metadata)); - if (device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { uint64_t dcc_offset = image->bindings[0].offset + (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset); metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode; @@ -796,7 +818,8 @@ void radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image, uint64_t offset, uint32_t stride) { - ac_surface_override_offset_stride(&device->physical_device->info, &image->planes[0].surface, image->vk.array_layers, + const struct radv_physical_device *pdev = radv_device_physical(device); + ac_surface_override_offset_stride(&pdev->info, &image->planes[0].surface, image->vk.array_layers, image->vk.mip_levels, offset, stride); } @@ -819,6 +842,8 @@ radv_image_alloc_single_sample_cmask(const struct radv_device *device, const str static void radv_image_alloc_values(const struct radv_device *device, struct radv_image *image) { + const struct radv_physical_device *pdev = radv_device_physical(device); + /* images with modifiers can be potentially imported */ if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) return; @@ -839,7 +864,7 @@ radv_image_alloc_values(const struct radv_device *device, struct radv_image *ima image->size += 8 * image->vk.mip_levels; } - if (radv_image_is_tc_compat_htile(image) && device->physical_device->info.has_tc_compat_zrange_bug) { + if (radv_image_is_tc_compat_htile(image) && pdev->info.has_tc_compat_zrange_bug) { /* Metadata for the TC-compatible HTILE hardware bug which * have to be fixed by updating ZRANGE_PRECISION when doing * fast depth clears to 0.0f. @@ -855,13 +880,14 @@ radv_image_alloc_values(const struct radv_device *device, struct radv_image *ima static bool radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image) { - const struct radeon_info *gpu_info = &device->physical_device->info; + const struct radv_physical_device *pdev = radv_device_physical(device); + const struct radeon_info *gpu_info = &pdev->info; int log2_samples = util_logbase2(image->vk.samples); assert(gpu_info->gfx_level >= GFX10); for (unsigned i = 0; i < image->plane_count; ++i) { - VkFormat fmt = radv_image_get_plane_format(device->physical_device, image, i); + VkFormat fmt = radv_image_get_plane_format(pdev, image, i); int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt)); int log2_bpp_and_samples; @@ -903,9 +929,11 @@ radv_image_is_pipe_misaligned(const struct radv_device *device, const struct rad static bool radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image) { - if (device->physical_device->info.gfx_level >= GFX10) { - return !device->physical_device->info.tcc_rb_non_coherent && !radv_image_is_pipe_misaligned(device, image); - } else if (device->physical_device->info.gfx_level == GFX9) { + const struct radv_physical_device *pdev = radv_device_physical(device); + + if (pdev->info.gfx_level >= GFX10) { + return !pdev->info.tcc_rb_non_coherent && !radv_image_is_pipe_misaligned(device, image); + } else if (pdev->info.gfx_level == GFX9) { if (image->vk.samples == 1 && (image->vk.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) && !vk_format_has_stencil(image->vk.format)) { @@ -926,6 +954,8 @@ radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_im bool radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image) { + const struct radv_physical_device *pdev = radv_device_physical(device); + if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS) return false; @@ -934,7 +964,7 @@ radv_image_can_fast_clear(const struct radv_device *device, const struct radv_im return false; /* RB+ doesn't work with CMASK fast clear on Stoney. */ - if (!radv_image_has_dcc(image) && device->physical_device->info.family == CHIP_STONEY) + if (!radv_image_has_dcc(image) && pdev->info.family == CHIP_STONEY) return false; /* Fast-clears with CMASK aren't supported for 128-bit formats. */ @@ -958,8 +988,10 @@ radv_image_can_fast_clear(const struct radv_device *device, const struct radv_im static bool radv_image_use_comp_to_single(const struct radv_device *device, const struct radv_image *image) { + const struct radv_physical_device *pdev = radv_device_physical(device); + /* comp-to-single is only available for GFX10+. */ - if (device->physical_device->info.gfx_level < GFX10) + if (pdev->info.gfx_level < GFX10) return false; /* If the image can't be fast cleared, comp-to-single can't be used. */ @@ -972,7 +1004,7 @@ radv_image_use_comp_to_single(const struct radv_device *device, const struct rad /* It seems 8bpp and 16bpp require RB+ to work. */ unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk.format); - if (bytes_per_pixel <= 2 && !device->physical_device->info.rbplus_allowed) + if (bytes_per_pixel <= 2 && !pdev->info.rbplus_allowed) return false; return true; @@ -1049,6 +1081,8 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info, const struct VkVideoProfileListInfoKHR *profile_list, struct radv_image *image) { + struct radv_physical_device *pdev = radv_device_physical(device); + /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the * common internal case. */ create_info.vk_info = NULL; @@ -1060,7 +1094,7 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count); - radv_image_reset_layout(device->physical_device, image); + radv_image_reset_layout(pdev, image); /* * Due to how the decoder works, the user can't supply an oversized image, because if it attempts @@ -1070,17 +1104,17 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in if (image->vk.usage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR)) { assert(profile_list); uint32_t width_align, height_align; - radv_video_get_profile_alignments(device->physical_device, profile_list, &width_align, &height_align); + radv_video_get_profile_alignments(pdev, profile_list, &width_align, &height_align); image_info.width = align(image_info.width, width_align); image_info.height = align(image_info.height, height_align); - if (radv_has_uvd(device->physical_device) && image->vk.usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) { + if (radv_has_uvd(pdev) && image->vk.usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) { /* UVD and kernel demand a full DPB allocation. */ image_info.array_size = MIN2(16, image_info.array_size); } } - unsigned plane_count = radv_get_internal_plane_count(device->physical_device, image->vk.format); + unsigned plane_count = radv_get_internal_plane_count(pdev, image->vk.format); for (unsigned plane = 0; plane < plane_count; ++plane) { struct ac_surf_info info = image_info; uint64_t offset; @@ -1101,9 +1135,9 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in } if (create_info.bo_metadata && !mod_info && - !ac_surface_apply_umd_metadata(&device->physical_device->info, &image->planes[plane].surface, - image->vk.samples, image->vk.mip_levels, - create_info.bo_metadata->size_metadata, create_info.bo_metadata->metadata)) + !ac_surface_apply_umd_metadata(&pdev->info, &image->planes[plane].surface, image->vk.samples, + image->vk.mip_levels, create_info.bo_metadata->size_metadata, + create_info.bo_metadata->metadata)) return VK_ERROR_INVALID_EXTERNAL_HANDLE; if (!create_info.no_metadata_planes && !create_info.bo_metadata && plane_count == 1 && !mod_info) @@ -1121,8 +1155,8 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in stride = 0; /* 0 means no override */ } - if (!ac_surface_override_offset_stride(&device->physical_device->info, &image->planes[plane].surface, - image->vk.array_layers, image->vk.mip_levels, offset, stride)) + if (!ac_surface_override_offset_stride(&pdev->info, &image->planes[plane].surface, image->vk.array_layers, + image->vk.mip_levels, offset, stride)) return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT; /* Validate DCC offsets in modifier layout. */ @@ -1132,8 +1166,8 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT; for (unsigned i = 1; i < mem_planes; ++i) { - if (ac_surface_get_plane_offset(device->physical_device->info.gfx_level, &image->planes[plane].surface, i, - 0) != mod_info->pPlaneLayouts[i].offset) + if (ac_surface_get_plane_offset(pdev->info.gfx_level, &image->planes[plane].surface, i, 0) != + mod_info->pPlaneLayouts[i].offset) return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT; } } @@ -1141,7 +1175,7 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size); image->alignment = MAX2(image->alignment, 1 << image->planes[plane].surface.alignment_log2); - image->planes[plane].format = radv_image_get_plane_format(device->physical_device, image, plane); + image->planes[plane].format = radv_image_get_plane_format(pdev, image, plane); } image->tc_compatible_cmask = radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image); @@ -1177,6 +1211,8 @@ radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAll static void radv_image_print_info(struct radv_device *device, struct radv_image *image) { + const struct radv_physical_device *pdev = radv_device_physical(device); + fprintf(stderr, "Image:\n"); fprintf(stderr, " Info: size=%" PRIu64 ", alignment=%" PRIu32 ", " @@ -1188,11 +1224,11 @@ radv_image_print_info(struct radv_device *device, struct radv_image *image) const struct radv_image_plane *plane = &image->planes[i]; const struct radeon_surf *surf = &plane->surface; const struct util_format_description *desc = vk_format_description(plane->format); - uint64_t offset = ac_surface_get_plane_offset(device->physical_device->info.gfx_level, &plane->surface, 0, 0); + uint64_t offset = ac_surface_get_plane_offset(pdev->info.gfx_level, &plane->surface, 0, 0); fprintf(stderr, " Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset); - ac_surface_print_info(stderr, &device->physical_device->info, surf); + ac_surface_print_info(stderr, &pdev->info, surf); } } @@ -1200,7 +1236,7 @@ static uint64_t radv_select_modifier(const struct radv_device *dev, VkFormat format, const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list) { - const struct radv_physical_device *pdev = dev->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(dev); unsigned mod_count; assert(mod_list->drmFormatModifierCount); @@ -1238,6 +1274,7 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_ const VkAllocationCallbacks *alloc, VkImage *pImage, bool is_internal) { RADV_FROM_HANDLE(radv_device, device, _device); + const struct radv_physical_device *pdev = radv_device_physical(device); const VkImageCreateInfo *pCreateInfo = create_info->vk_info; uint64_t modifier = DRM_FORMAT_MOD_INVALID; struct radv_image *image = NULL; @@ -1250,7 +1287,7 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_ const struct VkVideoProfileListInfoKHR *profile_list = vk_find_struct_const(pCreateInfo->pNext, VIDEO_PROFILE_LIST_INFO_KHR); - unsigned plane_count = radv_get_internal_plane_count(device->physical_device, format); + unsigned plane_count = radv_get_internal_plane_count(pdev, format); const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count; @@ -1270,8 +1307,7 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_ pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT) image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u; else - image->queue_family_mask |= - 1u << vk_queue_to_radv(device->physical_device, pCreateInfo->pQueueFamilyIndices[i]); + image->queue_family_mask |= 1u << vk_queue_to_radv(pdev, pCreateInfo->pQueueFamilyIndices[i]); /* This queue never really accesses the image. */ image->queue_family_mask &= ~(1u << RADV_QUEUE_SPARSE); @@ -1375,10 +1411,12 @@ bool radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image, VkImageLayout layout, unsigned queue_mask) { + const struct radv_physical_device *pdev = radv_device_physical(device); + /* Don't compress exclusive images used on transfer queues when SDMA doesn't support HTILE. * Note that HTILE is already disabled on concurrent images when not supported. */ - if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !device->physical_device->info.sdma_supports_compression) + if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !pdev->info.sdma_supports_compression) return false; switch (layout) { @@ -1452,6 +1490,8 @@ bool radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image, unsigned level, VkImageLayout layout, unsigned queue_mask) { + const struct radv_physical_device *pdev = radv_device_physical(device); + if (!radv_dcc_enabled(image, level)) return false; @@ -1470,7 +1510,7 @@ radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_i /* Don't compress exclusive images used on transfer queues when SDMA doesn't support DCC. * Note that DCC is already disabled on concurrent images when not supported. */ - if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !device->physical_device->info.sdma_supports_compression) + if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !pdev->info.sdma_supports_compression) return false; if (layout == VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT) { @@ -1480,7 +1520,7 @@ radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_i return false; } - return device->physical_device->info.gfx_level >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL; + return pdev->info.gfx_level >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL; } enum radv_fmask_compression @@ -1533,11 +1573,13 @@ radv_image_queue_family_mask(const struct radv_image *image, enum radv_queue_fam bool radv_image_is_renderable(const struct radv_device *device, const struct radv_image *image) { + const struct radv_physical_device *pdev = radv_device_physical(device); + if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT || image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) return false; - if (device->physical_device->info.gfx_level >= GFX9 && image->vk.image_type == VK_IMAGE_TYPE_3D && + if (pdev->info.gfx_level >= GFX9 && image->vk.image_type == VK_IMAGE_TYPE_3D && vk_format_get_blocksizebits(image->vk.format) == 128 && vk_format_is_compressed(image->vk.format)) return false; @@ -1572,11 +1614,11 @@ radv_CreateImage(VkDevice _device, const VkImageCreateInfo *pCreateInfo, const V * we're guaranteed to access an Android object incorrectly. */ RADV_FROM_HANDLE(radv_device, device, _device); + const struct radv_physical_device *pdev = radv_device_physical(device); const VkImageSwapchainCreateInfoKHR *swapchain_info = vk_find_struct_const(pCreateInfo->pNext, IMAGE_SWAPCHAIN_CREATE_INFO_KHR); if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) { - return wsi_common_create_swapchain_image(device->physical_device->vk.wsi_device, pCreateInfo, - swapchain_info->swapchain, pImage); + return wsi_common_create_swapchain_image(pdev->vk.wsi_device, pCreateInfo, swapchain_info->swapchain, pImage); } #endif @@ -1686,6 +1728,7 @@ radv_GetImageSubresourceLayout2KHR(VkDevice _device, VkImage _image, const VkIma { RADV_FROM_HANDLE(radv_image, image, _image); RADV_FROM_HANDLE(radv_device, device, _device); + const struct radv_physical_device *pdev = radv_device_physical(device); int level = pSubresource->imageSubresource.mipLevel; int layer = pSubresource->imageSubresource.arrayLayer; @@ -1703,18 +1746,17 @@ radv_GetImageSubresourceLayout2KHR(VkDevice _device, VkImage _image, const VkIma assert(level == 0); assert(layer == 0); - pLayout->subresourceLayout.offset = - ac_surface_get_plane_offset(device->physical_device->info.gfx_level, surface, mem_plane_id, 0); + pLayout->subresourceLayout.offset = ac_surface_get_plane_offset(pdev->info.gfx_level, surface, mem_plane_id, 0); pLayout->subresourceLayout.rowPitch = - ac_surface_get_plane_stride(device->physical_device->info.gfx_level, surface, mem_plane_id, level); + ac_surface_get_plane_stride(pdev->info.gfx_level, surface, mem_plane_id, level); pLayout->subresourceLayout.arrayPitch = 0; pLayout->subresourceLayout.depthPitch = 0; pLayout->subresourceLayout.size = ac_surface_get_plane_size(surface, mem_plane_id); - } else if (device->physical_device->info.gfx_level >= GFX9) { + } else if (pdev->info.gfx_level >= GFX9) { uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0; pLayout->subresourceLayout.offset = - ac_surface_get_plane_offset(device->physical_device->info.gfx_level, &plane->surface, 0, layer) + level_offset; + ac_surface_get_plane_offset(pdev->info.gfx_level, &plane->surface, 0, layer) + level_offset; if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT || image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) { /* Adjust the number of bytes between each row because diff --git a/src/amd/vulkan/radv_image_view.c b/src/amd/vulkan/radv_image_view.c index 265e5509779..e778cbcb8aa 100644 --- a/src/amd/vulkan/radv_image_view.c +++ b/src/amd/vulkan/radv_image_view.c @@ -106,7 +106,8 @@ radv_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image * uint64_t gpu_address = binding->bo ? radv_buffer_get_va(binding->bo) + binding->offset : 0; uint64_t va = gpu_address; uint8_t swizzle = plane->surface.tile_swizzle; - enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(device); + enum amd_gfx_level gfx_level = pdev->info.gfx_level; uint64_t meta_va = 0; if (gfx_level >= GFX9) { if (is_stencil) @@ -154,7 +155,7 @@ radv_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image * * If an imported image is used with VK_IMAGE_VIEW_TYPE_2D_ARRAY, it may hang due to VM faults * because DEPTH means pitch with 2D, but it means depth with 2D array. */ - if (device->physical_device->info.gfx_level >= GFX10_3 && plane->surface.u.gfx9.uses_custom_pitch) { + if (pdev->info.gfx_level >= GFX10_3 && plane->surface.u.gfx9.uses_custom_pitch) { assert((plane->surface.u.gfx9.surf_pitch * plane->surface.bpe) % 256 == 0); assert(image->vk.image_type == VK_IMAGE_TYPE_2D); assert(plane->surface.is_linear); @@ -245,6 +246,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima uint32_t *fmask_state, VkImageCreateFlags img_create_flags, const struct ac_surf_nbc_view *nbc_view, const VkImageViewSlicedCreateInfoEXT *sliced_3d) { + const struct radv_physical_device *pdev = radv_device_physical(device); const struct util_format_description *desc; enum pipe_swizzle swizzle[4]; unsigned img_format; @@ -261,8 +263,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima desc = util_format_description(PIPE_FORMAT_R8G8B8X8_SRGB); } - img_format = - ac_get_gfx10_format_table(&device->physical_device->info)[vk_format_to_pipe_format(vk_format)].img_format; + img_format = ac_get_gfx10_format_table(&pdev->info)[vk_format_to_pipe_format(vk_format)].img_format; radv_compose_swizzle(desc, mapping, swizzle); @@ -271,7 +272,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima type = V_008F1C_SQ_RSRC_IMG_3D; } else { type = radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, image->vk.samples, is_storage_image, - device->physical_device->info.gfx_level == GFX9); + pdev->info.gfx_level == GFX9); } if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { @@ -286,7 +287,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima state[0] = 0; state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1); state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) | - S_00A008_RESOURCE_LEVEL(device->physical_device->info.gfx_level < GFX11); + S_00A008_RESOURCE_LEVEL(pdev->info.gfx_level < GFX11); state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) | S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) | S_00A00C_BASE_LEVEL(image->vk.samples > 1 ? 0 : first_level) | @@ -332,7 +333,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima max_mip = nbc_view->num_levels - 1; unsigned min_lod_clamped = radv_float_to_ufixed(CLAMP(min_lod, 0, 15), 8); - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { state[1] |= S_00A004_MAX_MIP(max_mip); state[5] |= S_00A014_MIN_LOD_LO(min_lod_clamped); state[6] |= S_00A018_MIN_LOD_HI(min_lod_clamped >> 5); @@ -413,6 +414,7 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag unsigned width, unsigned height, unsigned depth, float min_lod, uint32_t *state, uint32_t *fmask_state, VkImageCreateFlags img_create_flags) { + const struct radv_physical_device *pdev = radv_device_physical(device); const struct util_format_description *desc; enum pipe_swizzle swizzle[4]; int first_non_void; @@ -444,21 +446,19 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag } /* S8 with either Z16 or Z32 HTILE need a special format. */ - if (device->physical_device->info.gfx_level == GFX9 && vk_format == VK_FORMAT_S8_UINT && - radv_image_is_tc_compat_htile(image)) { + if (pdev->info.gfx_level == GFX9 && vk_format == VK_FORMAT_S8_UINT && radv_image_is_tc_compat_htile(image)) { if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) data_format = V_008F14_IMG_DATA_FORMAT_S8_32; else if (image->vk.format == VK_FORMAT_D16_UNORM_S8_UINT) data_format = V_008F14_IMG_DATA_FORMAT_S8_16; } - if (device->physical_device->info.gfx_level == GFX9 && - img_create_flags & VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT) { + if (pdev->info.gfx_level == GFX9 && img_create_flags & VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT) { assert(image->vk.image_type == VK_IMAGE_TYPE_3D); type = V_008F1C_SQ_RSRC_IMG_3D; } else { type = radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, image->vk.samples, is_storage_image, - device->physical_device->info.gfx_level == GFX9); + pdev->info.gfx_level == GFX9); } if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { @@ -484,7 +484,7 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag state[6] = 0; state[7] = 0; - if (device->physical_device->info.gfx_level == GFX9) { + if (pdev->info.gfx_level == GFX9) { unsigned bc_swizzle = gfx9_border_color_swizzle(desc); /* Depth is the last accessible layer on Gfx9. @@ -509,7 +509,7 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag /* The last dword is unused by hw. The shader uses it to clear * bits in the first dword of sampler state. */ - if (device->physical_device->info.gfx_level <= GFX7 && image->vk.samples <= 1) { + if (pdev->info.gfx_level <= GFX7 && image->vk.samples <= 1) { if (first_level == last_level) state[7] = C_008F30_MAX_ANISO_RATIO; else @@ -529,7 +529,7 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag va = gpu_address + image->bindings[0].offset + image->planes[0].surface.fmask_offset; - if (device->physical_device->info.gfx_level == GFX9) { + if (pdev->info.gfx_level == GFX9) { fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK; switch (image->vk.samples) { case 2: @@ -576,7 +576,7 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag fmask_state[6] = 0; fmask_state[7] = 0; - if (device->physical_device->info.gfx_level == GFX9) { + if (pdev->info.gfx_level == GFX9) { fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode); fmask_state[4] |= S_008F20_DEPTH(last_layer) | S_008F20_PITCH(image->planes[0].surface.u.gfx9.color.fmask_epitch); @@ -615,7 +615,9 @@ radv_make_texture_descriptor(struct radv_device *device, struct radv_image *imag uint32_t *fmask_state, VkImageCreateFlags img_create_flags, const struct ac_surf_nbc_view *nbc_view, const VkImageViewSlicedCreateInfoEXT *sliced_3d) { - if (device->physical_device->info.gfx_level >= GFX10) { + const struct radv_physical_device *pdev = radv_device_physical(device); + + if (pdev->info.gfx_level >= GFX10) { gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping, first_level, last_level, first_layer, last_layer, width, height, depth, min_lod, state, fmask_state, img_create_flags, nbc_view, sliced_3d); @@ -630,12 +632,13 @@ static inline void compute_non_block_compressed_view(struct radv_device *device, const struct radv_image_view *iview, struct ac_surf_nbc_view *nbc_view) { + const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_image *image = iview->image; const struct radeon_surf *surf = &image->planes[0].surface; struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws); struct ac_surf_info surf_info = radv_get_ac_surf_info(device, image); - ac_surface_compute_nbc_view(addrlib, &device->physical_device->info, surf, &surf_info, iview->vk.base_mip_level, + ac_surface_compute_nbc_view(addrlib, &pdev->info, surf, &surf_info, iview->vk.base_mip_level, iview->vk.base_array_layer, nbc_view); } @@ -647,6 +650,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic const struct ac_surf_nbc_view *nbc_view, const VkImageViewSlicedCreateInfoEXT *sliced_3d, bool force_zero_base_mip) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_image *image = iview->image; struct radv_image_plane *plane = &image->planes[plane_id]; bool is_stencil = iview->vk.aspects == VK_IMAGE_ASPECT_STENCIL_BIT; @@ -665,7 +669,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0); blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format); - if (device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { if (nbc_view->valid) { hw_level = nbc_view->level; iview->extent.width = nbc_view->width; @@ -689,7 +693,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic img_create_flags, nbc_view, sliced_3d); const struct legacy_surf_level *base_level_info = NULL; - if (device->physical_device->info.gfx_level <= GFX8) { + if (pdev->info.gfx_level <= GFX8) { if (is_stencil) base_level_info = &plane->surface.u.legacy.zs.stencil_level[iview->vk.base_mip_level]; else @@ -738,6 +742,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device, const struct radv_image_view_extra_create_info *extra_create_info) { RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image); + const struct radv_physical_device *pdev = radv_device_physical(device); const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; uint32_t plane_count = 1; float min_lod = 0.0f; @@ -755,7 +760,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device, vk_image_view_init(&device->vk, &iview->vk, !from_client, pCreateInfo); bool force_zero_base_mip = true; - if (device->physical_device->info.gfx_level <= GFX8 && min_lod) { + if (pdev->info.gfx_level <= GFX8 && min_lod) { /* Do not force the base level to zero to workaround a spurious bug with mipmaps and min LOD. */ force_zero_base_mip = false; } @@ -800,15 +805,15 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device, } /* when the view format is emulated, redirect the view to the hidden plane 1 */ - if (radv_is_format_emulated(device->physical_device, iview->vk.format)) { - assert(radv_is_format_emulated(device->physical_device, image->vk.format)); + if (radv_is_format_emulated(pdev, iview->vk.format)) { + assert(radv_is_format_emulated(pdev, image->vk.format)); iview->plane_id = 1; iview->vk.view_format = image->planes[iview->plane_id].format; iview->vk.format = image->planes[iview->plane_id].format; plane_count = 1; } - if (!force_zero_base_mip || device->physical_device->info.gfx_level >= GFX9) { + if (!force_zero_base_mip || pdev->info.gfx_level >= GFX9) { iview->extent = (VkExtent3D){ .width = image->vk.extent.width, .height = image->vk.extent.height, @@ -854,7 +859,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device, * block compatible format and the compressed format, so even if we take * the plain converted dimensions the physical layout is correct. */ - if (device->physical_device->info.gfx_level >= GFX9 && vk_format_is_block_compressed(plane->format) && + if (pdev->info.gfx_level >= GFX9 && vk_format_is_block_compressed(plane->format) && !vk_format_is_block_compressed(iview->vk.format)) { /* If we have multiple levels in the view we should ideally take the last level, * but the mip calculation has a max(..., 1) so walking back to the base mip in an @@ -879,7 +884,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device, * changes the descriptor's base level, and adjusts the address and * extents accordingly. */ - if (device->physical_device->info.gfx_level >= GFX10 && + if (pdev->info.gfx_level >= GFX10 && (radv_minify(iview->extent.width, range->baseMipLevel) < lvl_width || radv_minify(iview->extent.height, range->baseMipLevel) < lvl_height) && iview->vk.layer_count == 1) { diff --git a/src/amd/vulkan/radv_perfcounter.c b/src/amd/vulkan/radv_perfcounter.c index 49078fdcf5e..709d2e515c5 100644 --- a/src/amd/vulkan/radv_perfcounter.c +++ b/src/amd/vulkan/radv_perfcounter.c @@ -32,7 +32,9 @@ void radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf *cs, unsigned shaders) { - if (device->physical_device->info.gfx_level >= GFX11) { + const struct radv_physical_device *pdev = radv_device_physical(device); + + if (pdev->info.gfx_level >= GFX11) { radeon_set_uconfig_reg(cs, R_036760_SQG_PERFCOUNTER_CTRL, shaders & 0x7f); } else { radeon_set_uconfig_reg_seq(cs, R_036780_SQ_PERFCOUNTER_CTRL, 2); @@ -74,12 +76,14 @@ radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family) { + const struct radv_physical_device *pdev = radv_device_physical(device); + radv_emit_windowed_counters(device, cs, family, false); /* Stop SPM counters. */ radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) | - S_036020_SPM_PERFMON_STATE(device->physical_device->info.never_stop_sq_perf_counters + S_036020_SPM_PERFMON_STATE(pdev->info.never_stop_sq_perf_counters ? V_036020_STRM_PERFMON_STATE_START_COUNTING : V_036020_STRM_PERFMON_STATE_STOP_COUNTING)); } @@ -466,7 +470,8 @@ radv_emit_instance(struct radv_cmd_buffer *cmd_buffer, int se, int instance) static void radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, unsigned *selectors) { - const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; const enum radv_queue_family qf = cmd_buffer->qf; struct ac_pc_block_base *regs = block->b->b; struct radeon_cmdbuf *cs = cmd_buffer->cs; @@ -492,6 +497,7 @@ static void radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, uint64_t va) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct ac_pc_block_base *regs = block->b->b; struct radeon_cmdbuf *cs = cmd_buffer->cs; unsigned reg = regs->counter0_lo; @@ -510,7 +516,7 @@ radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_p radeon_emit(cs, va); radeon_emit(cs, va >> 32); - va += sizeof(uint64_t) * 2 * radv_pc_get_num_instances(cmd_buffer->device->physical_device, block); + va += sizeof(uint64_t) * 2 * radv_pc_get_num_instances(pdev, block); reg += reg_delta; } } @@ -518,9 +524,10 @@ radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_p static void radv_pc_sample_block(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, uint64_t va) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); unsigned se_end = 1; if (block->b->b->flags & AC_PC_BLOCK_SE) - se_end = cmd_buffer->device->physical_device->info.max_se; + se_end = pdev->info.max_se; for (unsigned se = 0; se < se_end; ++se) { for (unsigned instance = 0; instance < block->num_instances; ++instance) { @@ -555,8 +562,8 @@ radv_pc_wait_idle(struct radv_cmd_buffer *cmd_buffer) static void radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va, bool end) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radeon_cmdbuf *cs = cmd_buffer->cs; - struct radv_physical_device *pdev = cmd_buffer->device->physical_device; radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0)); @@ -621,7 +628,7 @@ void radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va) { struct radeon_cmdbuf *cs = cmd_buffer->cs; - struct radv_physical_device *pdev = cmd_buffer->device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); ASSERTED unsigned cdw_max; cmd_buffer->state.uses_perf_counters = true; @@ -698,6 +705,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo void radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radeon_cmdbuf *cs = cmd_buffer->cs; ASSERTED unsigned cdw_max; @@ -710,9 +718,8 @@ radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->device->perf_counter_bo); uint64_t perf_ctr_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET; - radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->info.gfx_level, cmd_buffer->qf, - V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, perf_ctr_va, - 1, cmd_buffer->gfx9_fence_va); + radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0, + EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, perf_ctr_va, 1, cmd_buffer->gfx9_fence_va); radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, perf_ctr_va, 1, 0xffffffff); radv_pc_wait_idle(cmd_buffer); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 9d551f5cc08..f51a30dcc1d 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -365,7 +365,8 @@ static unsigned lower_bit_size_callback(const nir_instr *instr, void *_) { struct radv_device *device = _; - enum amd_gfx_level chip = device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(device); + enum amd_gfx_level chip = pdev->info.gfx_level; if (instr->type != nir_instr_type_alu) return 0; @@ -437,7 +438,8 @@ opt_vectorize_callback(const nir_instr *instr, const void *_) return 0; const struct radv_device *device = _; - enum amd_gfx_level chip = device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(device); + enum amd_gfx_level chip = pdev->info.gfx_level; if (chip < GFX9) return 1; @@ -461,7 +463,8 @@ void radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_state_key *gfx_state, struct radv_shader_stage *stage) { - enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(device); + enum amd_gfx_level gfx_level = pdev->info.gfx_level; bool progress; /* Wave and workgroup size should already be filled. */ @@ -548,8 +551,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat NIR_PASS(_, stage->nir, ac_nir_lower_tex, &(ac_nir_lower_tex_options){ .gfx_level = gfx_level, - .lower_array_layer_round_even = - !device->physical_device->info.conformant_trunc_coord || device->disable_trunc_coord, + .lower_array_layer_round_even = !pdev->info.conformant_trunc_coord || device->disable_trunc_coord, .fix_derivs_in_divergent_cf = fix_derivs_in_divergent_cf, .max_wqm_vgprs = 64, // TODO: improve spiller and RA support for linear VGPRs }); @@ -570,7 +572,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat nir_move_options sink_opts = nir_move_const_undef | nir_move_copies; if (!stage->key.optimisations_disabled) { - if (stage->stage != MESA_SHADER_FRAGMENT || !device->physical_device->cache_key.disable_sinking_load_input_fs) + if (stage->stage != MESA_SHADER_FRAGMENT || !pdev->cache_key.disable_sinking_load_input_fs) sink_opts |= nir_move_load_input; NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts); @@ -581,7 +583,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat * load_input can be reordered, but buffer loads can't. */ if (stage->stage == MESA_SHADER_VERTEX) { - NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, stage, gfx_state, &device->physical_device->info); + NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, stage, gfx_state, &pdev->info); } /* Lower I/O intrinsics to memory instructions. */ @@ -598,7 +600,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat stage->info.outinfo.export_prim_id, false, false, false, stage->info.force_vrs_per_vertex); } else { - bool emulate_ngg_gs_query_pipeline_stat = device->physical_device->emulate_ngg_gs_query_pipeline_stat; + bool emulate_ngg_gs_query_pipeline_stat = pdev->emulate_ngg_gs_query_pipeline_stat; ac_nir_gs_output_info gs_out_info = { .streams = stage->info.gs.output_streams, @@ -609,7 +611,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat } else if (stage->stage == MESA_SHADER_FRAGMENT) { ac_nir_lower_ps_options options = { .gfx_level = gfx_level, - .family = device->physical_device->info.family, + .family = pdev->info.family, .use_aco = !radv_use_llvm_for_stage(device, stage->stage), .uses_discard = true, .alpha_func = COMPARE_FUNC_ALWAYS, @@ -666,7 +668,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat NIR_PASS(_, stage->nir, ac_nir_lower_global_access); NIR_PASS_V(stage->nir, ac_nir_lower_intrinsics_to_args, gfx_level, radv_select_hw_stage(&stage->info, gfx_level), &stage->args.ac); - NIR_PASS_V(stage->nir, radv_nir_lower_abi, gfx_level, stage, gfx_state, device->physical_device->info.address32_hi); + NIR_PASS_V(stage->nir, radv_nir_lower_abi, gfx_level, stage, gfx_state, pdev->info.address32_hi); radv_optimize_nir_algebraic( stage->nir, io_to_mem || lowered_ngg || stage->stage == MESA_SHADER_COMPUTE || stage->stage == MESA_SHADER_TASK); @@ -926,7 +928,7 @@ radv_GetPipelineExecutableStatisticsKHR(VkDevice _device, const VkPipelineExecut struct radv_shader *shader = radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage); - const struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); unsigned lds_increment = pdev->info.gfx_level >= GFX11 && stage == MESA_SHADER_FRAGMENT ? 1024 : pdev->info.lds_encode_granularity; diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c index 7d851eb9284..312e54bd5ee 100644 --- a/src/amd/vulkan/radv_pipeline_cache.c +++ b/src/amd/vulkan/radv_pipeline_cache.c @@ -38,6 +38,8 @@ static bool radv_is_cache_disabled(struct radv_device *device) { + const struct radv_physical_device *pdev = radv_device_physical(device); + /* The buffer address used for debug printf is hardcoded. */ if (device->printf.buffer_addr) return true; @@ -45,8 +47,7 @@ radv_is_cache_disabled(struct radv_device *device) /* Pipeline caches can be disabled with RADV_DEBUG=nocache, with MESA_GLSL_CACHE_DISABLE=1 and * when ACO_DEBUG is used. MESA_GLSL_CACHE_DISABLE is done elsewhere. */ - return (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) || - (device->physical_device->use_llvm ? 0 : aco_get_codegen_flags()); + return (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) || (pdev->use_llvm ? 0 : aco_get_codegen_flags()); } void @@ -532,14 +533,15 @@ nir_shader * radv_pipeline_cache_lookup_nir(struct radv_device *device, struct vk_pipeline_cache *cache, gl_shader_stage stage, const blake3_hash key) { + const struct radv_physical_device *pdev = radv_device_physical(device); + if (radv_is_cache_disabled(device)) return NULL; if (!cache) cache = device->mem_cache; - return vk_pipeline_cache_lookup_nir(cache, key, sizeof(blake3_hash), &device->physical_device->nir_options[stage], - NULL, NULL); + return vk_pipeline_cache_lookup_nir(cache, key, sizeof(blake3_hash), &pdev->nir_options[stage], NULL, NULL); } void @@ -570,6 +572,7 @@ radv_pipeline_cache_lookup_nir_handle(struct radv_device *device, struct vk_pipe struct nir_shader * radv_pipeline_cache_handle_to_nir(struct radv_device *device, struct vk_pipeline_cache_object *object) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct blob_reader blob; struct vk_raw_data_cache_object *nir_object = container_of(object, struct vk_raw_data_cache_object, base); blob_reader_init(&blob, nir_object->data, nir_object->data_size); @@ -579,7 +582,7 @@ radv_pipeline_cache_handle_to_nir(struct radv_device *device, struct vk_pipeline ralloc_free(nir); return NULL; } - nir->options = &device->physical_device->nir_options[nir->info.stage]; + nir->options = &pdev->nir_options[nir->info.stage]; return nir; } diff --git a/src/amd/vulkan/radv_pipeline_compute.c b/src/amd/vulkan/radv_pipeline_compute.c index aecc0587725..270522378ac 100644 --- a/src/amd/vulkan/radv_pipeline_compute.c +++ b/src/amd/vulkan/radv_pipeline_compute.c @@ -75,6 +75,7 @@ void radv_get_compute_pipeline_metadata(const struct radv_device *device, const struct radv_compute_pipeline *pipeline, struct radv_compute_pipeline_metadata *metadata) { + const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_shader *cs = pipeline->base.shaders[MESA_SHADER_COMPUTE]; uint32_t upload_sgpr = 0, inline_sgpr = 0; @@ -84,7 +85,7 @@ radv_get_compute_pipeline_metadata(const struct radv_device *device, const struc metadata->rsrc1 = cs->config.rsrc1; metadata->rsrc2 = cs->config.rsrc2; metadata->rsrc3 = cs->config.rsrc3; - metadata->compute_resource_limits = radv_get_compute_resource_limits(device->physical_device, cs); + metadata->compute_resource_limits = radv_get_compute_resource_limits(pdev, cs); metadata->block_size_x = cs->info.cs.block_size[0]; metadata->block_size_y = cs->info.cs.block_size[1]; metadata->block_size_z = cs->info.cs.block_size[2]; @@ -136,7 +137,7 @@ static void radv_compute_generate_pm4(const struct radv_device *device, struct radv_compute_pipeline *pipeline, struct radv_shader *shader) { - struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); struct radeon_cmdbuf *cs = &pipeline->base.cs; cs->reserved_dw = cs->max_dw = pdev->info.gfx_level >= GFX10 ? 19 : 16; diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index 3642cb5dc2a..578416ffbd5 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -132,12 +132,13 @@ static unsigned radv_choose_spi_color_format(const struct radv_device *device, VkFormat vk_format, bool blend_enable, bool blend_need_alpha) { + const struct radv_physical_device *pdev = radv_device_physical(device); const struct util_format_description *desc = vk_format_description(vk_format); - bool use_rbplus = device->physical_device->info.rbplus_allowed; + bool use_rbplus = pdev->info.rbplus_allowed; struct ac_spi_color_formats formats = {0}; unsigned format, ntype, swap; - format = ac_get_cb_format(device->physical_device->info.gfx_level, desc->format); + format = ac_get_cb_format(pdev->info.gfx_level, desc->format); ntype = ac_get_cb_number_type(desc->format); swap = radv_translate_colorswap(vk_format, false); @@ -508,12 +509,13 @@ static uint64_t radv_pipeline_needed_dynamic_state(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state) { + const struct radv_physical_device *pdev = radv_device_physical(device); bool has_color_att = radv_pipeline_has_color_attachments(state->rp); bool raster_enabled = !state->rs->rasterizer_discard_enable || (pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE); uint64_t states = RADV_DYNAMIC_ALL; - if (device->physical_device->info.gfx_level < GFX10_3) + if (pdev->info.gfx_level < GFX10_3) states &= ~RADV_DYNAMIC_FRAGMENT_SHADING_RATE; /* Disable dynamic states that are useless to mesh shading. */ @@ -568,7 +570,7 @@ radv_pipeline_needed_dynamic_state(const struct radv_device *device, const struc struct radv_ia_multi_vgt_param_helpers radv_compute_ia_multi_vgt_param(const struct radv_device *device, struct radv_shader *const *shaders) { - const struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0}; ia_multi_vgt_param.ia_switch_on_eoi = false; @@ -1295,7 +1297,8 @@ static void radv_link_shaders(const struct radv_device *device, struct radv_shader_stage *producer_stage, struct radv_shader_stage *consumer_stage, const struct radv_graphics_state_key *gfx_state) { - const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; nir_shader *producer = producer_stage->nir; nir_shader *consumer = consumer_stage->nir; bool progress; @@ -1686,6 +1689,7 @@ radv_graphics_shaders_link(const struct radv_device *device, const struct radv_g struct radv_ps_epilog_key radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ps_epilog_state *state) { + const struct radv_physical_device *pdev = radv_device_physical(device); unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0, z_format = 0; struct radv_ps_epilog_key key; @@ -1731,8 +1735,8 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ state->alpha_to_coverage_via_mrtz); key.spi_shader_col_format = col_format; - key.color_is_int8 = device->physical_device->info.gfx_level < GFX8 ? is_int8 : 0; - key.color_is_int10 = device->physical_device->info.gfx_level < GFX8 ? is_int10 : 0; + key.color_is_int8 = pdev->info.gfx_level < GFX8 ? is_int8 : 0; + key.color_is_int10 = pdev->info.gfx_level < GFX8 ? is_int10 : 0; key.enable_mrt_output_nan_fixup = device->instance->drirc.enable_mrt_output_nan_fixup ? is_float32 : 0; key.colors_written = state->colors_written; key.mrt0_is_dual_src = state->mrt0_is_dual_src; @@ -1811,7 +1815,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct const struct vk_graphics_pipeline_state *state, VkGraphicsPipelineLibraryFlagBitsEXT lib_flags) { - const struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_graphics_state_key key; memset(&key, 0, sizeof(key)); @@ -1884,7 +1888,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct } } - if (device->physical_device->info.gfx_level >= GFX11 && state->ms) { + if (pdev->info.gfx_level >= GFX11 && state->ms) { key.ms.alpha_to_coverage_via_mrtz = state->ms->alpha_to_coverage_enable; } @@ -1898,15 +1902,14 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct key.unknown_rast_prim = true; } - if (device->physical_device->info.gfx_level >= GFX10 && state->rs) { + if (pdev->info.gfx_level >= GFX10 && state->rs) { key.rs.provoking_vtx_last = state->rs->provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT; } key.ps.force_vrs_enabled = device->force_vrs_enabled && !radv_is_static_vrs_enabled(pipeline, state); if ((radv_is_vrs_enabled(pipeline, state) || key.ps.force_vrs_enabled) && - (device->physical_device->info.family == CHIP_NAVI21 || device->physical_device->info.family == CHIP_NAVI22 || - device->physical_device->info.family == CHIP_VANGOGH)) + (pdev->info.family == CHIP_NAVI21 || pdev->info.family == CHIP_NAVI22 || pdev->info.family == CHIP_VANGOGH)) key.adjust_frag_coord_z = true; if (radv_pipeline_needs_ps_epilog(pipeline, lib_flags)) @@ -1914,7 +1917,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct key.ps.epilog = radv_pipeline_generate_ps_epilog_key(device, state); - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { /* On GFX11, alpha to coverage is exported via MRTZ when depth/stencil/samplemask are also * exported. Though, when a PS epilog is needed and the MS state is NULL (with dynamic * rendering), it's not possible to know the info at compile time and MRTZ needs to be @@ -1927,7 +1930,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct key.dynamic_rasterization_samples = !!(pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZATION_SAMPLES) || (!!(pipeline->active_stages & VK_SHADER_STAGE_FRAGMENT_BIT) && !state->ms); - if (device->physical_device->use_ngg) { + if (pdev->use_ngg) { VkShaderStageFlags ngg_stage; if (pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT) { @@ -1995,7 +1998,9 @@ static void radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage *stages, VkShaderStageFlagBits active_nir_stages) { - if (!device->physical_device->cache_key.use_ngg) + const struct radv_physical_device *pdev = radv_device_physical(device); + + if (!pdev->cache_key.use_ngg) return; if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_VERTEX].info.next_stage != MESA_SHADER_TESS_CTRL) { @@ -2006,7 +2011,7 @@ radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage * stages[MESA_SHADER_MESH].info.is_ngg = true; } - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { if (stages[MESA_SHADER_GEOMETRY].nir) stages[MESA_SHADER_GEOMETRY].info.is_ngg = true; } else { @@ -2164,7 +2169,8 @@ static void radv_declare_pipeline_args(struct radv_device *device, struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state, VkShaderStageFlagBits active_nir_stages) { - enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(device); + enum amd_gfx_level gfx_level = pdev->info.gfx_level; if (gfx_level >= GFX9 && stages[MESA_SHADER_TESS_CTRL].nir) { radv_declare_shader_args(device, gfx_state, &stages[MESA_SHADER_TESS_CTRL].info, MESA_SHADER_TESS_CTRL, @@ -2210,15 +2216,16 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache bool keep_executable_info, bool keep_statistic_info, struct radv_shader_binary **gs_copy_binary) { + const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_shader_info *gs_info = &gs_stage->info; ac_nir_gs_output_info output_info = { .streams = gs_info->gs.output_streams, .usage_mask = gs_info->gs.output_usage_mask, }; nir_shader *nir = ac_nir_create_gs_copy_shader( - gs_stage->nir, device->physical_device->info.gfx_level, - gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask, gs_info->outinfo.vs_output_param_offset, - gs_info->outinfo.param_exports, false, false, false, gs_info->force_vrs_per_vertex, &output_info); + gs_stage->nir, pdev->info.gfx_level, gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask, + gs_info->outinfo.vs_output_param_offset, gs_info->outinfo.param_exports, false, false, false, + gs_info->force_vrs_per_vertex, &output_info); nir_validate_shader(nir, "after ac_nir_create_gs_copy_shader"); nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); @@ -2246,10 +2253,8 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache gs_copy_stage.info.user_sgprs_locs = gs_copy_stage.args.user_sgprs_locs; gs_copy_stage.info.inline_push_constant_mask = gs_copy_stage.args.ac.inline_push_const_mask; - NIR_PASS_V(nir, ac_nir_lower_intrinsics_to_args, device->physical_device->info.gfx_level, AC_HW_VERTEX_SHADER, - &gs_copy_stage.args.ac); - NIR_PASS_V(nir, radv_nir_lower_abi, device->physical_device->info.gfx_level, &gs_copy_stage, gfx_state, - device->physical_device->info.address32_hi); + NIR_PASS_V(nir, ac_nir_lower_intrinsics_to_args, pdev->info.gfx_level, AC_HW_VERTEX_SHADER, &gs_copy_stage.args.ac); + NIR_PASS_V(nir, radv_nir_lower_abi, pdev->info.gfx_level, &gs_copy_stage, gfx_state, pdev->info.address32_hi); struct radv_graphics_pipeline_key key = {0}; bool dump_shader = radv_can_dump_shader(device, nir, true); @@ -2272,6 +2277,8 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_ struct radv_shader_binary **binaries, struct radv_shader **gs_copy_shader, struct radv_shader_binary **gs_copy_binary) { + const struct radv_physical_device *pdev = radv_device_physical(device); + for (int s = MESA_VULKAN_SHADER_STAGES - 1; s >= 0; s--) { if (!(active_nir_stages & (1 << s))) continue; @@ -2280,7 +2287,7 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_ unsigned shader_count = 1; /* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. */ - if (device->physical_device->info.gfx_level >= GFX9 && + if (pdev->info.gfx_level >= GFX9 && ((s == MESA_SHADER_GEOMETRY && (active_nir_stages & (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))) || (s == MESA_SHADER_TESS_CTRL && (active_nir_stages & VK_SHADER_STAGE_VERTEX_BIT)))) { @@ -2348,6 +2355,7 @@ static void radv_pipeline_import_retained_shaders(const struct radv_device *device, struct radv_graphics_pipeline *pipeline, struct radv_graphics_lib_pipeline *lib, struct radv_shader_stage *stages) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_retained_shaders *retained_shaders = &lib->retained_shaders; /* Import the stages (SPIR-V only in case of cache hits). */ @@ -2370,7 +2378,7 @@ radv_pipeline_import_retained_shaders(const struct radv_device *device, struct r int64_t stage_start = os_time_get_nano(); /* Deserialize the NIR shader. */ - const struct nir_shader_compiler_options *options = &device->physical_device->nir_options[s]; + const struct nir_shader_compiler_options *options = &pdev->nir_options[s]; struct blob_reader blob_reader; blob_reader_init(&blob_reader, retained_shaders->stages[s].serialized_nir, retained_shaders->stages[s].serialized_nir_size); @@ -2442,6 +2450,7 @@ static bool radv_skip_graphics_pipeline_compile(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, VkGraphicsPipelineLibraryFlagBitsEXT lib_flags, bool fast_linking_enabled) { + const struct radv_physical_device *pdev = radv_device_physical(device); VkShaderStageFlagBits binary_stages = 0; /* Do not skip when fast-linking isn't enabled. */ @@ -2462,7 +2471,7 @@ radv_skip_graphics_pipeline_compile(const struct radv_device *device, const stru binary_stages |= mesa_to_vk_shader_stage(i); } - if (device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { /* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. */ if (binary_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) { binary_stages |= VK_SHADER_STAGE_VERTEX_BIT; @@ -2490,6 +2499,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac struct radv_shader **shaders, struct radv_shader_binary **binaries, struct radv_shader **gs_copy_shader, struct radv_shader_binary **gs_copy_binary) { + const struct radv_physical_device *pdev = radv_device_physical(device); const bool nir_cache = device->instance->perftest_flags & RADV_PERFTEST_NIR_CACHE; for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) { if (!stages[s].entrypoint) @@ -2530,7 +2540,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac active_nir_stages |= mesa_to_vk_shader_stage(i); } - if (!device->physical_device->mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir && + if (!pdev->mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir && BITSET_TEST(stages[MESA_SHADER_MESH].nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID)) { nir_shader *mesh = stages[MESA_SHADER_MESH].nir; nir_shader *task = stages[MESA_SHADER_TASK].nir; @@ -2848,7 +2858,7 @@ void radv_emit_vgt_gs_mode(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, const struct radv_shader *last_vgt_api_shader) { - const struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_shader_info *info = &last_vgt_api_shader->info; unsigned vgt_primitiveid_en = 0; uint32_t vgt_gs_mode = 0; @@ -2871,7 +2881,7 @@ static void radv_emit_hw_vs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, const struct radv_shader *shader) { - const struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); uint64_t va = radv_shader_get_va(shader); radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4); @@ -2971,7 +2981,7 @@ static void radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, const struct radv_shader *es, const struct radv_shader *shader) { - const struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); uint64_t va = radv_shader_get_va(shader); gl_shader_stage es_type; const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info; @@ -3128,7 +3138,7 @@ radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, static void radv_emit_hw_hs(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_shader *shader) { - const struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); uint64_t va = radv_shader_get_va(shader); if (pdev->info.gfx_level >= GFX9) { @@ -3152,6 +3162,8 @@ void radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, const struct radv_shader *vs, const struct radv_shader *next_stage) { + const struct radv_physical_device *pdev = radv_device_physical(device); + if (vs->info.merged_shader_compiled_separately) { const struct radv_userdata_info *loc = &vs->info.user_sgprs_locs.shader_data[AC_UD_NEXT_STAGE_PC]; const uint32_t base_reg = vs->info.user_data_0; @@ -3164,7 +3176,7 @@ radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf * if (vs->info.next_stage == MESA_SHADER_TESS_CTRL) { radv_shader_combine_cfg_vs_tcs(vs, next_stage, &rsrc1, NULL); - if (device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level >= GFX10) { radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, vs->va >> 8); } else { radeon_set_sh_reg(cs, R_00B410_SPI_SHADER_PGM_LO_LS, vs->va >> 8); @@ -3174,7 +3186,7 @@ radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf * } else { radv_shader_combine_cfg_vs_gs(vs, next_stage, &rsrc1, &rsrc2); - if (device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level >= GFX10) { radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, vs->va >> 8); } else { radeon_set_sh_reg(cs, R_00B210_SPI_SHADER_PGM_LO_ES, vs->va >> 8); @@ -3182,8 +3194,7 @@ radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf * unsigned lds_size; if (next_stage->info.is_ngg) { - lds_size = DIV_ROUND_UP(next_stage->info.ngg_info.lds_size, - device->physical_device->info.lds_encode_granularity); + lds_size = DIV_ROUND_UP(next_stage->info.ngg_info.lds_size, pdev->info.lds_encode_granularity); } else { lds_size = next_stage->info.gs_ring_info.lds_size; } @@ -3225,6 +3236,8 @@ void radv_emit_tess_eval_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, const struct radv_shader *tes, const struct radv_shader *gs) { + const struct radv_physical_device *pdev = radv_device_physical(device); + if (tes->info.merged_shader_compiled_separately) { const struct radv_userdata_info *loc = &tes->info.user_sgprs_locs.shader_data[AC_UD_NEXT_STAGE_PC]; const uint32_t base_reg = tes->info.user_data_0; @@ -3238,7 +3251,7 @@ radv_emit_tess_eval_shader(const struct radv_device *device, struct radeon_cmdbu unsigned lds_size; if (gs->info.is_ngg) { - lds_size = DIV_ROUND_UP(gs->info.ngg_info.lds_size, device->physical_device->info.lds_encode_granularity); + lds_size = DIV_ROUND_UP(gs->info.ngg_info.lds_size, pdev->info.lds_encode_granularity); } else { lds_size = gs->info.gs_ring_info.lds_size; } @@ -3264,7 +3277,7 @@ static void radv_emit_hw_gs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, const struct radv_shader *gs) { - const struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_legacy_gs_info *gs_state = &gs->info.gs_ring_info; unsigned gs_max_out_vertices; const uint8_t *num_components; @@ -3382,16 +3395,15 @@ void radv_emit_mesh_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, const struct radv_shader *ms) { - const struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); const uint32_t gs_out = radv_conv_gl_prim_to_gs_out(ms->info.ms.output_prim); radv_emit_hw_ngg(device, ctx_cs, cs, NULL, ms); - radeon_set_context_reg( - ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, - device->physical_device->mesh_fast_launch_2 ? ms->info.ngg_info.max_out_verts : ms->info.workgroup_size); + radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, + pdev->mesh_fast_launch_2 ? ms->info.ngg_info.max_out_verts : ms->info.workgroup_size); radeon_set_uconfig_reg_idx(pdev, ctx_cs, R_030908_VGT_PRIMITIVE_TYPE, 1, V_008958_DI_PT_POINTLIST); - if (device->physical_device->mesh_fast_launch_2) { + if (pdev->mesh_fast_launch_2) { radeon_set_sh_reg_seq(cs, R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, 2); radeon_emit(cs, S_00B2B0_MESHLET_NUM_THREAD_X(ms->info.cs.block_size[0] - 1) | S_00B2B0_MESHLET_NUM_THREAD_Y(ms->info.cs.block_size[1] - 1) | @@ -3476,9 +3488,10 @@ void radv_emit_ps_inputs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, const struct radv_shader *last_vgt_shader, const struct radv_shader *ps) { + const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_vs_output_info *outinfo = &last_vgt_shader->info.outinfo; bool mesh = last_vgt_shader->info.stage == MESA_SHADER_MESH; - bool gfx11plus = device->physical_device->info.gfx_level >= GFX11; + bool gfx11plus = pdev->info.gfx_level >= GFX11; uint32_t ps_input_cntl[32]; unsigned ps_offset = 0; @@ -3530,7 +3543,7 @@ void radv_emit_fragment_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, const struct radv_shader *ps) { - const struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); bool param_gen; uint64_t va; @@ -3566,7 +3579,7 @@ void radv_emit_vgt_reuse(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, const struct radv_shader *tes, const struct radv_vgt_shader_key *key) { - const struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); if (pdev->info.gfx_level == GFX10_3) { /* Legacy Tess+GS should disable reuse to prevent hangs on GFX10.3. */ @@ -3635,7 +3648,7 @@ void radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, const struct radv_vgt_shader_key *key) { - const struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); uint32_t stages = 0; if (key->tess) { @@ -3651,7 +3664,7 @@ radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdb stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1); } else if (key->mesh) { assert(!key->ngg_passthrough); - unsigned gs_fast_launch = device->physical_device->mesh_fast_launch_2 ? 2 : 1; + unsigned gs_fast_launch = pdev->mesh_fast_launch_2 ? 2 : 1; stages |= S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(gs_fast_launch) | S_028B54_NGG_WAVE_ID_EN(key->mesh_scratch_ring); } else if (key->ngg) { @@ -3682,7 +3695,7 @@ radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdb void radv_emit_vgt_gs_out(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, uint32_t vgt_gs_out_prim_type) { - const struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); if (pdev->info.gfx_level >= GFX11) { radeon_set_uconfig_reg(ctx_cs, R_030998_VGT_GS_OUT_PRIM_TYPE, vgt_gs_out_prim_type); @@ -3713,9 +3726,10 @@ gfx103_emit_vgt_draw_payload_cntl(struct radeon_cmdbuf *ctx_cs, const struct rad static bool gfx103_pipeline_vrs_coarse_shading(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; - if (device->physical_device->info.gfx_level != GFX10_3) + if (pdev->info.gfx_level != GFX10_3) return false; if (device->instance->debug_flags & RADV_DEBUG_NO_VRS_FLAT_SHADING) @@ -3731,7 +3745,7 @@ void gfx103_emit_vrs_state(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, const struct radv_shader *ps, bool enable_vrs, bool enable_vrs_coarse_shading, bool force_vrs_per_vertex) { - const struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); uint32_t mode = V_028064_SC_VRS_COMB_MODE_PASSTHRU; uint8_t rate_x = 0, rate_y = 0; @@ -3769,7 +3783,7 @@ radv_pipeline_emit_pm4(const struct radv_device *device, struct radv_graphics_pi const struct vk_graphics_pipeline_state *state) { - const struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_shader *last_vgt_shader = radv_get_last_vgt_shader(pipeline); const struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; struct radeon_cmdbuf *ctx_cs = &pipeline->base.ctx_cs; @@ -3835,7 +3849,7 @@ static void radv_pipeline_init_vertex_input_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline, const struct vk_graphics_pipeline_state *state) { - const struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_shader_info *vs_info = &radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX)->info; if (state->vi) { @@ -4022,7 +4036,8 @@ bool radv_needs_null_export_workaround(const struct radv_device *device, const struct radv_shader *ps, unsigned custom_blend_mode) { - const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; if (!ps) return false; diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c index 6452a19ddd6..fdabbc7dc00 100644 --- a/src/amd/vulkan/radv_pipeline_rt.c +++ b/src/amd/vulkan/radv_pipeline_rt.c @@ -370,6 +370,7 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache, const struct radv_ray_tracing_stage_info *traversal_stage_info, struct radv_serialized_shader_arena_block *replay_block, struct radv_shader **out_shader) { + struct radv_physical_device *pdev = radv_device_physical(device); struct radv_shader_binary *binary; bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.base.create_flags); bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.base.create_flags); @@ -405,7 +406,7 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache, .stack_alignment = 16, .localized_loads = true, .vectorizer_callback = radv_mem_vectorize_callback, - .vectorizer_data = &device->physical_device->info.gfx_level, + .vectorizer_data = &pdev->info.gfx_level, }; nir_lower_shader_calls(stage->nir, &opts, &resume_shaders, &num_resume_shaders, stage->nir); } @@ -828,6 +829,8 @@ postprocess_rt_config(struct ac_shader_config *config, enum amd_gfx_level gfx_le static void compile_rt_prolog(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline) { + const struct radv_physical_device *pdev = radv_device_physical(device); + pipeline->prolog = radv_create_rt_prolog(device); /* create combined config */ @@ -839,7 +842,7 @@ compile_rt_prolog(struct radv_device *device, struct radv_ray_tracing_pipeline * if (pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]) combine_config(config, &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]->config); - postprocess_rt_config(config, device->physical_device->info.gfx_level, device->physical_device->rt_wave_size); + postprocess_rt_config(config, pdev->info.gfx_level, pdev->rt_wave_size); pipeline->prolog->max_waves = radv_get_max_waves(device, config, &pipeline->prolog->info); } diff --git a/src/amd/vulkan/radv_printf.c b/src/amd/vulkan/radv_printf.c index 45459001437..732f0c1b571 100644 --- a/src/amd/vulkan/radv_printf.c +++ b/src/amd/vulkan/radv_printf.c @@ -18,6 +18,8 @@ static struct hash_table *device_ht = NULL; VkResult radv_printf_data_init(struct radv_device *device) { + const struct radv_physical_device *pdev = radv_device_physical(device); + util_dynarray_init(&device->printf.formats, NULL); device->printf.buffer_size = debug_get_num_option("RADV_PRINTF_BUFFER_SIZE", 0); @@ -45,9 +47,9 @@ radv_printf_data_init(struct radv_device *device) VkMemoryAllocateInfo alloc_info = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = requirements.size, - .memoryTypeIndex = radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT), + .memoryTypeIndex = + radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT), }; result = device->vk.dispatch_table.AllocateMemory(_device, &alloc_info, NULL, &device->printf.memory); diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 30f790c757e..cd73db416e2 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1134,8 +1134,6 @@ struct radv_device { /* Whether to keep shader debug info, for debugging. */ bool keep_shader_info; - struct radv_physical_device *physical_device; - /* Backup in-memory cache to be used if the app doesn't provide one */ struct vk_pipeline_cache *mem_cache; @@ -1275,6 +1273,12 @@ struct radv_device { uint32_t compute_scratch_waves; }; +static inline struct radv_physical_device * +radv_device_physical(const struct radv_device *dev) +{ + return (struct radv_physical_device *)dev->vk.physical; +} + bool radv_device_set_pstate(struct radv_device *device, bool enable); bool radv_device_acquire_performance_counters(struct radv_device *device); void radv_device_release_performance_counters(struct radv_device *device); @@ -2155,10 +2159,12 @@ static inline void radv_emit_shader_pointer_body(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, bool use_32bit_pointers) { + const struct radv_physical_device *pdev = radv_device_physical(device); + radeon_emit(cs, va); if (use_32bit_pointers) { - assert(va == 0 || (va >> 32) == device->physical_device->info.address32_hi); + assert(va == 0 || (va >> 32) == pdev->info.address32_hi); } else { radeon_emit(cs, va >> 32); } @@ -2798,7 +2804,8 @@ radv_image_has_htile(const struct radv_image *image) static inline bool radv_image_has_vrs_htile(const struct radv_device *device, const struct radv_image *image) { - const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; /* Any depth buffer can potentially use VRS on GFX10.3. */ return gfx_level == GFX10_3 && device->vk.enabled_features.attachmentFragmentShadingRate && @@ -2830,7 +2837,9 @@ radv_image_is_tc_compat_htile(const struct radv_image *image) static inline bool radv_image_tile_stencil_disabled(const struct radv_device *device, const struct radv_image *image) { - if (device->physical_device->info.gfx_level >= GFX9) { + const struct radv_physical_device *pdev = radv_device_physical(device); + + if (pdev->info.gfx_level >= GFX9) { return !vk_format_has_stencil(image->vk.format) && !radv_image_has_vrs_htile(device, image); } else { /* Due to a hw bug, TILE_STENCIL_DISABLE must be set to 0 for @@ -2938,8 +2947,10 @@ radv_get_htile_initial_value(const struct radv_device *device, const struct radv static inline bool radv_image_get_iterate256(const struct radv_device *device, struct radv_image *image) { + const struct radv_physical_device *pdev = radv_device_physical(device); + /* ITERATE_256 is required for depth or stencil MSAA images that are TC-compatible HTILE. */ - return device->physical_device->info.gfx_level >= GFX10 && + return pdev->info.gfx_level >= GFX10 && (image->vk.usage & (VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) && radv_image_is_tc_compat_htile(image) && image->vk.samples > 1; } @@ -3744,7 +3755,8 @@ radv_is_streamout_enabled(struct radv_cmd_buffer *cmd_buffer) static inline enum amd_ip_type radv_queue_ring(const struct radv_queue *queue) { - return radv_queue_family_to_ring(queue->device->physical_device, queue->state.qf); + const struct radv_physical_device *pdev = radv_device_physical(queue->device); + return radv_queue_family_to_ring(pdev, queue->state.qf); } /* radv_video */ @@ -3758,7 +3770,8 @@ void radv_video_get_profile_alignments(struct radv_physical_device *pdev, const static inline bool radv_use_llvm_for_stage(const struct radv_device *device, UNUSED gl_shader_stage stage) { - return device->physical_device->use_llvm; + const struct radv_physical_device *pdev = radv_device_physical(device); + return pdev->use_llvm; } static inline bool diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 8ec2d815222..3c0e4398949 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -56,7 +56,8 @@ radv_get_pipelinestat_query_size(struct radv_device *device) /* GFX10_3 only has 11 valid pipeline statistics queries but in order to emulate mesh/task shader * invocations, it's easier to use the same size as GFX11. */ - unsigned num_results = device->physical_device->info.gfx_level >= GFX10_3 ? 14 : 11; + const struct radv_physical_device *pdev = radv_device_physical(device); + unsigned num_results = pdev->info.gfx_level >= GFX10_3 ? 14 : 11; return num_results * 8; } @@ -120,6 +121,7 @@ build_occlusion_query_shader(struct radv_device *device) * } * } */ + const struct radv_physical_device *pdev = radv_device_physical(device); nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "occlusion_query"); b.shader->info.workgroup_size[0] = 64; @@ -128,8 +130,8 @@ build_occlusion_query_shader(struct radv_device *device) nir_variable *start = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "start"); nir_variable *end = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "end"); nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available"); - uint64_t enabled_rb_mask = device->physical_device->info.enabled_rb_mask; - unsigned db_count = device->physical_device->info.max_render_backends; + uint64_t enabled_rb_mask = pdev->info.enabled_rb_mask; + unsigned db_count = pdev->info.max_render_backends; nir_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4); @@ -275,6 +277,7 @@ build_pipeline_statistics_query_shader(struct radv_device *device) * } * } */ + const struct radv_physical_device *pdev = radv_device_physical(device); nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "pipeline_statistics_query"); b.shader->info.workgroup_size[0] = 64; @@ -301,7 +304,7 @@ build_pipeline_statistics_query_shader(struct radv_device *device) nir_def *available32 = nir_load_ssbo(&b, 1, 32, src_buf, avail_offset); nir_store_var(&b, available, nir_i2b(&b, available32), 0x1); - if (device->physical_device->emulate_mesh_shader_queries) { + if (pdev->emulate_mesh_shader_queries) { nir_push_if(&b, nir_test_mask(&b, stats_mask, VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT)); { const uint32_t idx = ffs(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT) - 1; @@ -867,6 +870,7 @@ build_ms_prim_gen_query_shader(struct radv_device *device) static VkResult radv_device_init_meta_query_state_internal(struct radv_device *device) { + const struct radv_physical_device *pdev = radv_device_physical(device); VkResult result; nir_shader *occlusion_cs = NULL; nir_shader *pipeline_statistics_cs = NULL; @@ -886,7 +890,7 @@ radv_device_init_meta_query_state_internal(struct radv_device *device) timestamp_cs = build_timestamp_query_shader(device); pg_cs = build_pg_query_shader(device); - if (device->physical_device->emulate_mesh_shader_queries) + if (pdev->emulate_mesh_shader_queries) ms_prim_gen_cs = build_ms_prim_gen_query_shader(device); VkDescriptorSetLayoutCreateInfo occlusion_ds_create_info = { @@ -1025,7 +1029,7 @@ radv_device_init_meta_query_state_internal(struct radv_device *device) result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &pg_pipeline_info, NULL, &device->meta_state.query.pg_query_pipeline); - if (device->physical_device->emulate_mesh_shader_queries) { + if (pdev->emulate_mesh_shader_queries) { VkPipelineShaderStageCreateInfo ms_prim_gen_pipeline_shader_stage = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .stage = VK_SHADER_STAGE_COMPUTE_BIT, @@ -1211,6 +1215,7 @@ static VkResult radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkQueryPool *pQueryPool) { + struct radv_physical_device *pdev = radv_device_physical(device); VkResult result; size_t pool_struct_size = pCreateInfo->queryType == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR ? sizeof(struct radv_pc_query_pool) @@ -1232,21 +1237,20 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo * * and the legacy GS path but it increments for NGG VS/TES because they are merged with GS. To * avoid this counter to increment, it's also emulated. */ - pool->uses_gds = - (device->physical_device->emulate_ngg_gs_query_pipeline_stat && - (pool->vk.pipeline_statistics & (VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | - VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT))) || - (device->physical_device->use_ngg && pCreateInfo->queryType == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) || - (device->physical_device->emulate_mesh_shader_queries && - (pCreateInfo->queryType == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT || - pool->vk.pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_MESH_SHADER_INVOCATIONS_BIT_EXT)); + pool->uses_gds = (pdev->emulate_ngg_gs_query_pipeline_stat && + (pool->vk.pipeline_statistics & (VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | + VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT))) || + (pdev->use_ngg && pCreateInfo->queryType == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) || + (pdev->emulate_mesh_shader_queries && + (pCreateInfo->queryType == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT || + pool->vk.pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_MESH_SHADER_INVOCATIONS_BIT_EXT)); /* The number of task shader invocations needs to be queried on ACE. */ pool->uses_ace = (pool->vk.pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT); switch (pCreateInfo->queryType) { case VK_QUERY_TYPE_OCCLUSION: - pool->stride = 16 * device->physical_device->info.max_render_backends; + pool->stride = 16 * pdev->info.max_render_backends; break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: pool->stride = radv_get_pipelinestat_query_size(device) * 2; @@ -1262,7 +1266,7 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo * pool->stride = 32; break; case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: - if (pool->uses_gds && device->physical_device->info.gfx_level < GFX11) { + if (pool->uses_gds && pdev->info.gfx_level < GFX11) { /* When the hardware can use both the legacy and the NGG paths in the same begin/end pair, * allocate 2x64-bit values for the GDS counters. */ @@ -1272,7 +1276,7 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo * } break; case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: { - result = radv_pc_init_query_pool(device->physical_device, pCreateInfo, (struct radv_pc_query_pool *)pool); + result = radv_pc_init_query_pool(pdev, pCreateInfo, (struct radv_pc_query_pool *)pool); if (result != VK_SUCCESS) { radv_destroy_query_pool(device, pAllocator, pool); @@ -1281,11 +1285,11 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo * break; } case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { /* GFX11 natively supports mesh generated primitives with pipeline statistics. */ pool->stride = radv_get_pipelinestat_query_size(device) * 2; } else { - assert(device->physical_device->emulate_mesh_shader_queries); + assert(pdev->emulate_mesh_shader_queries); pool->stride = 16; } break; @@ -1296,8 +1300,7 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo * pool->availability_offset = pool->stride * pCreateInfo->queryCount; pool->size = pool->availability_offset; if (pCreateInfo->queryType == VK_QUERY_TYPE_PIPELINE_STATISTICS || - (pCreateInfo->queryType == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT && - device->physical_device->info.gfx_level >= GFX11)) + (pCreateInfo->queryType == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT && pdev->info.gfx_level >= GFX11)) pool->size += 4 * pCreateInfo->queryCount; result = radv_bo_create(device, pool->size, 64, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING, @@ -1365,6 +1368,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first { RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); + const struct radv_physical_device *pdev = radv_device_physical(device); char *data = pData; VkResult result = VK_SUCCESS; @@ -1413,8 +1417,8 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first } case VK_QUERY_TYPE_OCCLUSION: { p_atomic_uint64_t const *src64 = (p_atomic_uint64_t const *)src; - uint32_t db_count = device->physical_device->info.max_render_backends; - uint64_t enabled_rb_mask = device->physical_device->info.enabled_rb_mask; + uint32_t db_count = pdev->info.max_render_backends; + uint64_t enabled_rb_mask = pdev->info.enabled_rb_mask; uint64_t sample_count = 0; available = 1; @@ -1460,7 +1464,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first do { available = p_atomic_read(avail_ptr); - if (pool->uses_ace && device->physical_device->emulate_mesh_shader_queries) { + if (pool->uses_ace && pdev->emulate_mesh_shader_queries) { const uint32_t task_invoc_offset = radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT); const uint32_t *avail_ptr_start = (const uint32_t *)(src + task_invoc_offset + 4); @@ -1550,7 +1554,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first break; } case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: { - const bool uses_gds_query = pool->uses_gds && device->physical_device->info.gfx_level < GFX11; + const bool uses_gds_query = pool->uses_gds && pdev->info.gfx_level < GFX11; p_atomic_uint64_t const *src64 = (p_atomic_uint64_t const *)src; uint64_t primitive_storage_needed; @@ -1615,7 +1619,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: { uint64_t ms_prim_gen; - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(device); const uint32_t *avail_ptr = (const uint32_t *)(pool->ptr + pool->availability_offset + 4 * query); @@ -1730,6 +1734,7 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radeon_cmdbuf *cs = cmd_buffer->cs; uint64_t va = radv_buffer_get_va(pool->bo); uint64_t dest_va = radv_buffer_get_va(dst_buffer->bo); @@ -1780,7 +1785,7 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo /* This waits on the ME. All copies below are done on the ME */ radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, avail_va, 1, 0xffffffff); - if (pool->uses_ace && cmd_buffer->device->physical_device->emulate_mesh_shader_queries) { + if (pool->uses_ace && pdev->emulate_mesh_shader_queries) { const uint64_t src_va = va + query * pool->stride; const uint64_t start_va = src_va + task_invoc_offset + 4; const uint64_t stop_va = start_va + pipelinestat_block_size; @@ -1842,7 +1847,7 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo break; case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: if (flags & VK_QUERY_RESULT_WAIT_BIT) { - const bool uses_gds_query = pool->uses_gds && cmd_buffer->device->physical_device->info.gfx_level < GFX11; + const bool uses_gds_query = pool->uses_gds && pdev->info.gfx_level < GFX11; for (unsigned i = 0; i < queryCount; i++) { unsigned query = firstQuery + i; @@ -1863,11 +1868,10 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pg_query_pipeline, pool->bo, dst_buffer->bo, firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, - queryCount, flags, 0, 0, - pool->uses_gds && cmd_buffer->device->physical_device->info.gfx_level < GFX11); + queryCount, flags, 0, 0, pool->uses_gds && pdev->info.gfx_level < GFX11); break; case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { if (flags & VK_QUERY_RESULT_WAIT_BIT) { for (unsigned i = 0; i < queryCount; ++i, dest_va += stride) { unsigned query = firstQuery + i; @@ -1928,6 +1932,7 @@ radv_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uin { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); uint32_t value = query_clear_value(pool->vk.query_type); uint32_t flush_bits = 0; @@ -1941,8 +1946,7 @@ radv_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uin queryCount * pool->stride, value); if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS || - (pool->vk.query_type == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT && - cmd_buffer->device->physical_device->info.gfx_level >= GFX11)) { + (pool->vk.query_type == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT && pdev->info.gfx_level >= GFX11)) { flush_bits |= radv_fill_buffer(cmd_buffer, NULL, pool->bo, radv_buffer_get_va(pool->bo) + pool->availability_offset + firstQuery * 4, queryCount * 4, 0); @@ -1960,6 +1964,7 @@ radv_ResetQueryPool(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery { RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); + const struct radv_physical_device *pdev = radv_device_physical(device); uint32_t value = query_clear_value(pool->vk.query_type); uint32_t *data = (uint32_t *)(pool->ptr + firstQuery * pool->stride); @@ -1969,8 +1974,7 @@ radv_ResetQueryPool(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery *p = value; if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS || - (pool->vk.query_type == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT && - device->physical_device->info.gfx_level >= GFX11)) { + (pool->vk.query_type == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT && pdev->info.gfx_level >= GFX11)) { memset(pool->ptr + pool->availability_offset + firstQuery * 4, 0, queryCount * 4); } } @@ -2055,6 +2059,7 @@ static void emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va, VkQueryType query_type, VkQueryControlFlags flags, uint32_t index) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radeon_cmdbuf *cs = cmd_buffer->cs; switch (query_type) { case VK_QUERY_TYPE_OCCLUSION: @@ -2082,12 +2087,11 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo } } - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11 && - cmd_buffer->device->physical_device->info.pfp_fw_version >= EVENT_WRITE_ZPASS_PFP_VERSION) { + if (pdev->info.gfx_level >= GFX11 && pdev->info.pfp_fw_version >= EVENT_WRITE_ZPASS_PFP_VERSION) { radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_ZPASS, 1, 0)); } else { radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { radeon_emit(cs, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_DUMP) | EVENT_INDEX(1)); } else { radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1)); @@ -2149,7 +2153,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo uint32_t task_invoc_offset = radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT); - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { va += task_invoc_offset; radeon_check_space(cmd_buffer->device->ws, cmd_buffer->gang.cs, 4); @@ -2176,7 +2180,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo break; } case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: - if (cmd_buffer->device->physical_device->use_ngg_streamout) { + if (pdev->use_ngg_streamout) { /* generated prim counter */ gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va); radv_cs_write_data_imm(cs, V_370_ME, va + 4, 0x80000000); @@ -2201,7 +2205,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo } break; case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: { - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { /* On GFX11+, primitives generated query always use GDS. */ gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va); radv_cs_write_data_imm(cs, V_370_ME, va + 4, 0x80000000); @@ -2251,7 +2255,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo break; } case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: { - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { radeon_check_space(cmd_buffer->device->ws, cs, 4); ++cmd_buffer->state.active_pipeline_queries; @@ -2285,6 +2289,7 @@ static void emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va, uint64_t avail_va, VkQueryType query_type, uint32_t index) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radeon_cmdbuf *cs = cmd_buffer->cs; switch (query_type) { case VK_QUERY_TYPE_OCCLUSION: @@ -2300,12 +2305,11 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, cmd_buffer->state.dirty |= RADV_CMD_DIRTY_OCCLUSION_QUERY; } - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11 && - cmd_buffer->device->physical_device->info.pfp_fw_version >= EVENT_WRITE_ZPASS_PFP_VERSION) { + if (pdev->info.gfx_level >= GFX11 && pdev->info.pfp_fw_version >= EVENT_WRITE_ZPASS_PFP_VERSION) { radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_ZPASS, 1, 0)); } else { radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { radeon_emit(cs, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_DUMP) | EVENT_INDEX(1)); } else { radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1)); @@ -2369,7 +2373,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint32_t task_invoc_offset = radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT); - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { va += task_invoc_offset; radeon_check_space(cmd_buffer->device->ws, cmd_buffer->gang.cs, 4); @@ -2391,13 +2395,12 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, } } - radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->info.gfx_level, cmd_buffer->qf, - V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, avail_va, - 1, cmd_buffer->gfx9_eop_bug_va); + radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0, + EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, avail_va, 1, cmd_buffer->gfx9_eop_bug_va); break; } case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: - if (cmd_buffer->device->physical_device->use_ngg_streamout) { + if (pdev->use_ngg_streamout) { /* generated prim counter */ gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16); radv_cs_write_data_imm(cs, V_370_ME, va + 20, 0x80000000); @@ -2419,7 +2422,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, } break; case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: { - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { /* On GFX11+, primitives generated query always use GDS. */ gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16); radv_cs_write_data_imm(cs, V_370_ME, va + 20, 0x80000000); @@ -2463,7 +2466,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, break; } case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: { - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(cmd_buffer->device); radeon_check_space(cmd_buffer->device->ws, cs, 16); @@ -2479,9 +2482,9 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, radeon_emit(cs, va); radeon_emit(cs, va >> 32); - radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->info.gfx_level, cmd_buffer->qf, - V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, - avail_va, 1, cmd_buffer->gfx9_eop_bug_va); + radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0, + EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, avail_va, 1, + cmd_buffer->gfx9_eop_bug_va); } else { gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET, va + 8); radv_cs_write_data_imm(cs, V_370_ME, va + 12, 0x80000000); @@ -2499,7 +2502,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE; - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB; } } @@ -2564,6 +2567,7 @@ radv_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPool, void radv_write_timestamp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkPipelineStageFlags2 stage) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radeon_cmdbuf *cs = cmd_buffer->cs; if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT) { @@ -2575,9 +2579,8 @@ radv_write_timestamp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkPipeline radeon_emit(cs, va); radeon_emit(cs, va >> 32); } else { - radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->info.gfx_level, cmd_buffer->qf, - V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_TIMESTAMP, va, 0, - cmd_buffer->gfx9_eop_bug_va); + radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0, + EOP_DST_SEL_MEM, EOP_DATA_SEL_TIMESTAMP, va, 0, cmd_buffer->gfx9_eop_bug_va); } } @@ -2587,6 +2590,7 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); const unsigned num_queries = MAX2(util_bitcount(cmd_buffer->state.render.view_mask), 1); struct radeon_cmdbuf *cs = cmd_buffer->cs; const uint64_t va = radv_buffer_get_va(pool->bo); @@ -2625,7 +2629,7 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE; - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB; } diff --git a/src/amd/vulkan/radv_queue.c b/src/amd/vulkan/radv_queue.c index 5962e01a184..5115f321191 100644 --- a/src/amd/vulkan/radv_queue.c +++ b/src/amd/vulkan/radv_queue.c @@ -126,6 +126,7 @@ static VkResult radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMemoryBindInfo *bind) { RADV_FROM_HANDLE(radv_image, image, bind->image); + const struct radv_physical_device *pdev = radv_device_physical(device); struct radeon_surf *surface = &image->planes[0].surface; uint32_t bs = vk_format_get_blocksize(image->vk.format); VkResult result; @@ -149,7 +150,7 @@ radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMem if (bind->pBinds[i].memory != VK_NULL_HANDLE) mem = radv_device_memory_from_handle(bind->pBinds[i].memory); - if (device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { offset = surface->u.gfx9.surf_slice_size * layer + surface->u.gfx9.prt_level_offset[level]; pitch = surface->u.gfx9.prt_level_pitch[level]; depth_pitch = surface->u.gfx9.surf_slice_size; @@ -243,11 +244,13 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon struct radeon_winsys_bo *task_rings_bo, struct radeon_winsys_bo *mesh_scratch_ring_bo, uint32_t attr_ring_size, struct radeon_winsys_bo *attr_ring_bo) { + const struct radv_physical_device *pdev = radv_device_physical(device); + if (scratch_bo) { uint64_t scratch_va = radv_buffer_get_va(scratch_bo); uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32); - if (device->physical_device->info.gfx_level >= GFX11) + if (pdev->info.gfx_level >= GFX11) rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX11(1); else rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX6(1); @@ -270,17 +273,17 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | S_008F0C_INDEX_STRIDE(3) | S_008F0C_ADD_TID_ENABLE(1); - if (device->physical_device->info.gfx_level >= GFX11) + if (pdev->info.gfx_level >= GFX11) desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX11(1); else desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX6(1); - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); - } else if (device->physical_device->info.gfx_level >= GFX10) { + } else if (pdev->info.gfx_level >= GFX10) { desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); - } else if (device->physical_device->info.gfx_level >= GFX8) { + } else if (pdev->info.gfx_level >= GFX8) { /* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */ desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1); @@ -298,9 +301,9 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); - } else if (device->physical_device->info.gfx_level >= GFX10) { + } else if (pdev->info.gfx_level >= GFX10) { desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); } else { @@ -323,9 +326,9 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); - } else if (device->physical_device->info.gfx_level >= GFX10) { + } else if (pdev->info.gfx_level >= GFX10) { desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); } else { @@ -343,17 +346,17 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | S_008F0C_INDEX_STRIDE(1) | S_008F0C_ADD_TID_ENABLE(true); - if (device->physical_device->info.gfx_level >= GFX11) + if (pdev->info.gfx_level >= GFX11) desc[5] |= S_008F04_SWIZZLE_ENABLE_GFX11(1); else desc[5] |= S_008F04_SWIZZLE_ENABLE_GFX6(1); - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); - } else if (device->physical_device->info.gfx_level >= GFX10) { + } else if (pdev->info.gfx_level >= GFX10) { desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); - } else if (device->physical_device->info.gfx_level >= GFX8) { + } else if (pdev->info.gfx_level >= GFX8) { /* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */ desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1); @@ -367,17 +370,17 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon if (tess_rings_bo) { uint64_t tess_va = radv_buffer_get_va(tess_rings_bo); - uint64_t tess_offchip_va = tess_va + device->physical_device->hs.tess_offchip_ring_offset; + uint64_t tess_offchip_va = tess_va + pdev->hs.tess_offchip_ring_offset; desc[0] = tess_va; desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32); - desc[2] = device->physical_device->hs.tess_factor_ring_size; + desc[2] = pdev->hs.tess_factor_ring_size; desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); - } else if (device->physical_device->info.gfx_level >= GFX10) { + } else if (pdev->info.gfx_level >= GFX10) { desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1); } else { @@ -387,13 +390,13 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon desc[4] = tess_offchip_va; desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32); - desc[6] = device->physical_device->hs.tess_offchip_ring_size; + desc[6] = pdev->hs.tess_offchip_ring_size; desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); - } else if (device->physical_device->info.gfx_level >= GFX10) { + } else if (pdev->info.gfx_level >= GFX10) { desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1); } else { @@ -406,33 +409,33 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon if (task_rings_bo) { uint64_t task_va = radv_buffer_get_va(task_rings_bo); - uint64_t task_draw_ring_va = task_va + device->physical_device->task_info.draw_ring_offset; - uint64_t task_payload_ring_va = task_va + device->physical_device->task_info.payload_ring_offset; + uint64_t task_draw_ring_va = task_va + pdev->task_info.draw_ring_offset; + uint64_t task_payload_ring_va = task_va + pdev->task_info.payload_ring_offset; desc[0] = task_draw_ring_va; desc[1] = S_008F04_BASE_ADDRESS_HI(task_draw_ring_va >> 32); - desc[2] = device->physical_device->task_info.num_entries * AC_TASK_DRAW_ENTRY_BYTES; + desc[2] = pdev->task_info.num_entries * AC_TASK_DRAW_ENTRY_BYTES; desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); } else { - assert(device->physical_device->info.gfx_level >= GFX10_3); + assert(pdev->info.gfx_level >= GFX10_3); desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); } desc[4] = task_payload_ring_va; desc[5] = S_008F04_BASE_ADDRESS_HI(task_payload_ring_va >> 32); - desc[6] = device->physical_device->task_info.num_entries * AC_TASK_PAYLOAD_ENTRY_BYTES; + desc[6] = pdev->task_info.num_entries * AC_TASK_PAYLOAD_ENTRY_BYTES; desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); } else { - assert(device->physical_device->info.gfx_level >= GFX10_3); + assert(pdev->info.gfx_level >= GFX10_3); desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); } @@ -449,10 +452,10 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); } else { - assert(device->physical_device->info.gfx_level >= GFX10_3); + assert(pdev->info.gfx_level >= GFX10_3); desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); } @@ -461,7 +464,7 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon desc += 4; if (attr_ring_bo) { - assert(device->physical_device->info.gfx_level >= GFX11); + assert(pdev->info.gfx_level >= GFX11); uint64_t va = radv_buffer_get_va(attr_ring_bo); @@ -489,6 +492,8 @@ static void radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *esgs_ring_bo, uint32_t esgs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size) { + const struct radv_physical_device *pdev = radv_device_physical(device); + if (!esgs_ring_bo && !gsvs_ring_bo) return; @@ -498,7 +503,7 @@ radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, st if (gsvs_ring_bo) radv_cs_add_buffer(device->ws, cs, gsvs_ring_bo); - if (device->physical_device->info.gfx_level >= GFX7) { + if (pdev->info.gfx_level >= GFX7) { radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2); radeon_emit(cs, esgs_ring_size >> 8); radeon_emit(cs, gsvs_ring_size >> 8); @@ -512,49 +517,51 @@ radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, st static void radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *tess_rings_bo) { + const struct radv_physical_device *pdev = radv_device_physical(device); uint64_t tf_va; uint32_t tf_ring_size; if (!tess_rings_bo) return; - tf_ring_size = device->physical_device->hs.tess_factor_ring_size / 4; + tf_ring_size = pdev->hs.tess_factor_ring_size / 4; tf_va = radv_buffer_get_va(tess_rings_bo); radv_cs_add_buffer(device->ws, cs, tess_rings_bo); - if (device->physical_device->info.gfx_level >= GFX7) { - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX7) { + if (pdev->info.gfx_level >= GFX11) { /* TF_RING_SIZE is per SE on GFX11. */ - tf_ring_size /= device->physical_device->info.max_se; + tf_ring_size /= pdev->info.max_se; } radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size)); radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, tf_va >> 8); - if (device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level >= GFX10) { radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI, S_030984_BASE_HI(tf_va >> 40)); - } else if (device->physical_device->info.gfx_level == GFX9) { + } else if (pdev->info.gfx_level == GFX9) { radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(tf_va >> 40)); } - radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, device->physical_device->hs.hs_offchip_param); + radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, pdev->hs.hs_offchip_param); } else { radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size)); radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8); - radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, device->physical_device->hs.hs_offchip_param); + radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, pdev->hs.hs_offchip_param); } } static VkResult radv_initialise_task_control_buffer(struct radv_device *device, struct radeon_winsys_bo *task_rings_bo) { + const struct radv_physical_device *pdev = radv_device_physical(device); uint32_t *ptr = (uint32_t *)radv_buffer_map(device->ws, task_rings_bo); if (!ptr) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - const uint32_t num_entries = device->physical_device->task_info.num_entries; + const uint32_t num_entries = pdev->task_info.num_entries; const uint64_t task_va = radv_buffer_get_va(task_rings_bo); - const uint64_t task_draw_ring_va = task_va + device->physical_device->task_info.draw_ring_offset; + const uint64_t task_draw_ring_va = task_va + pdev->task_info.draw_ring_offset; assert((task_draw_ring_va & 0xFFFFFF00) == (task_draw_ring_va & 0xFFFFFFFF)); /* 64-bit write_ptr */ @@ -599,7 +606,8 @@ static void radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves, struct radeon_winsys_bo *scratch_bo) { - const struct radeon_info *gpu_info = &device->physical_device->info; + const struct radv_physical_device *pdev = radv_device_physical(device); + const struct radeon_info *gpu_info = &pdev->info; if (!scratch_bo) return; @@ -626,7 +634,8 @@ static void radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves, struct radeon_winsys_bo *compute_scratch_bo) { - const struct radeon_info *gpu_info = &device->physical_device->info; + const struct radv_physical_device *pdev = radv_device_physical(device); + const struct radeon_info *gpu_info = &pdev->info; uint64_t scratch_va; uint32_t rsrc1; @@ -680,6 +689,7 @@ static void radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *descriptor_bo) { + const struct radv_physical_device *pdev = radv_device_physical(device); uint64_t va; if (!descriptor_bo) @@ -689,21 +699,21 @@ radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmd radv_cs_add_buffer(device->ws, cs, descriptor_bo); - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B420_SPI_SHADER_PGM_LO_HS, R_00B220_SPI_SHADER_PGM_LO_GS}; for (int i = 0; i < ARRAY_SIZE(regs); ++i) { radv_emit_shader_pointer(device, cs, regs[i], va, true); } - } else if (device->physical_device->info.gfx_level >= GFX10) { + } else if (pdev->info.gfx_level >= GFX10) { uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0, R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS}; for (int i = 0; i < ARRAY_SIZE(regs); ++i) { radv_emit_shader_pointer(device, cs, regs[i], va, true); } - } else if (device->physical_device->info.gfx_level == GFX9) { + } else if (pdev->info.gfx_level == GFX9) { uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0, R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS}; @@ -725,7 +735,7 @@ static void radv_emit_attribute_ring(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *attr_ring_bo, uint32_t attr_ring_size) { - const struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); uint64_t va; if (!attr_ring_bo) @@ -792,6 +802,7 @@ static VkResult radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *device, const struct radv_queue_ring_info *needs) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct radeon_winsys *ws = device->ws; struct radeon_winsys_bo *scratch_bo = queue->scratch_bo; struct radeon_winsys_bo *descriptor_bo = queue->descriptor_bo; @@ -848,8 +859,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi } if (!queue->ring_info.tess_rings && needs->tess_rings) { - uint64_t tess_rings_size = - device->physical_device->hs.tess_offchip_ring_offset + device->physical_device->hs.tess_offchip_ring_size; + uint64_t tess_rings_size = pdev->hs.tess_offchip_ring_offset + pdev->hs.tess_offchip_ring_size; result = radv_bo_create(device, tess_rings_size, 256, RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, true, &tess_rings_bo); if (result != VK_SUCCESS) @@ -858,7 +868,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi } if (!queue->ring_info.task_rings && needs->task_rings) { - assert(device->physical_device->info.gfx_level >= GFX10_3); + assert(pdev->info.gfx_level >= GFX10_3); /* We write the control buffer from the CPU, so need to grant CPU access to the BO. * The draw ring needs to be zero-initialized otherwise the ready bits will be incorrect. @@ -866,12 +876,11 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi uint32_t task_rings_bo_flags = RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM; - result = radv_bo_create(device, device->physical_device->task_info.bo_size_bytes, 256, RADEON_DOMAIN_VRAM, - task_rings_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, true, &task_rings_bo); + result = radv_bo_create(device, pdev->task_info.bo_size_bytes, 256, RADEON_DOMAIN_VRAM, task_rings_bo_flags, + RADV_BO_PRIORITY_SCRATCH, 0, true, &task_rings_bo); if (result != VK_SUCCESS) goto fail; - radv_rmv_log_command_buffer_bo_create(device, task_rings_bo, 0, 0, - device->physical_device->task_info.bo_size_bytes); + radv_rmv_log_command_buffer_bo_create(device, task_rings_bo, 0, 0, pdev->task_info.bo_size_bytes); result = radv_initialise_task_control_buffer(device, task_rings_bo); if (result != VK_SUCCESS) @@ -879,7 +888,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi } if (!queue->ring_info.mesh_scratch_ring && needs->mesh_scratch_ring) { - assert(device->physical_device->info.gfx_level >= GFX10_3); + assert(pdev->info.gfx_level >= GFX10_3); result = radv_bo_create(device, RADV_MESH_SCRATCH_NUM_ENTRIES * RADV_MESH_SCRATCH_ENTRY_BYTES, 256, RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, true, &mesh_scratch_ring_bo); @@ -891,7 +900,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi } if (needs->attr_ring_size > queue->ring_info.attr_ring_size) { - assert(device->physical_device->info.gfx_level >= GFX11); + assert(pdev->info.gfx_level >= GFX11); result = radv_bo_create(device, needs->attr_ring_size, 2 * 1024 * 1024 /* 2MiB */, RADEON_DOMAIN_VRAM, RADEON_FLAG_32BIT | RADEON_FLAG_DISCARDABLE | ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, true, &attr_ring_bo); @@ -901,7 +910,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi } if (!queue->ring_info.gds && needs->gds) { - assert(device->physical_device->info.gfx_level >= GFX10); + assert(pdev->info.gfx_level >= GFX10); /* 4 streamout GDS counters. * We need 256B (64 dw) of GDS, otherwise streamout hangs. @@ -920,7 +929,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi } if (!queue->ring_info.gds_oa && needs->gds_oa) { - assert(device->physical_device->info.gfx_level >= GFX10); + assert(pdev->info.gfx_level >= GFX10); result = radv_bo_create(device, 1, 1, RADEON_DOMAIN_OA, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, true, &gds_oa_bo); @@ -972,7 +981,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi for (int i = 0; i < 3; ++i) { enum rgp_flush_bits sqtt_flush_bits = 0; struct radeon_cmdbuf *cs = NULL; - cs = ws->cs_create(ws, radv_queue_family_to_ring(device->physical_device, queue->qf), false); + cs = ws->cs_create(ws, radv_queue_family_to_ring(pdev, queue->qf), false); if (!cs) { result = VK_ERROR_OUT_OF_DEVICE_MEMORY; goto fail; @@ -1027,7 +1036,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi if (i < 2) { /* The two initial preambles have a cache flush at the beginning. */ - const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; enum radv_cmd_flush_bits flush_bits = RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_START_PIPELINE_STATS; @@ -1143,6 +1152,7 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device struct vk_command_buffer *const *cmd_buffers, uint32_t cmd_buffer_count, bool *use_perf_counters, bool *has_follower) { + const struct radv_physical_device *pdev = radv_device_physical(device); bool has_indirect_pipeline_binds = false; if (queue->qf != RADV_QUEUE_GENERAL && queue->qf != RADV_QUEUE_COMPUTE) { @@ -1203,9 +1213,8 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device ? MIN2(needs.compute_scratch_waves, UINT32_MAX / needs.compute_scratch_size_per_wave) : 0; - if (device->physical_device->info.gfx_level >= GFX11 && queue->qf == RADV_QUEUE_GENERAL) { - needs.attr_ring_size = - device->physical_device->info.attribute_ring_size_per_se * device->physical_device->info.max_se; + if (pdev->info.gfx_level >= GFX11 && queue->qf == RADV_QUEUE_GENERAL) { + needs.attr_ring_size = pdev->info.attribute_ring_size_per_se * pdev->info.max_se; } /* Return early if we already match these needs. @@ -1230,13 +1239,15 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device static VkResult radv_create_gang_wait_preambles_postambles(struct radv_queue *queue) { + const struct radv_physical_device *pdev = radv_device_physical(queue->device); + if (queue->gang_sem_bo) return VK_SUCCESS; VkResult r = VK_SUCCESS; struct radv_device *device = queue->device; struct radeon_winsys *ws = device->ws; - const enum amd_ip_type leader_ip = radv_queue_family_to_ring(device->physical_device, queue->state.qf); + const enum amd_ip_type leader_ip = radv_queue_family_to_ring(pdev, queue->state.qf); struct radeon_winsys_bo *gang_sem_bo = NULL; /* Gang semaphores BO. @@ -1291,9 +1302,8 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue) */ radv_cp_wait_mem(leader_post_cs, queue->state.qf, WAIT_REG_MEM_GREATER_OR_EQUAL, leader_wait_va, 1, 0xffffffff); radv_cs_write_data(device, leader_post_cs, queue->state.qf, V_370_ME, leader_wait_va, 1, &zero, false); - radv_cs_emit_write_event_eop(ace_post_cs, device->physical_device->info.gfx_level, RADV_QUEUE_COMPUTE, - V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, - leader_wait_va, 1, 0); + radv_cs_emit_write_event_eop(ace_post_cs, pdev->info.gfx_level, RADV_QUEUE_COMPUTE, V_028A90_BOTTOM_OF_PIPE_TS, 0, + EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, leader_wait_va, 1, 0); r = ws->cs_finalize(leader_pre_cs); if (r != VK_SUCCESS) @@ -1681,13 +1691,14 @@ fail: static void radv_report_gpuvm_fault(struct radv_device *device) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_winsys_gpuvm_fault_info fault_info = {0}; if (!radv_vm_fault_occurred(device, &fault_info)) return; fprintf(stderr, "radv: GPUVM fault detected at address 0x%08" PRIx64 ".\n", fault_info.addr); - ac_print_gpuvm_fault_status(stderr, device->physical_device->info.gfx_level, fault_info.status); + ac_print_gpuvm_fault_status(stderr, pdev->info.gfx_level, fault_info.status); } static VkResult @@ -1735,9 +1746,10 @@ static VkResult radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission) { struct radv_queue *queue = (struct radv_queue *)vqueue; + const struct radv_physical_device *pdev = radv_device_physical(queue->device); VkResult result; - if (!radv_sparse_queue_enabled(queue->device->physical_device)) { + if (!radv_sparse_queue_enabled(pdev)) { result = radv_queue_submit_bind_sparse_memory(queue->device, submission); if (result != VK_SUCCESS) goto fail; @@ -1792,10 +1804,12 @@ radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx, const VkDeviceQueueCreateInfo *create_info, const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority) { + const struct radv_physical_device *pdev = radv_device_physical(device); + queue->device = device; queue->priority = radv_get_queue_global_priority(global_priority); queue->hw_ctx = device->hw_ctx[queue->priority]; - queue->state.qf = vk_queue_to_radv(device->physical_device, create_info->queueFamilyIndex); + queue->state.qf = vk_queue_to_radv(pdev, create_info->queueFamilyIndex); queue->gang_sem_bo = NULL; VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx); diff --git a/src/amd/vulkan/radv_rmv.c b/src/amd/vulkan/radv_rmv.c index ed5d6e040f3..3d7a14ade9c 100644 --- a/src/amd/vulkan/radv_rmv.c +++ b/src/amd/vulkan/radv_rmv.c @@ -173,6 +173,8 @@ static void evaluate_trace_event(struct radv_device *device, uint64_t timestamp, struct util_dynarray *tokens, struct trace_event_amdgpu_vm_update_ptes *event) { + const struct radv_physical_device *pdev = radv_device_physical(device); + if (event->common.pid != getpid() && event->pid != getpid()) { return; } @@ -180,8 +182,8 @@ evaluate_trace_event(struct radv_device *device, uint64_t timestamp, struct util struct trace_event_address_array *array = (struct trace_event_address_array *)(event + 1); for (uint32_t i = 0; i < event->num_ptes; ++i) - emit_page_table_update_event(&device->vk.memory_trace_data, !device->physical_device->info.has_dedicated_vram, - timestamp, event, (uint64_t *)array->data, i); + emit_page_table_update_event(&device->vk.memory_trace_data, !pdev->info.has_dedicated_vram, timestamp, event, + (uint64_t *)array->data, i); } static void @@ -480,6 +482,8 @@ void radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool is_internal, VkMemoryAllocateFlags alloc_flags) { + const struct radv_physical_device *pdev = radv_device_physical(device); + if (!device->vk.memory_trace_data.is_enabled) return; @@ -495,7 +499,7 @@ radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool i token.is_driver_internal = is_internal; token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)heap); token.type = VK_RMV_RESOURCE_TYPE_HEAP; - token.heap.alignment = device->physical_device->info.max_alignment; + token.heap.alignment = pdev->info.max_alignment; token.heap.size = memory->alloc_size; token.heap.heap_index = memory->heap_index; token.heap.alloc_flags = alloc_flags; @@ -508,6 +512,8 @@ radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool i void radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo, bool is_internal) { + const struct radv_physical_device *pdev = radv_device_physical(device); + if (!device->vk.memory_trace_data.is_enabled) return; @@ -518,7 +524,7 @@ radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo struct vk_rmv_virtual_allocate_token token = {0}; token.address = bo->va; /* If all VRAM is visible, no bo will be in invisible memory. */ - token.is_in_invisible_vram = bo->vram_no_cpu_access && !device->physical_device->info.all_vram_visible; + token.is_in_invisible_vram = bo->vram_no_cpu_access && !pdev->info.all_vram_visible; token.preferred_domains = (enum vk_rmv_kernel_memory_domain)bo->initial_domain; token.is_driver_internal = is_internal; token.page_count = DIV_ROUND_UP(bo->size, 4096); diff --git a/src/amd/vulkan/radv_rra.c b/src/amd/vulkan/radv_rra.c index d52687e054d..2450518db1e 100644 --- a/src/amd/vulkan/radv_rra.c +++ b/src/amd/vulkan/radv_rra.c @@ -898,15 +898,17 @@ exit: VkResult radv_rra_trace_init(struct radv_device *device) { + const struct radv_physical_device *pdev = radv_device_physical(device); + device->rra_trace.validate_as = debug_get_bool_option("RADV_RRA_TRACE_VALIDATE", false); device->rra_trace.copy_after_build = debug_get_bool_option("RADV_RRA_TRACE_COPY_AFTER_BUILD", false); device->rra_trace.accel_structs = _mesa_pointer_hash_table_create(NULL); device->rra_trace.accel_struct_vas = _mesa_hash_table_u64_create(NULL); simple_mtx_init(&device->rra_trace.data_mtx, mtx_plain); - device->rra_trace.copy_memory_index = radv_find_memory_index( - device->physical_device, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT); + device->rra_trace.copy_memory_index = + radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT); util_dynarray_init(&device->rra_trace.ray_history, NULL); @@ -939,9 +941,9 @@ radv_rra_trace_init(struct radv_device *device) VkMemoryAllocateInfo alloc_info = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = requirements.size, - .memoryTypeIndex = radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT), + .memoryTypeIndex = + radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT), }; result = radv_AllocateMemory(_device, &alloc_info, NULL, &device->rra_trace.ray_history_memory); @@ -1316,6 +1318,7 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename) { RADV_FROM_HANDLE(radv_queue, queue, vk_queue); struct radv_device *device = queue->device; + const struct radv_physical_device *pdev = radv_device_physical(device); VkDevice vk_device = radv_device_to_handle(device); VkResult result = vk_common_DeviceWaitIdle(vk_device); @@ -1365,7 +1368,7 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename) fwrite(&api, sizeof(uint64_t), 1, file); uint64_t asic_info_offset = (uint64_t)ftell(file); - rra_dump_asic_info(&device->physical_device->info, file); + rra_dump_asic_info(&pdev->info, file); uint64_t written_accel_struct_count = 0; diff --git a/src/amd/vulkan/radv_sampler.c b/src/amd/vulkan/radv_sampler.c index 0c0708d802d..ab1dd632e73 100644 --- a/src/amd/vulkan/radv_sampler.c +++ b/src/amd/vulkan/radv_sampler.c @@ -196,14 +196,14 @@ radv_unregister_border_color(struct radv_device *device, uint32_t slot) static void radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler, const VkSamplerCreateInfo *pCreateInfo) { + const struct radv_physical_device *pdev = radv_device_physical(device); uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo); uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso); - bool compat_mode = - device->physical_device->info.gfx_level == GFX8 || device->physical_device->info.gfx_level == GFX9; + bool compat_mode = pdev->info.gfx_level == GFX8 || pdev->info.gfx_level == GFX9; unsigned filter_mode = radv_tex_filter_mode(sampler->vk.reduction_mode); unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; bool trunc_coord = ((pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST) || - device->physical_device->info.conformant_trunc_coord) && + pdev->info.conformant_trunc_coord) && !device->disable_trunc_coord; bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER || pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER || @@ -246,18 +246,17 @@ radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler, cons S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode))); sampler->state[3] = S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color)); - if (device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level >= GFX10) { sampler->state[2] |= S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -32, 31), 8)) | S_008F38_ANISO_OVERRIDE_GFX10(device->instance->drirc.disable_aniso_single_level); } else { sampler->state[2] |= S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) | - S_008F38_DISABLE_LSB_CEIL(device->physical_device->info.gfx_level <= GFX8) | - S_008F38_FILTER_PREC_FIX(1) | + S_008F38_DISABLE_LSB_CEIL(pdev->info.gfx_level <= GFX8) | S_008F38_FILTER_PREC_FIX(1) | S_008F38_ANISO_OVERRIDE_GFX8(device->instance->drirc.disable_aniso_single_level && - device->physical_device->info.gfx_level >= GFX8); + pdev->info.gfx_level >= GFX8); } - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { sampler->state[3] |= S_008F3C_BORDER_COLOR_PTR_GFX11(border_color_ptr); } else { sampler->state[3] |= S_008F3C_BORDER_COLOR_PTR_GFX6(border_color_ptr); diff --git a/src/amd/vulkan/radv_sdma.c b/src/amd/vulkan/radv_sdma.c index 538ca9308df..c3c3e37a281 100644 --- a/src/amd/vulkan/radv_sdma.c +++ b/src/amd/vulkan/radv_sdma.c @@ -57,7 +57,9 @@ static const VkExtent3D radv_sdma_t2t_alignment_3d[] = { ALWAYS_INLINE static unsigned radv_sdma_pitch_alignment(const struct radv_device *device, const unsigned bpp) { - if (device->physical_device->info.sdma_ip_version >= SDMA_5_0) + const struct radv_physical_device *pdev = radv_device_physical(device); + + if (pdev->info.sdma_ip_version >= SDMA_5_0) return MAX2(1, 4 / bpp); return 4; @@ -82,7 +84,9 @@ radv_sdma_check_pitches(const unsigned pitch, const unsigned slice_pitch, const ALWAYS_INLINE static enum gfx9_resource_type radv_sdma_surface_resource_type(const struct radv_device *const device, const struct radeon_surf *const surf) { - if (device->physical_device->info.sdma_ip_version >= SDMA_5_0) { + const struct radv_physical_device *pdev = radv_device_physical(device); + + if (pdev->info.sdma_ip_version >= SDMA_5_0) { /* Use the 2D resource type for rotated or Z swizzles. */ if ((surf->u.gfx9.resource_type == RADEON_RESOURCE_1D || surf->u.gfx9.resource_type == RADEON_RESOURCE_3D) && (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER || surf->micro_tile_mode == RADEON_MICRO_MODE_DEPTH)) @@ -195,7 +199,9 @@ radv_sdma_get_metadata_config(const struct radv_device *const device, const stru const struct radeon_surf *const surf, const VkImageSubresourceLayers subresource, const VkImageAspectFlags aspect_mask) { - if (!device->physical_device->info.sdma_supports_compression || + const struct radv_physical_device *pdev = radv_device_physical(device); + + if (!pdev->info.sdma_supports_compression || !(radv_dcc_enabled(image, subresource.mipLevel) || radv_image_has_htile(image))) { return 0; } @@ -203,8 +209,7 @@ radv_sdma_get_metadata_config(const struct radv_device *const device, const stru const VkFormat format = vk_format_get_aspect_format(image->vk.format, aspect_mask); const struct util_format_description *desc = vk_format_description(format); - const uint32_t data_format = - ac_get_cb_format(device->physical_device->info.gfx_level, vk_format_to_pipe_format(format)); + const uint32_t data_format = ac_get_cb_format(pdev->info.gfx_level, vk_format_to_pipe_format(format)); const uint32_t alpha_is_on_msb = vi_alpha_is_on_msb(device, format); const uint32_t number_type = radv_translate_buffer_numformat(desc, vk_format_get_first_non_void_channel(format)); const uint32_t surface_type = radv_sdma_surface_type_from_aspect_mask(aspect_mask); @@ -220,11 +225,12 @@ static uint32_t radv_sdma_get_tiled_info_dword(const struct radv_device *const device, const struct radv_image *const image, const struct radeon_surf *const surf, const VkImageSubresourceLayers subresource) { + const struct radv_physical_device *pdev = radv_device_physical(device); const uint32_t element_size = util_logbase2(surf->bpe); const uint32_t swizzle_mode = surf->has_stencil ? surf->u.gfx9.zs.stencil_swizzle_mode : surf->u.gfx9.swizzle_mode; const enum gfx9_resource_type dimension = radv_sdma_surface_resource_type(device, surf); const uint32_t info = element_size | swizzle_mode << 3 | dimension << 9; - const enum sdma_version ver = device->physical_device->info.sdma_ip_version; + const enum sdma_version ver = pdev->info.sdma_ip_version; if (ver >= SDMA_5_0) { const uint32_t mip_max = MAX2(image->vk.mip_levels, 1); @@ -242,7 +248,8 @@ static uint32_t radv_sdma_get_tiled_header_dword(const struct radv_device *const device, const struct radv_image *const image, const VkImageSubresourceLayers subresource) { - const enum sdma_version ver = device->physical_device->info.sdma_ip_version; + const struct radv_physical_device *pdev = radv_device_physical(device); + const enum sdma_version ver = pdev->info.sdma_ip_version; if (ver >= SDMA_5_0) { return 0; @@ -262,6 +269,7 @@ radv_sdma_get_surf(const struct radv_device *const device, const struct radv_ima { assert(util_bitcount(aspect_mask) == 1); + const struct radv_physical_device *pdev = radv_device_physical(device); const unsigned plane_idx = radv_plane_from_aspect(aspect_mask); const unsigned binding_idx = image->disjoint ? plane_idx : 0; const struct radv_image_binding *binding = &image->bindings[binding_idx]; @@ -301,7 +309,7 @@ radv_sdma_get_surf(const struct radv_device *const device, const struct radv_ima info.info_dword = radv_sdma_get_tiled_info_dword(device, image, surf, subresource); info.header_dword = radv_sdma_get_tiled_header_dword(device, image, subresource); - if (device->physical_device->info.sdma_supports_compression && + if (pdev->info.sdma_supports_compression && (radv_dcc_enabled(image, subresource.mipLevel) || radv_image_has_htile(image))) { info.meta_va = binding->bo->va + binding->offset + surf->meta_offset; info.meta_config = radv_sdma_get_metadata_config(device, image, surf, subresource, aspect_mask); @@ -326,7 +334,8 @@ radv_sdma_copy_buffer(const struct radv_device *device, struct radeon_cmdbuf *cs if (size == 0) return; - const enum sdma_version ver = device->physical_device->info.sdma_ip_version; + const struct radv_physical_device *pdev = radv_device_physical(device); + const enum sdma_version ver = pdev->info.sdma_ip_version; const unsigned max_size_per_packet = ver >= SDMA_5_2 ? SDMA_V5_2_COPY_MAX_BYTES : SDMA_V2_0_COPY_MAX_BYTES; unsigned align = ~0u; @@ -367,11 +376,13 @@ void radv_sdma_fill_buffer(const struct radv_device *device, struct radeon_cmdbuf *cs, const uint64_t va, const uint64_t size, const uint32_t value) { + const struct radv_physical_device *pdev = radv_device_physical(device); + const uint32_t fill_size = 2; /* This means that the count is in dwords. */ const uint32_t constant_fill_header = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0) | (fill_size & 0x3) << 30; /* This packet is the same since SDMA v2.4, haven't bothered to check older versions. */ - const enum sdma_version ver = device->physical_device->info.sdma_ip_version; + const enum sdma_version ver = pdev->info.sdma_ip_version; assert(ver >= SDMA_2_4); /* Maximum allowed fill size depends on the GPU. @@ -450,7 +461,9 @@ radv_sdma_emit_copy_tiled_sub_window(const struct radv_device *device, struct ra const struct radv_sdma_surf *const linear, const VkExtent3D pix_extent, const bool detile) { - if (!device->physical_device->info.sdma_supports_compression) { + const struct radv_physical_device *pdev = radv_device_physical(device); + + if (!pdev->info.sdma_supports_compression) { assert(!tiled->meta_va); } @@ -499,14 +512,15 @@ radv_sdma_emit_copy_t2t_sub_window(const struct radv_device *device, struct rade const struct radv_sdma_surf *const src, const struct radv_sdma_surf *const dst, const VkExtent3D px_extent) { + const struct radv_physical_device *pdev = radv_device_physical(device); + /* We currently only support the SDMA v4+ versions of this packet. */ - assert(device->physical_device->info.sdma_ip_version >= SDMA_4_0); + assert(pdev->info.sdma_ip_version >= SDMA_4_0); /* On GFX10+ this supports DCC, but cannot copy a compressed surface to another compressed surface. */ assert(!src->meta_va || !dst->meta_va); - if (device->physical_device->info.sdma_ip_version >= SDMA_4_0 && - device->physical_device->info.sdma_ip_version < SDMA_5_0) { + if (pdev->info.sdma_ip_version >= SDMA_4_0 && pdev->info.sdma_ip_version < SDMA_5_0) { /* SDMA v4 doesn't support mip_id selection in the T2T copy packet. */ assert(src->header_dword >> 24 == 0); assert(dst->header_dword >> 24 == 0); @@ -696,7 +710,8 @@ radv_sdma_use_t2t_scanline_copy(const struct radv_device *device, const struct r /* SDMA can't do format conversion. */ assert(src->bpp == dst->bpp); - const enum sdma_version ver = device->physical_device->info.sdma_ip_version; + const struct radv_physical_device *pdev = radv_device_physical(device); + const enum sdma_version ver = pdev->info.sdma_ip_version; if (ver < SDMA_5_0) { /* SDMA v4.x and older doesn't support proper mip level selection. */ if (src->mip_levels > 1 || dst->mip_levels > 1) diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 5e1d683fe24..f310246d479 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -304,6 +304,7 @@ nir_shader * radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_stage *stage, const struct radv_spirv_to_nir_options *options, bool is_internal) { + const struct radv_physical_device *pdev = radv_device_physical(device); unsigned subgroup_size = 64, ballot_bit_size = 64; const unsigned required_subgroup_size = stage->key.subgroup_required_size * 32; if (required_subgroup_size) { @@ -340,7 +341,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st .device = device, .object = stage->spirv.object, }; - const bool has_fragment_shader_interlock = radv_has_pops(device->physical_device); + const bool has_fragment_shader_interlock = radv_has_pops(pdev); const struct spirv_to_nir_options spirv_options = { .caps = { @@ -359,7 +360,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st .device_group = true, .draw_parameters = true, .float_controls = true, - .float16 = device->physical_device->info.has_packed_math_16bit, + .float16 = pdev->info.has_packed_math_16bit, .float32_atomic_add = true, .float32_atomic_min_max = true, .float64 = true, @@ -411,7 +412,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st .variable_pointers = true, .vk_memory_model = true, .vk_memory_model_device_scope = true, - .fragment_shading_rate = device->physical_device->info.gfx_level >= GFX10_3, + .fragment_shading_rate = pdev->info.gfx_level >= GFX10_3, .workgroup_memory_explicit_layout = true, .cooperative_matrix = true, }, @@ -426,11 +427,11 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st .func = radv_spirv_nir_debug, .private_data = &spirv_debug_data, }, - .force_tex_non_uniform = device->physical_device->cache_key.tex_non_uniform, - .force_ssbo_non_uniform = device->physical_device->cache_key.ssbo_non_uniform, + .force_tex_non_uniform = pdev->cache_key.tex_non_uniform, + .force_ssbo_non_uniform = pdev->cache_key.ssbo_non_uniform, }; nir = spirv_to_nir(spirv, stage->spirv.size / 4, spec_entries, num_spec_entries, stage->stage, stage->entrypoint, - &spirv_options, &device->physical_device->nir_options[stage->stage]); + &spirv_options, &pdev->nir_options[stage->stage]); nir->info.internal |= is_internal; assert(nir->info.stage == stage->stage); nir_validate_shader(nir, "after spirv_to_nir"); @@ -507,7 +508,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st NIR_PASS(_, nir, nir_lower_vars_to_ssa); - NIR_PASS(_, nir, nir_propagate_invariant, device->physical_device->cache_key.invariant_geom); + NIR_PASS(_, nir, nir_propagate_invariant, pdev->cache_key.invariant_geom); NIR_PASS(_, nir, nir_lower_clip_cull_distance_arrays); @@ -515,11 +516,11 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st nir->info.stage == MESA_SHADER_GEOMETRY) NIR_PASS_V(nir, nir_shader_gather_xfb_info); - NIR_PASS(_, nir, nir_lower_discard_or_demote, device->physical_device->cache_key.lower_discard_to_demote); + NIR_PASS(_, nir, nir_lower_discard_or_demote, pdev->cache_key.lower_discard_to_demote); nir_lower_doubles_options lower_doubles = nir->options->lower_doubles_options; - if (device->physical_device->info.gfx_level == GFX6) { + if (pdev->info.gfx_level == GFX6) { /* GFX6 doesn't support v_floor_f64 and the precision * of v_fract_f64 which is used to implement 64-bit * floor is less than what Vulkan requires. @@ -537,7 +538,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st /* Mesh shaders run as NGG which can implement local_invocation_index from * the wave ID in merged_wave_info, but they don't have local_invocation_ids on GFX10.3. */ - .lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH && !device->physical_device->mesh_fast_launch_2, + .lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH && !pdev->mesh_fast_launch_2, .lower_local_invocation_index = nir->info.stage == MESA_SHADER_COMPUTE && ((nir->info.workgroup_size[0] == 1) + (nir->info.workgroup_size[1] == 1) + (nir->info.workgroup_size[2] == 1)) == 2, @@ -569,10 +570,10 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st .lower_txf_offset = true, .lower_tg4_offsets = true, .lower_txs_cube_array = true, - .lower_to_fragment_fetch_amd = device->physical_device->use_fmask, + .lower_to_fragment_fetch_amd = pdev->use_fmask, .lower_lod_zero_width = true, .lower_invalid_implicit_lod = true, - .lower_1d = device->physical_device->info.gfx_level == GFX9, + .lower_1d = pdev->info.gfx_level == GFX9, }; NIR_PASS(_, nir, nir_lower_tex, &tex_options); @@ -597,7 +598,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st NIR_PASS(_, nir, nir_lower_global_vars_to_local); NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL); - bool gfx7minus = device->physical_device->info.gfx_level <= GFX7; + bool gfx7minus = pdev->info.gfx_level <= GFX7; bool has_inverse_ballot = true; #if LLVM_AVAILABLE has_inverse_ballot = !radv_use_llvm_for_stage(device, nir->info.stage) || LLVM_VERSION_MAJOR >= 17; @@ -690,7 +691,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st nir->info.stage == MESA_SHADER_MESH) && nir->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE)) { /* Lower primitive shading rate to match HW requirements. */ - NIR_PASS(_, nir, radv_nir_lower_primitive_shading_rate, device->physical_device->info.gfx_level); + NIR_PASS(_, nir, radv_nir_lower_primitive_shading_rate, pdev->info.gfx_level); } /* Indirect lowering must be called after the radv_optimize_nir() loop @@ -698,8 +699,8 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st * bloat the instruction count of the loop and cause it to be * considered too large for unrolling. */ - if (ac_nir_lower_indirect_derefs(nir, device->physical_device->info.gfx_level) && - !stage->key.optimisations_disabled && nir->info.stage != MESA_SHADER_COMPUTE) { + if (ac_nir_lower_indirect_derefs(nir, pdev->info.gfx_level) && !stage->key.optimisations_disabled && + nir->info.stage != MESA_SHADER_COMPUTE) { /* Optimize the lowered code before the linking optimizations. */ radv_optimize_nir(nir, false); } @@ -775,6 +776,7 @@ void radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage, const struct radv_graphics_state_key *gfx_state) { + const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_shader_info *info = &ngg_stage->info; nir_shader *nir = ngg_stage->nir; @@ -818,19 +820,19 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage, nir->info.shared_size = info->ngg_info.lds_size; ac_nir_lower_ngg_options options = {0}; - options.family = device->physical_device->info.family; - options.gfx_level = device->physical_device->info.gfx_level; + options.family = pdev->info.family; + options.gfx_level = pdev->info.gfx_level; options.max_workgroup_size = info->workgroup_size; options.wave_size = info->wave_size; options.clip_cull_dist_mask = info->outinfo.clip_dist_mask | info->outinfo.cull_dist_mask; options.vs_output_param_offset = info->outinfo.vs_output_param_offset; options.has_param_exports = info->outinfo.param_exports || info->outinfo.prim_param_exports; options.can_cull = nir->info.stage != MESA_SHADER_GEOMETRY && info->has_ngg_culling; - options.disable_streamout = !device->physical_device->use_ngg_streamout; + options.disable_streamout = !pdev->use_ngg_streamout; options.has_gen_prim_query = info->has_prim_query; options.has_xfb_prim_query = info->has_xfb_query; - options.has_gs_invocations_query = device->physical_device->info.gfx_level < GFX11; - options.has_gs_primitives_query = device->physical_device->info.gfx_level < GFX11; + options.has_gs_invocations_query = pdev->info.gfx_level < GFX11; + options.has_gs_primitives_query = pdev->info.gfx_level < GFX11; options.force_vrs = info->force_vrs_per_vertex; if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) { @@ -862,8 +864,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage, bool scratch_ring = false; NIR_PASS_V(nir, ac_nir_lower_ngg_ms, options.gfx_level, options.clip_cull_dist_mask, options.vs_output_param_offset, options.has_param_exports, &scratch_ring, info->wave_size, - hw_workgroup_size, gfx_state->has_multiview_view_index, info->ms.has_query, - device->physical_device->mesh_fast_launch_2); + hw_workgroup_size, gfx_state->has_multiview_view_index, info->ms.has_query, pdev->mesh_fast_launch_2); ngg_stage->info.ms.needs_ms_scratch_ring = scratch_ring; } else { unreachable("invalid SW stage passed to radv_lower_ngg"); @@ -933,6 +934,7 @@ static struct radv_shader_arena * radv_create_shader_arena(struct radv_device *device, struct radv_shader_free_list *free_list, unsigned min_size, unsigned arena_size, bool replayable, uint64_t replay_va) { + const struct radv_physical_device *pdev = radv_device_physical(device); union radv_shader_arena_block *alloc = NULL; struct radv_shader_arena *arena = calloc(1, sizeof(struct radv_shader_arena)); if (!arena) @@ -948,7 +950,7 @@ radv_create_shader_arena(struct radv_device *device, struct radv_shader_free_lis if (device->shader_use_invisible_vram) flags |= RADEON_FLAG_NO_CPU_ACCESS; else - flags |= (device->physical_device->info.cpdma_prefetch_writes_memory ? 0 : RADEON_FLAG_READ_ONLY); + flags |= (pdev->info.cpdma_prefetch_writes_memory ? 0 : RADEON_FLAG_READ_ONLY); if (replayable) flags |= RADEON_FLAG_REPLAYABLE; @@ -1079,7 +1081,9 @@ insert_block(struct radv_device *device, union radv_shader_arena_block *hole, ui union radv_shader_arena_block * radv_alloc_shader_memory(struct radv_device *device, uint32_t size, bool replayable, void *ptr) { - size = ac_align_shader_binary_for_prefetch(&device->physical_device->info, size); + const struct radv_physical_device *pdev = radv_device_physical(device); + + size = ac_align_shader_binary_for_prefetch(&pdev->info, size); size = align(size, RADV_SHADER_ALLOC_ALIGNMENT); mtx_lock(&device->shader_arena_mutex); @@ -1402,7 +1406,8 @@ radv_destroy_shader_upload_queue(struct radv_device *device) static bool radv_should_use_wgp_mode(const struct radv_device *device, gl_shader_stage stage, const struct radv_shader_info *info) { - enum amd_gfx_level chip = device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(device); + enum amd_gfx_level chip = pdev->info.gfx_level; switch (stage) { case MESA_SHADER_COMPUTE: case MESA_SHADER_TESS_CTRL: @@ -1422,13 +1427,13 @@ static bool radv_open_rtld_binary(struct radv_device *device, const struct radv_shader_binary *binary, struct ac_rtld_binary *rtld_binary) { + const struct radv_physical_device *pdev = radv_device_physical(device); const char *elf_data = (const char *)((struct radv_shader_binary_rtld *)binary)->data; size_t elf_size = ((struct radv_shader_binary_rtld *)binary)->elf_size; struct ac_rtld_symbol lds_symbols[3]; unsigned num_lds_symbols = 0; - if (device->physical_device->info.gfx_level >= GFX9 && - (binary->info.stage == MESA_SHADER_GEOMETRY || binary->info.is_ngg)) { + if (pdev->info.gfx_level >= GFX9 && (binary->info.stage == MESA_SHADER_GEOMETRY || binary->info.is_ngg)) { struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++]; sym->name = "esgs_ring"; sym->size = binary->info.ngg_info.esgs_ring_size; @@ -1448,7 +1453,7 @@ radv_open_rtld_binary(struct radv_device *device, const struct radv_shader_binar } struct ac_rtld_open_info open_info = { - .info = &device->physical_device->info, + .info = &pdev->info, .shader_type = binary->info.stage, .wave_size = binary->info.wave_size, .num_parts = 1, @@ -1466,6 +1471,7 @@ static bool radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_binary *binary, const struct radv_shader_args *args) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct ac_shader_config *config = &binary->config; if (binary->type == RADV_BINARY_TYPE_RTLD) { @@ -1478,13 +1484,13 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi return false; } - if (!ac_rtld_read_config(&device->physical_device->info, &rtld_binary, config)) { + if (!ac_rtld_read_config(&pdev->info, &rtld_binary, config)) { ac_rtld_close(&rtld_binary); return false; } if (rtld_binary.lds_size > 0) { - unsigned encode_granularity = device->physical_device->info.lds_encode_granularity; + unsigned encode_granularity = pdev->info.lds_encode_granularity; config->lds_size = DIV_ROUND_UP(rtld_binary.lds_size, encode_granularity); } if (!config->lds_size && binary->info.stage == MESA_SHADER_TESS_CTRL) { @@ -1499,7 +1505,6 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi const struct radv_shader_info *info = &binary->info; gl_shader_stage stage = binary->info.stage; - const struct radv_physical_device *pdev = device->physical_device; bool scratch_enabled = config->scratch_bytes_per_wave > 0; bool trap_enabled = !!device->trap_handler_shader; unsigned vgpr_comp_cnt = 0; @@ -2064,7 +2069,8 @@ unsigned radv_get_max_waves(const struct radv_device *device, const struct ac_shader_config *conf, const struct radv_shader_info *info) { - const struct radeon_info *gpu_info = &device->physical_device->info; + const struct radv_physical_device *pdev = radv_device_physical(device); + const struct radeon_info *gpu_info = &pdev->info; const enum amd_gfx_level gfx_level = gpu_info->gfx_level; const uint8_t wave_size = info->wave_size; gl_shader_stage stage = info->stage; @@ -2109,7 +2115,8 @@ radv_get_max_waves(const struct radv_device *device, const struct ac_shader_conf unsigned radv_get_max_scratch_waves(const struct radv_device *device, struct radv_shader *shader) { - const unsigned num_cu = device->physical_device->info.num_cu; + const struct radv_physical_device *pdev = radv_device_physical(device); + const unsigned num_cu = pdev->info.num_cu; return MIN2(device->scratch_waves, 4 * num_cu * shader->max_waves); } @@ -2423,10 +2430,12 @@ radv_fill_nir_compiler_options(struct radv_nir_compiler_options *options, struct bool can_dump_shader, bool is_meta_shader, bool keep_shader_info, bool keep_statistic_info) { + const struct radv_physical_device *pdev = radv_device_physical(device); + /* robust_buffer_access_llvm here used by LLVM only, pipeline robustness is not exposed there. */ options->robust_buffer_access_llvm = device->buffer_robustness >= RADV_BUFFER_ROBUSTNESS_1; options->wgp_mode = should_use_wgp; - options->info = &device->physical_device->info; + options->info = &pdev->info; options->dump_shader = can_dump_shader; options->dump_preoptir = options->dump_shader && device->instance->debug_flags & RADV_DEBUG_PREOPTIR; options->record_ir = keep_shader_info; @@ -2607,6 +2616,7 @@ radv_aco_build_shader_part(void **bin, uint32_t num_sgprs, uint32_t num_vgprs, c struct radv_shader * radv_create_rt_prolog(struct radv_device *device) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_shader *prolog; struct radv_shader_args in_args = {0}; struct radv_shader_args out_args = {0}; @@ -2618,13 +2628,13 @@ radv_create_rt_prolog(struct radv_device *device) info.stage = MESA_SHADER_COMPUTE; info.loads_push_constants = true; info.desc_set_used_mask = -1; /* just to force indirection */ - info.wave_size = device->physical_device->rt_wave_size; + info.wave_size = pdev->rt_wave_size; info.workgroup_size = info.wave_size; info.user_data_0 = R_00B900_COMPUTE_USER_DATA_0; info.cs.is_rt_shader = true; info.cs.uses_dynamic_rt_callable_stack = true; info.cs.block_size[0] = 8; - info.cs.block_size[1] = device->physical_device->rt_wave_size == 64 ? 8 : 4; + info.cs.block_size[1] = pdev->rt_wave_size == 64 ? 8 : 4; info.cs.block_size[2] = 1; info.cs.uses_thread_id[0] = true; info.cs.uses_thread_id[1] = true; @@ -2739,6 +2749,7 @@ struct radv_shader_part * radv_create_ps_epilog(struct radv_device *device, const struct radv_ps_epilog_key *key, struct radv_shader_part_binary **binary_out) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_shader_part *epilog; struct radv_shader_args args = {0}; struct radv_nir_compiler_options options = {0}; @@ -2748,7 +2759,7 @@ radv_create_ps_epilog(struct radv_device *device, const struct radv_ps_epilog_ke struct radv_shader_info info = {0}; info.stage = MESA_SHADER_FRAGMENT; - info.wave_size = device->physical_device->ps_wave_size; + info.wave_size = pdev->ps_wave_size; info.workgroup_size = 64; radv_declare_ps_epilog_args(device, key, &args); diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index 6d769c512f9..1fec667d75e 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -258,7 +258,9 @@ declare_ms_input_sgprs(const struct radv_shader_info *info, struct radv_shader_a static void declare_ms_input_vgprs(const struct radv_device *device, struct radv_shader_args *args) { - if (device->physical_device->mesh_fast_launch_2) { + const struct radv_physical_device *pdev = radv_device_physical(device); + + if (pdev->mesh_fast_launch_2) { ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids); } else { ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id); @@ -510,7 +512,8 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics const struct radv_shader_info *info, gl_shader_stage stage, gl_shader_stage previous_stage, struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info) { - const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; bool has_shader_query = info->has_prim_query || info->has_xfb_query || (stage == MESA_SHADER_GEOMETRY && info->gs.has_pipeline_stat_query) || (stage == MESA_SHADER_MESH && info->ms.has_query) || @@ -784,7 +787,7 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics declare_ngg_sgprs(info, args, has_ngg_provoking_vtx); } - if (previous_stage != MESA_SHADER_MESH || !device->physical_device->mesh_fast_launch_2) { + if (previous_stage != MESA_SHADER_MESH || !pdev->mesh_fast_launch_2) { ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id); @@ -871,7 +874,8 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_gra if (info->loads_push_constants) num_user_sgprs++; - const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; uint32_t available_sgprs = gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16; uint32_t remaining_sgprs = available_sgprs - num_user_sgprs; diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index d7659a66cba..4464135c1dd 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -342,6 +342,8 @@ static uint8_t radv_get_wave_size(struct radv_device *device, gl_shader_stage stage, const struct radv_shader_info *info, const struct radv_shader_stage_key *stage_key) { + const struct radv_physical_device *pdev = radv_device_physical(device); + if (stage_key->subgroup_required_size) return stage_key->subgroup_required_size * 32; @@ -350,11 +352,11 @@ radv_get_wave_size(struct radv_device *device, gl_shader_stage stage, const stru else if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_TASK) return info->wave_size; else if (stage == MESA_SHADER_FRAGMENT) - return device->physical_device->ps_wave_size; + return pdev->ps_wave_size; else if (gl_shader_stage_is_rt(stage)) - return device->physical_device->rt_wave_size; + return pdev->rt_wave_size; else - return device->physical_device->ge_wave_size; + return pdev->ge_wave_size; } static uint8_t @@ -370,6 +372,7 @@ radv_get_ballot_bit_size(struct radv_device *device, gl_shader_stage stage, cons static uint32_t radv_compute_esgs_itemsize(const struct radv_device *device, uint32_t num_varyings) { + const struct radv_physical_device *pdev = radv_device_physical(device); uint32_t esgs_itemsize; esgs_itemsize = num_varyings * 16; @@ -377,7 +380,7 @@ radv_compute_esgs_itemsize(const struct radv_device *device, uint32_t num_varyin /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank * conflicts, i.e. each vertex will start on a different bank. */ - if (device->physical_device->info.gfx_level >= GFX9 && esgs_itemsize) + if (pdev->info.gfx_level >= GFX9 && esgs_itemsize) esgs_itemsize += 4; return esgs_itemsize; @@ -562,6 +565,8 @@ static void gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir, const struct radv_graphics_state_key *gfx_state, struct radv_shader_info *info) { + const struct radv_physical_device *pdev = radv_device_physical(device); + info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out; info->tcs.tes_inputs_read = ~0ULL; info->tcs.tes_patch_inputs_read = ~0ULL; @@ -571,15 +576,14 @@ gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir, if (gfx_state->ts.patch_control_points) { /* Number of tessellation patches per workgroup processed by the current pipeline. */ - info->num_tess_patches = - get_tcs_num_patches(gfx_state->ts.patch_control_points, nir->info.tess.tcs_vertices_out, - info->tcs.num_linked_inputs, info->tcs.num_linked_outputs, - info->tcs.num_linked_patch_outputs, device->physical_device->hs.tess_offchip_block_dw_size, - device->physical_device->info.gfx_level, device->physical_device->info.family); + info->num_tess_patches = get_tcs_num_patches( + gfx_state->ts.patch_control_points, nir->info.tess.tcs_vertices_out, info->tcs.num_linked_inputs, + info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs, pdev->hs.tess_offchip_block_dw_size, + pdev->info.gfx_level, pdev->info.family); /* LDS size used by VS+TCS for storing TCS inputs and outputs. */ info->tcs.num_lds_blocks = - calculate_tess_lds_size(device->physical_device->info.gfx_level, gfx_state->ts.patch_control_points, + calculate_tess_lds_size(pdev->info.gfx_level, gfx_state->ts.patch_control_points, nir->info.tess.tcs_vertices_out, info->tcs.num_linked_inputs, info->num_tess_patches, info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs); } @@ -616,7 +620,7 @@ gather_shader_info_tes(struct radv_device *device, const nir_shader *nir, struct static void radv_init_legacy_gs_ring_info(const struct radv_device *device, struct radv_shader_info *gs_info) { - const struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_legacy_gs_info *gs_ring_info = &gs_info->gs_ring_info; unsigned num_se = pdev->info.max_se; unsigned wave_size = 64; @@ -650,6 +654,7 @@ radv_init_legacy_gs_ring_info(const struct radv_device *device, struct radv_shad static void radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *gs_info) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_legacy_gs_info *out = &gs_info->gs_ring_info; const unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1); const bool uses_adjacency = @@ -734,7 +739,7 @@ radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_inf const uint32_t gs_prims_per_subgroup = gs_prims; const uint32_t gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations; const uint32_t max_prims_per_subgroup = gs_inst_prims_in_subgroup * gs_info->gs.vertices_out; - const uint32_t lds_granularity = device->physical_device->info.lds_encode_granularity; + const uint32_t lds_granularity = pdev->info.lds_encode_granularity; const uint32_t total_lds_bytes = align(esgs_lds_size * 4, lds_granularity); out->lds_size = total_lds_bytes / lds_granularity; out->vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) | @@ -750,6 +755,7 @@ radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_inf static void gather_shader_info_gs(struct radv_device *device, const nir_shader *nir, struct radv_shader_info *info) { + const struct radv_physical_device *pdev = radv_device_physical(device); unsigned add_clip = nir->info.clip_distance_array_size + nir->info.cull_distance_array_size > 4; info->gs.gsvs_vertex_size = (util_bitcount64(nir->info.outputs_written) + add_clip) * 16; info->gs.max_gsvs_emit_size = info->gs.gsvs_vertex_size * nir->info.gs.vertices_out; @@ -770,7 +776,7 @@ gather_shader_info_gs(struct radv_device *device, const nir_shader *nir, struct info->gs.num_stream_output_components[stream] += num_components; } - info->gs.has_pipeline_stat_query = device->physical_device->emulate_ngg_gs_query_pipeline_stat; + info->gs.has_pipeline_stat_query = pdev->emulate_ngg_gs_query_pipeline_stat; gather_info_unlinked_input(info, nir); @@ -830,9 +836,10 @@ gather_shader_info_mesh(struct radv_device *device, const nir_shader *nir, static void calc_mesh_workgroup_size(const struct radv_device *device, const nir_shader *nir, struct radv_shader_info *info) { + const struct radv_physical_device *pdev = radv_device_physical(device); unsigned api_workgroup_size = ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX); - if (device->physical_device->mesh_fast_launch_2) { + if (pdev->mesh_fast_launch_2) { /* Use multi-row export. It is also necessary to use the API workgroup size for non-emulated queries. */ info->workgroup_size = api_workgroup_size; } else { @@ -848,6 +855,7 @@ static void gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir, const struct radv_graphics_state_key *gfx_state, struct radv_shader_info *info) { + const struct radv_physical_device *pdev = radv_device_physical(device); uint64_t per_primitive_input_mask = nir->info.inputs_read & nir->info.per_primitive_inputs; unsigned num_per_primitive_inputs = util_bitcount64(per_primitive_input_mask); assert(num_per_primitive_inputs <= nir->num_inputs); @@ -855,7 +863,7 @@ gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir, info->ps.num_interp = nir->num_inputs; info->ps.num_prim_interp = 0; - if (device->physical_device->info.gfx_level == GFX10_3) { + if (pdev->info.gfx_level == GFX10_3) { /* GFX10.3 distinguishes NUM_INTERP and NUM_PRIM_INTERP, but * these are counted together in NUM_INTERP on GFX11. */ @@ -972,7 +980,7 @@ gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir, */ info->ps.force_sample_iter_shading_rate = (info->ps.reads_sample_mask_in && !info->ps.needs_poly_line_smooth) || - (device->physical_device->info.gfx_level == GFX10_3 && + (pdev->info.gfx_level == GFX10_3 && (nir->info.fs.sample_interlock_ordered || nir->info.fs.sample_interlock_unordered || nir->info.fs.pixel_interlock_ordered || nir->info.fs.pixel_interlock_unordered)); @@ -992,8 +1000,7 @@ gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir, */ const bool mask_export_enable = info->ps.writes_sample_mask; - const bool disable_rbplus = - device->physical_device->info.has_rbplus && !device->physical_device->info.rbplus_allowed; + const bool disable_rbplus = pdev->info.has_rbplus && !pdev->info.rbplus_allowed; info->ps.db_shader_control = S_02880C_Z_EXPORT_ENABLE(info->ps.writes_z) | S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(info->ps.writes_stencil) | @@ -1020,9 +1027,10 @@ static void gather_shader_info_cs(struct radv_device *device, const nir_shader *nir, const struct radv_shader_stage_key *stage_key, struct radv_shader_info *info) { - unsigned default_wave_size = device->physical_device->cs_wave_size; + const struct radv_physical_device *pdev = radv_device_physical(device); + unsigned default_wave_size = pdev->cs_wave_size; if (info->cs.uses_rt) - default_wave_size = device->physical_device->rt_wave_size; + default_wave_size = pdev->rt_wave_size; unsigned local_size = nir->info.workgroup_size[0] * nir->info.workgroup_size[1] * nir->info.workgroup_size[2]; @@ -1040,14 +1048,14 @@ gather_shader_info_cs(struct radv_device *device, const nir_shader *nir, const s info->wave_size = required_subgroup_size; } else if (require_full_subgroups) { info->wave_size = RADV_SUBGROUP_SIZE; - } else if (device->physical_device->info.gfx_level >= GFX10 && local_size <= 32) { + } else if (pdev->info.gfx_level >= GFX10 && local_size <= 32) { /* Use wave32 for small workgroups. */ info->wave_size = 32; } else { info->wave_size = default_wave_size; } - if (device->physical_device->info.has_cs_regalloc_hang_bug) { + if (pdev->info.has_cs_regalloc_hang_bug) { info->cs.regalloc_hang_bug = info->cs.block_size[0] * info->cs.block_size[1] * info->cs.block_size[2] > 256; } } @@ -1083,7 +1091,8 @@ gather_shader_info_task(struct radv_device *device, const nir_shader *nir, static uint32_t radv_get_user_data_0(const struct radv_device *device, struct radv_shader_info *info) { - const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; switch (info->stage) { case MESA_SHADER_VERTEX: @@ -1139,7 +1148,8 @@ radv_get_user_data_0(const struct radv_device *device, struct radv_shader_info * static bool radv_is_merged_shader_compiled_separately(const struct radv_device *device, const struct radv_shader_info *info) { - const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; if (gfx_level >= GFX9) { switch (info->stage) { @@ -1180,6 +1190,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n const struct radv_graphics_state_key *gfx_state, const enum radv_pipeline_type pipeline_type, bool consider_force_vrs, struct radv_shader_info *info) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct nir_function *func = (struct nir_function *)exec_list_get_head_const(&nir->functions); if (layout->use_dynamic_descriptors) { @@ -1257,7 +1268,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n /* The HW always assumes that there is at least 1 per-vertex param. * so if there aren't any, we have to offset per-primitive params by 1. */ - const unsigned extra_offset = !!(total_param_exports == 0 && device->physical_device->info.gfx_level >= GFX11); + const unsigned extra_offset = !!(total_param_exports == 0 && pdev->info.gfx_level >= GFX11); /* Per-primitive outputs: the HW needs these to be last. */ assign_outinfo_params(outinfo, per_prim_mask, &total_param_exports, extra_offset); @@ -1274,7 +1285,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n /* Used by compute and mesh shaders. Mesh shaders must always declare this before GFX11. */ info->cs.uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS) || - (nir->info.stage == MESA_SHADER_MESH && device->physical_device->info.gfx_level < GFX11); + (nir->info.stage == MESA_SHADER_MESH && pdev->info.gfx_level < GFX11); info->cs.uses_local_invocation_idx = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) | BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) | BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS) | @@ -1348,9 +1359,9 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n break; case MESA_SHADER_TESS_CTRL: if (gfx_state->ts.patch_control_points) { - info->workgroup_size = ac_compute_lshs_workgroup_size( - device->physical_device->info.gfx_level, MESA_SHADER_TESS_CTRL, info->num_tess_patches, - gfx_state->ts.patch_control_points, info->tcs.tcs_vertices_out); + info->workgroup_size = + ac_compute_lshs_workgroup_size(pdev->info.gfx_level, MESA_SHADER_TESS_CTRL, info->num_tess_patches, + gfx_state->ts.patch_control_points, info->tcs.tcs_vertices_out); } else { /* Set the maximum possible value when the workgroup size can't be determined. */ info->workgroup_size = 256; @@ -1371,7 +1382,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n unsigned es_verts_per_subgroup = G_028A44_ES_VERTS_PER_SUBGRP(info->gs_ring_info.vgt_gs_onchip_cntl); unsigned gs_inst_prims_in_subgroup = G_028A44_GS_INST_PRIMS_IN_SUBGRP(info->gs_ring_info.vgt_gs_onchip_cntl); - info->workgroup_size = ac_compute_esgs_workgroup_size(device->physical_device->info.gfx_level, info->wave_size, + info->workgroup_size = ac_compute_esgs_workgroup_size(pdev->info.gfx_level, info->wave_size, es_verts_per_subgroup, gs_inst_prims_in_subgroup); } else { /* Set the maximum possible value by default, this will be optimized during linking if @@ -1441,6 +1452,7 @@ static unsigned gfx10_get_ngg_scratch_lds_base(const struct radv_device *device, const struct radv_shader_info *es_info, const struct radv_shader_info *gs_info, const struct gfx10_ngg_info *ngg_info) { + const struct radv_physical_device *pdev = radv_device_physical(device); uint32_t scratch_lds_base; if (gs_info) { @@ -1451,7 +1463,7 @@ gfx10_get_ngg_scratch_lds_base(const struct radv_device *device, const struct ra } else { const bool uses_instanceid = es_info->vs.needs_instance_id; const bool uses_primitive_id = es_info->uses_prim_id; - const bool streamout_enabled = es_info->so.num_outputs && device->physical_device->use_ngg_streamout; + const bool streamout_enabled = es_info->so.num_outputs && pdev->use_ngg_streamout; const uint32_t num_outputs = es_info->stage == MESA_SHADER_VERTEX ? es_info->vs.num_outputs : es_info->tes.num_outputs; unsigned pervertex_lds_bytes = ac_ngg_nogs_get_pervertex_lds_size( @@ -1471,7 +1483,8 @@ void gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info, struct gfx10_ngg_info *out) { - const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; const unsigned max_verts_per_prim = radv_get_num_input_vertices(es_info, gs_info); const unsigned min_verts_per_prim = gs_info ? max_verts_per_prim : 1; @@ -1683,9 +1696,8 @@ gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es /* Get scratch LDS usage. */ const struct radv_shader_info *info = gs_info ? gs_info : es_info; - const unsigned scratch_lds_size = - ac_ngg_get_scratch_lds_size(info->stage, info->workgroup_size, info->wave_size, - device->physical_device->use_ngg_streamout, info->has_ngg_culling); + const unsigned scratch_lds_size = ac_ngg_get_scratch_lds_size(info->stage, info->workgroup_size, info->wave_size, + pdev->use_ngg_streamout, info->has_ngg_culling); out->lds_size = out->scratch_lds_base + scratch_lds_size; unsigned workgroup_size = @@ -1700,6 +1712,8 @@ static void radv_determine_ngg_settings(struct radv_device *device, struct radv_shader_stage *es_stage, struct radv_shader_stage *fs_stage, const struct radv_graphics_state_key *gfx_state) { + const struct radv_physical_device *pdev = radv_device_physical(device); + assert(es_stage->stage == MESA_SHADER_VERTEX || es_stage->stage == MESA_SHADER_TESS_EVAL); assert(!fs_stage || fs_stage->stage == MESA_SHADER_FRAGMENT); @@ -1715,8 +1729,8 @@ radv_determine_ngg_settings(struct radv_device *device, struct radv_shader_stage : 3; } - es_stage->info.has_ngg_culling = radv_consider_culling(device->physical_device, es_stage->nir, ps_inputs_read, - num_vertices_per_prim, &es_stage->info); + es_stage->info.has_ngg_culling = + radv_consider_culling(pdev, es_stage->nir, ps_inputs_read, num_vertices_per_prim, &es_stage->info); nir_function_impl *impl = nir_shader_get_entrypoint(es_stage->nir); es_stage->info.has_ngg_early_prim_export = exec_list_is_singular(&impl->body); @@ -1732,6 +1746,8 @@ static void radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *producer, struct radv_shader_stage *consumer, const struct radv_graphics_state_key *gfx_state) { + const struct radv_physical_device *pdev = radv_device_physical(device); + /* Export primitive ID and clip/cull distances if read by the FS, or export unconditionally when * the next stage is unknown (with graphics pipeline library). */ @@ -1782,9 +1798,9 @@ radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *pro struct radv_shader_stage *tcs_stage = consumer; if (gfx_state->ts.patch_control_points) { - vs_stage->info.workgroup_size = ac_compute_lshs_workgroup_size( - device->physical_device->info.gfx_level, MESA_SHADER_VERTEX, tcs_stage->info.num_tess_patches, - gfx_state->ts.patch_control_points, tcs_stage->info.tcs.tcs_vertices_out); + vs_stage->info.workgroup_size = + ac_compute_lshs_workgroup_size(pdev->info.gfx_level, MESA_SHADER_VERTEX, tcs_stage->info.num_tess_patches, + gfx_state->ts.patch_control_points, tcs_stage->info.tcs.tcs_vertices_out); if (!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) { /* When the number of TCS input and output vertices are the same (typically 3): @@ -1797,7 +1813,7 @@ radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *pro * instruction dominating another with a different mode. */ vs_stage->info.vs.tcs_in_out_eq = - device->physical_device->info.gfx_level >= GFX9 && + pdev->info.gfx_level >= GFX9 && gfx_state->ts.patch_control_points == tcs_stage->info.tcs.tcs_vertices_out && vs_stage->nir->info.float_controls_execution_mode == tcs_stage->nir->info.float_controls_execution_mode; @@ -1865,6 +1881,8 @@ void radv_nir_shader_info_link(struct radv_device *device, const struct radv_graphics_state_key *gfx_state, struct radv_shader_stage *stages) { + const struct radv_physical_device *pdev = radv_device_physical(device); + /* Walk backwards to link */ struct radv_shader_stage *next_stage = stages[MESA_SHADER_FRAGMENT].nir ? &stages[MESA_SHADER_FRAGMENT] : NULL; @@ -1877,7 +1895,7 @@ radv_nir_shader_info_link(struct radv_device *device, const struct radv_graphics next_stage = &stages[s]; } - if (device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { /* Merge shader info for VS+TCS. */ if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_TESS_CTRL].nir) { radv_nir_shader_info_merge(&stages[MESA_SHADER_VERTEX], &stages[MESA_SHADER_TESS_CTRL]); diff --git a/src/amd/vulkan/radv_shader_object.c b/src/amd/vulkan/radv_shader_object.c index 19db55e3d81..02eba1a7e9d 100644 --- a/src/amd/vulkan/radv_shader_object.c +++ b/src/amd/vulkan/radv_shader_object.c @@ -128,6 +128,7 @@ static VkResult radv_shader_object_init_graphics(struct radv_shader_object *shader_obj, struct radv_device *device, const VkShaderCreateInfoEXT *pCreateInfo) { + const struct radv_physical_device *pdev = radv_device_physical(device); gl_shader_stage stage = vk_to_mesa_shader_stage(pCreateInfo->stage); struct radv_shader_stage stages[MESA_VULKAN_SHADER_STAGES]; @@ -149,7 +150,7 @@ radv_shader_object_init_graphics(struct radv_shader_object *shader_obj, struct r gfx_state.dynamic_provoking_vtx_mode = true; gfx_state.dynamic_line_rast_mode = true; - if (device->physical_device->info.gfx_level >= GFX11) + if (pdev->info.gfx_level >= GFX11) gfx_state.ps.exports_mrtz_via_epilog = true; struct radv_shader *shader = NULL; @@ -297,6 +298,7 @@ static VkResult radv_shader_object_init(struct radv_shader_object *shader_obj, struct radv_device *device, const VkShaderCreateInfoEXT *pCreateInfo) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_shader_layout layout; VkResult result; @@ -317,7 +319,7 @@ radv_shader_object_init(struct radv_shader_object *shader_obj, struct radv_devic const uint8_t *cache_uuid = blob_read_bytes(&blob, VK_UUID_SIZE); - if (memcmp(cache_uuid, device->physical_device->cache_uuid, VK_UUID_SIZE)) + if (memcmp(cache_uuid, pdev->cache_uuid, VK_UUID_SIZE)) return VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT; const bool has_main_binary = blob_read_uint32(&blob); @@ -407,6 +409,7 @@ radv_shader_object_create_linked(VkDevice _device, uint32_t createInfoCount, con const VkAllocationCallbacks *pAllocator, VkShaderEXT *pShaders) { RADV_FROM_HANDLE(radv_device, device, _device); + const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_shader_stage stages[MESA_VULKAN_SHADER_STAGES]; for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) { @@ -425,7 +428,7 @@ radv_shader_object_create_linked(VkDevice _device, uint32_t createInfoCount, con gfx_state.dynamic_provoking_vtx_mode = true; gfx_state.dynamic_line_rast_mode = true; - if (device->physical_device->info.gfx_level >= GFX11) + if (pdev->info.gfx_level >= GFX11) gfx_state.ps.exports_mrtz_via_epilog = true; for (unsigned i = 0; i < createInfoCount; i++) { @@ -621,6 +624,7 @@ radv_GetShaderBinaryDataEXT(VkDevice _device, VkShaderEXT shader, size_t *pDataS { RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_shader_object, shader_obj, shader); + const struct radv_physical_device *pdev = radv_device_physical(device); const size_t size = radv_get_shader_object_size(shader_obj); if (!pData) { @@ -635,7 +639,7 @@ radv_GetShaderBinaryDataEXT(VkDevice _device, VkShaderEXT shader, size_t *pDataS struct blob blob; blob_init_fixed(&blob, pData, *pDataSize); - blob_write_bytes(&blob, device->physical_device->cache_uuid, VK_UUID_SIZE); + blob_write_bytes(&blob, pdev->cache_uuid, VK_UUID_SIZE); radv_write_shader_binary(&blob, shader_obj->binary); diff --git a/src/amd/vulkan/radv_spm.c b/src/amd/vulkan/radv_spm.c index eae524686f7..26c152c8f30 100644 --- a/src/amd/vulkan/radv_spm.c +++ b/src/amd/vulkan/radv_spm.c @@ -62,7 +62,8 @@ radv_spm_init_bo(struct radv_device *device) static void radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf) { - const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; struct ac_spm *spm = &device->spm; if (gfx_level >= GFX11) { @@ -142,7 +143,8 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu void radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf) { - const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; struct ac_spm *spm = &device->spm; uint64_t va = radv_buffer_get_va(spm->bo); uint64_t ring_size = spm->buffer_size; @@ -170,7 +172,7 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum r radeon_set_uconfig_reg(cs, R_03726C_RLC_SPM_ACCUM_MODE, 0); - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { radeon_set_uconfig_reg(cs, R_03721C_RLC_SPM_PERFMON_SEGMENT_SIZE, S_03721C_TOTAL_NUM_SEGMENT(total_muxsel_lines) | S_03721C_GLOBAL_NUM_SEGMENT(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]) | @@ -238,8 +240,9 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum r bool radv_spm_init(struct radv_device *device) { - const struct radeon_info *gpu_info = &device->physical_device->info; - struct ac_perfcounters *pc = &device->physical_device->ac_perfcounters; + struct radv_physical_device *pdev = radv_device_physical(device); + const struct radeon_info *gpu_info = &pdev->info; + struct ac_perfcounters *pc = &pdev->ac_perfcounters; /* We failed to initialize the performance counters. */ if (!pc->blocks) diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c index 165c71ce313..7366df38308 100644 --- a/src/amd/vulkan/radv_sqtt.c +++ b/src/amd/vulkan/radv_sqtt.c @@ -54,15 +54,16 @@ gfx11_get_sqtt_ctrl(const struct radv_device *device, bool enable) static uint32_t gfx10_get_sqtt_ctrl(const struct radv_device *device, bool enable) { + const struct radv_physical_device *pdev = radv_device_physical(device); uint32_t sqtt_ctrl = S_008D1C_MODE(enable) | S_008D1C_HIWATER(5) | S_008D1C_UTIL_TIMER(1) | S_008D1C_RT_FREQ(2) | /* 4096 clk */ S_008D1C_DRAW_EVENT_EN(1) | S_008D1C_REG_STALL_EN(1) | S_008D1C_SPI_STALL_EN(1) | S_008D1C_SQ_STALL_EN(1) | S_008D1C_REG_DROP_ON_STALL(0); - if (device->physical_device->info.gfx_level == GFX10_3) + if (pdev->info.gfx_level == GFX10_3) sqtt_ctrl |= S_008D1C_LOWATER_OFFSET(4); - if (device->physical_device->info.has_sqtt_auto_flush_mode_bug) + if (pdev->info.has_sqtt_auto_flush_mode_bug) sqtt_ctrl |= S_008D1C_AUTO_FLUSH_MODE(1); return sqtt_ctrl; @@ -86,10 +87,11 @@ radv_ip_to_queue_family(enum amd_ip_type t) static void radv_emit_wait_for_idle(const struct radv_device *device, struct radeon_cmdbuf *cs, int family) { + const struct radv_physical_device *pdev = radv_device_physical(device); const enum radv_queue_family qf = radv_ip_to_queue_family(family); enum rgp_flush_bits sqtt_flush_bits = 0; radv_cs_emit_cache_flush( - device->ws, cs, device->physical_device->info.gfx_level, NULL, 0, qf, + device->ws, cs, pdev->info.gfx_level, NULL, 0, qf, (family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) | RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2, @@ -99,9 +101,10 @@ radv_emit_wait_for_idle(const struct radv_device *device, struct radeon_cmdbuf * static void radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf) { - const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; uint32_t shifted_size = device->sqtt.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT; - const struct radeon_info *gpu_info = &device->physical_device->info; + const struct radeon_info *gpu_info = &pdev->info; const unsigned shader_mask = ac_sqtt_get_shader_mask(gpu_info); unsigned max_se = gpu_info->max_se; @@ -111,7 +114,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va = radv_buffer_get_va(device->sqtt.bo); uint64_t data_va = ac_sqtt_get_data_va(gpu_info, &device->sqtt, va, se); uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT; - int active_cu = ac_sqtt_get_active_cu(&device->physical_device->info, se); + int active_cu = ac_sqtt_get_active_cu(&pdev->info, se); if (ac_sqtt_se_is_disabled(gpu_info, se)) continue; @@ -120,7 +123,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1)); - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { /* Order seems important for the following 2 registers. */ radeon_set_perfctr_reg(gfx_level, qf, cs, R_0367A4_SQ_THREAD_TRACE_BUF0_SIZE, S_0367A4_SIZE(shifted_size) | S_0367A4_BASE_HI(shifted_va >> 32)); @@ -151,7 +154,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, /* Should be emitted last (it enables thread traces). */ radeon_set_perfctr_reg(gfx_level, qf, cs, R_0367B0_SQ_THREAD_TRACE_CTRL, gfx11_get_sqtt_ctrl(device, true)); - } else if (device->physical_device->info.gfx_level >= GFX10) { + } else if (pdev->info.gfx_level >= GFX10) { /* Order seems important for the following 2 registers. */ radeon_set_privileged_config_reg(cs, R_008D04_SQ_THREAD_TRACE_BUF0_SIZE, S_008D04_SIZE(shifted_size) | S_008D04_BASE_HI(shifted_va >> 32)); @@ -196,7 +199,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, S_030CC8_VM_ID_MASK(0) | S_030CC8_REG_STALL_EN(1) | S_030CC8_SPI_STALL_EN(1) | S_030CC8_SQ_STALL_EN(1); - if (device->physical_device->info.gfx_level < GFX9) { + if (pdev->info.gfx_level < GFX9) { sqtt_mask |= S_030CC8_RANDOM_SEED(0xffff); } @@ -214,7 +217,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, radeon_set_uconfig_reg(cs, R_030CEC_SQ_THREAD_TRACE_HIWATER, S_030CEC_HIWATER(4)); - if (device->physical_device->info.gfx_level == GFX9) { + if (pdev->info.gfx_level == GFX9) { /* Reset thread trace status errors. */ radeon_set_uconfig_reg(cs, R_030CE8_SQ_THREAD_TRACE_STATUS, S_030CE8_UTC_ERROR(0)); } @@ -225,7 +228,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */ S_030CD8_MODE(1); - if (device->physical_device->info.gfx_level == GFX9) { + if (pdev->info.gfx_level == GFX9) { /* Count SQTT traffic in TCC perf counters. */ sqtt_mode |= S_030CD8_TC_PERF_EN(1); } @@ -274,17 +277,17 @@ static const uint32_t gfx11_sqtt_info_regs[] = { static void radv_copy_sqtt_info_regs(const struct radv_device *device, struct radeon_cmdbuf *cs, unsigned se_index) { - const struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); const uint32_t *sqtt_info_regs = NULL; - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { sqtt_info_regs = gfx11_sqtt_info_regs; - } else if (device->physical_device->info.gfx_level >= GFX10) { + } else if (pdev->info.gfx_level >= GFX10) { sqtt_info_regs = gfx10_sqtt_info_regs; - } else if (device->physical_device->info.gfx_level == GFX9) { + } else if (pdev->info.gfx_level == GFX9) { sqtt_info_regs = gfx9_sqtt_info_regs; } else { - assert(device->physical_device->info.gfx_level == GFX8); + assert(pdev->info.gfx_level == GFX8); sqtt_info_regs = gfx8_sqtt_info_regs; } @@ -330,8 +333,9 @@ radv_copy_sqtt_info_regs(const struct radv_device *device, struct radeon_cmdbuf static void radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf) { - const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; - unsigned max_se = device->physical_device->info.max_se; + const struct radv_physical_device *pdev = radv_device_physical(device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; + unsigned max_se = pdev->info.max_se; radeon_check_space(device->ws, cs, 8 + max_se * 64); @@ -346,20 +350,20 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs, radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0)); - if (device->physical_device->info.has_sqtt_rb_harvest_bug) { + if (pdev->info.has_sqtt_rb_harvest_bug) { /* Some chips with disabled RBs should wait for idle because FINISH_DONE doesn't work. */ radv_emit_wait_for_idle(device, cs, qf); } for (unsigned se = 0; se < max_se; se++) { - if (ac_sqtt_se_is_disabled(&device->physical_device->info, se)) + if (ac_sqtt_se_is_disabled(&pdev->info, se)) continue; /* Target SEi and SH0. */ radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1)); - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { /* Make sure to wait for the trace buffer. */ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */ @@ -380,8 +384,8 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs, radeon_emit(cs, 0); /* reference value */ radeon_emit(cs, ~C_0367D0_BUSY); /* mask */ radeon_emit(cs, 4); /* poll interval */ - } else if (device->physical_device->info.gfx_level >= GFX10) { - if (!device->physical_device->info.has_sqtt_rb_harvest_bug) { + } else if (pdev->info.gfx_level >= GFX10) { + if (!pdev->info.has_sqtt_rb_harvest_bug) { /* Make sure to wait for the trace buffer. */ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */ @@ -429,7 +433,8 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs, void radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords) { - const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; const enum radv_queue_family qf = cmd_buffer->qf; struct radv_device *device = cmd_buffer->device; struct radeon_cmdbuf *cs = cmd_buffer->cs; @@ -446,7 +451,7 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da /* Without the perfctr bit the CP might not always pass the * write on correctly. */ - if (device->physical_device->info.gfx_level >= GFX10) + if (pdev->info.gfx_level >= GFX10) radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count); else radeon_set_uconfig_reg_seq(cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count); @@ -460,11 +465,13 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da void radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf *cs, bool enable) { - if (device->physical_device->info.gfx_level >= GFX9) { + const struct radv_physical_device *pdev = radv_device_physical(device); + + if (pdev->info.gfx_level >= GFX9) { uint32_t spi_config_cntl = S_031100_GPR_WRITE_PRIORITY(0x2c688) | S_031100_EXP_PRIORITY_ORDER(3) | S_031100_ENABLE_SQG_TOP_EVENTS(enable) | S_031100_ENABLE_SQG_BOP_EVENTS(enable); - if (device->physical_device->info.gfx_level >= GFX10) + if (pdev->info.gfx_level >= GFX10) spi_config_cntl |= S_031100_PS_PKR_PRIORITY_CNTL(3); radeon_set_uconfig_reg(cs, R_031100_SPI_CONFIG_CNTL, spi_config_cntl); @@ -478,12 +485,14 @@ radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf void radv_emit_inhibit_clockgating(const struct radv_device *device, struct radeon_cmdbuf *cs, bool inhibit) { - if (device->physical_device->info.gfx_level >= GFX11) + const struct radv_physical_device *pdev = radv_device_physical(device); + + if (pdev->info.gfx_level >= GFX11) return; /* not needed */ - if (device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level >= GFX10) { radeon_set_uconfig_reg(cs, R_037390_RLC_PERFMON_CLK_CNTL, S_037390_PERFMON_CLOCK_STATE(inhibit)); - } else if (device->physical_device->info.gfx_level >= GFX8) { + } else if (pdev->info.gfx_level >= GFX8) { radeon_set_uconfig_reg(cs, R_0372FC_RLC_PERFMON_CLK_CNTL, S_0372FC_PERFMON_CLOCK_STATE(inhibit)); } } @@ -620,7 +629,8 @@ radv_sqtt_finish_queue_event(struct radv_device *device) static bool radv_sqtt_init_bo(struct radv_device *device) { - unsigned max_se = device->physical_device->info.max_se; + const struct radv_physical_device *pdev = radv_device_physical(device); + unsigned max_se = pdev->info.max_se; struct radeon_winsys *ws = device->ws; VkResult result; uint64_t size; @@ -801,6 +811,7 @@ bool radv_begin_sqtt(struct radv_queue *queue) { struct radv_device *device = queue->device; + const struct radv_physical_device *pdev = radv_device_physical(device); enum radv_queue_family family = queue->state.qf; struct radeon_winsys *ws = device->ws; struct radeon_cmdbuf *cs; @@ -846,7 +857,7 @@ radv_begin_sqtt(struct radv_queue *queue) if (device->spm.bo) { /* Enable all shader stages by default. */ - radv_perfcounter_emit_shaders(device, cs, ac_sqtt_get_shader_mask(&device->physical_device->info)); + radv_perfcounter_emit_shaders(device, cs, ac_sqtt_get_shader_mask(&pdev->info)); radv_emit_spm_setup(device, cs, family); } @@ -936,7 +947,8 @@ bool radv_get_sqtt_trace(struct radv_queue *queue, struct ac_sqtt_trace *sqtt_trace) { struct radv_device *device = queue->device; - const struct radeon_info *gpu_info = &device->physical_device->info; + const struct radv_physical_device *pdev = radv_device_physical(device); + const struct radeon_info *gpu_info = &pdev->info; if (!ac_sqtt_get_trace(&device->sqtt, gpu_info, sqtt_trace)) { if (!radv_sqtt_resize_bo(device)) diff --git a/src/amd/vulkan/radv_video.c b/src/amd/vulkan/radv_video.c index f1914dc033d..66447013592 100644 --- a/src/amd/vulkan/radv_video.c +++ b/src/amd/vulkan/radv_video.c @@ -311,7 +311,7 @@ calc_ctx_size_h265_main10(struct radv_video_session *vid) static unsigned calc_ctx_size_av1(struct radv_device *device, struct radv_video_session *vid) { - struct radv_physical_device *pdev = device->physical_device; + const struct radv_physical_device *pdev = radv_device_physical(device); unsigned frame_ctxt_size = pdev->av1_version == RDECODE_AV1_VER_0 ? align(sizeof(rvcn_av1_frame_context_t), 2048) : align(sizeof(rvcn_av1_vcn4_frame_context_t), 2048); @@ -345,6 +345,7 @@ radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR * const VkAllocationCallbacks *pAllocator, VkVideoSessionKHR *pVideoSession) { RADV_FROM_HANDLE(radv_device, device, _device); + struct radv_physical_device *pdev = radv_device_physical(device); struct radv_video_session *vid = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*vid), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); @@ -365,12 +366,12 @@ radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR * switch (vid->vk.op) { case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: vid->stream_type = RDECODE_CODEC_H264_PERF; - if (radv_enable_tier2(device->physical_device)) + if (radv_enable_tier2(pdev)) vid->dpb_type = DPB_DYNAMIC_TIER_2; break; case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: vid->stream_type = RDECODE_CODEC_H265; - if (radv_enable_tier2(device->physical_device)) + if (radv_enable_tier2(pdev)) vid->dpb_type = DPB_DYNAMIC_TIER_2; break; case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: @@ -381,10 +382,10 @@ radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR * return VK_ERROR_FEATURE_NOT_PRESENT; } - vid->stream_handle = radv_vid_alloc_stream_handle(device->physical_device); + vid->stream_handle = radv_vid_alloc_stream_handle(pdev); vid->dbg_frame_cnt = 0; vid->db_alignment = radv_video_get_db_alignment( - device->physical_device, vid->vk.max_coded.width, + pdev, vid->vk.max_coded.width, (vid->stream_type == RDECODE_CODEC_AV1 || (vid->stream_type == RDECODE_CODEC_H265 && vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10))); @@ -656,11 +657,13 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR vi { RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_video_session, vid, videoSession); - uint32_t memory_type_bits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1; + const struct radv_physical_device *pdev = radv_device_physical(device); + + uint32_t memory_type_bits = (1u << pdev->memory_properties.memoryTypeCount) - 1; VK_OUTARRAY_MAKE_TYPED(VkVideoSessionMemoryRequirementsKHR, out, pMemoryRequirements, pMemoryRequirementsCount); /* 1 buffer for session context */ - if (device->physical_device->info.family >= CHIP_POLARIS10) { + if (pdev->info.family >= CHIP_POLARIS10) { vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) { m->memoryBindIndex = RADV_BIND_SESSION_CTX; @@ -670,7 +673,7 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR vi } } - if (vid->stream_type == RDECODE_CODEC_H264_PERF && device->physical_device->info.family >= CHIP_POLARIS10) { + if (vid->stream_type == RDECODE_CODEC_H264_PERF && pdev->info.family >= CHIP_POLARIS10) { vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) { m->memoryBindIndex = RADV_BIND_DECODER_CTX; @@ -701,9 +704,8 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR vi m->memoryRequirements.size = align(calc_ctx_size_av1(device, vid), 4096); m->memoryRequirements.alignment = 0; m->memoryRequirements.memoryTypeBits = 0; - for (unsigned i = 0; i < device->physical_device->memory_properties.memoryTypeCount; i++) - if (device->physical_device->memory_properties.memoryTypes[i].propertyFlags & - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + for (unsigned i = 0; i < pdev->memory_properties.memoryTypeCount; i++) + if (pdev->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) m->memoryRequirements.memoryTypeBits |= (1 << i); } } @@ -761,14 +763,15 @@ set_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, uint32_t val) static void send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd, struct radeon_winsys_bo *bo, uint32_t offset) { - struct radv_physical_device *pdev = cmd_buffer->device->physical_device; + struct radv_device *device = cmd_buffer->device; + const struct radv_physical_device *pdev = radv_device_physical(device); uint64_t addr; radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo); addr = radv_buffer_get_va(bo); addr += offset; - if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) { + if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) { radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 6); set_reg(cmd_buffer, pdev->vid_dec_reg.data0, addr); set_reg(cmd_buffer, pdev->vid_dec_reg.data1, addr >> 32); @@ -1037,6 +1040,7 @@ get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct uint32_t *height_in_samples, void *it_ptr) { + const struct radv_physical_device *pdev = radv_device_physical(device); rvcn_dec_message_hevc_t result; int i, j; const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info = @@ -1059,7 +1063,7 @@ get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7; result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8; - if (device->physical_device->info.family == CHIP_CARRIZO) + if (pdev->info.family == CHIP_CARRIZO) result.sps_info_flags |= 1 << 9; if (!h265_pic_info->pStdPictureInfo->flags.short_term_ref_pic_set_sps_flag) { @@ -2097,6 +2101,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se const struct VkVideoDecodeInfoKHR *frame_info) { struct radv_device *device = cmd_buffer->device; + const struct radv_physical_device *pdev = radv_device_physical(device); rvcn_dec_message_header_t *header; rvcn_dec_message_index_t *index_codec; rvcn_dec_message_decode_t *decode; @@ -2182,7 +2187,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se decode->dt_tiling_mode = 0; decode->dt_swizzle_mode = luma->surface.u.gfx9.swizzle_mode; - decode->dt_array_mode = device->physical_device->vid_addr_gfx_mode; + decode->dt_array_mode = pdev->vid_addr_gfx_mode; decode->dt_field_mode = vid->interlaced ? 1 : 0; decode->dt_surf_tile_config = 0; decode->dt_uv_surf_tile_config = 0; @@ -2254,7 +2259,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se decode->db_pitch = dpb->planes[0].surface.u.gfx9.surf_pitch; decode->db_aligned_height = dpb->planes[0].surface.u.gfx9.surf_height; decode->db_swizzle_mode = dpb->planes[0].surface.u.gfx9.swizzle_mode; - decode->db_array_mode = device->physical_device->vid_addr_gfx_mode; + decode->db_array_mode = pdev->vid_addr_gfx_mode; decode->hw_ctxt_size = vid->ctx.size; @@ -2427,6 +2432,7 @@ get_uvd_h265_msg(struct radv_device *device, struct radv_video_session *vid, str const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *width_in_samples, uint32_t *height_in_samples, void *it_ptr) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct ruvd_h265 result; int i, j; const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info = @@ -2450,7 +2456,7 @@ get_uvd_h265_msg(struct radv_device *device, struct radv_video_session *vid, str result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7; result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8; - if (device->physical_device->info.family == CHIP_CARRIZO) + if (pdev->info.family == CHIP_CARRIZO) result.sps_info_flags |= 1 << 9; *width_in_samples = sps->pic_width_in_luma_samples; @@ -2592,6 +2598,7 @@ ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *v struct radv_video_session_params *params, void *ptr, void *it_ptr, uint32_t *slice_offset, const struct VkVideoDecodeInfoKHR *frame_info) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct ruvd_msg *msg = ptr; struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding); struct radv_image *img = dst_iv->image; @@ -2616,7 +2623,7 @@ ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *v msg->body.decode.bsd_size = frame_info->srcBufferRange; msg->body.decode.db_pitch = align(frame_info->dstPictureResource.codedExtent.width, vid->db_alignment); - if (vid->stream_type == RUVD_CODEC_H264_PERF && device->physical_device->info.family >= CHIP_POLARIS10) + if (vid->stream_type == RUVD_CODEC_H264_PERF && pdev->info.family >= CHIP_POLARIS10) msg->body.decode.dpb_reserved = vid->ctx.size; *slice_offset = 0; @@ -2643,7 +2650,7 @@ ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *v msg->body.decode.dt_field_mode = false; - if (device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { msg->body.decode.dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w; msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR; msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR; @@ -2703,7 +2710,7 @@ ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *v RUVD_MACRO_TILE_ASPECT_RATIO(util_logbase2(luma->surface.u.legacy.mtilea)); } - if (device->physical_device->info.family >= CHIP_STONEY) + if (pdev->info.family >= CHIP_STONEY) msg->body.decode.dt_wa_chroma_top_offset = msg->body.decode.dt_pitch / 2; msg->body.decode.db_surf_tile_config = msg->body.decode.dt_surf_tile_config; @@ -2740,8 +2747,8 @@ radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoBeginCod static void radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radv_video_session *vid = cmd_buffer->video.vid; - struct radv_physical_device *pdev = cmd_buffer->device->physical_device; uint32_t size = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t); void *ptr; @@ -2771,7 +2778,7 @@ radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer) } radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr); - if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED) + if (pdev->vid_decode_ip == AMD_IP_VCN_UNIFIED) radv_vcn_sq_start(cmd_buffer); rvcn_dec_message_create(vid, ptr, size); @@ -2779,7 +2786,7 @@ radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer) send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset); /* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */ - if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) { + if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) { radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 8); for (unsigned i = 0; i < 8; i++) radeon_emit(cmd_buffer->cs, 0x81ff); @@ -2812,8 +2819,10 @@ VKAPI_ATTR void VKAPI_CALL radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoCodingControlInfoKHR *pCodingControlInfo) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + if (pCodingControlInfo->flags & VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR) { - if (radv_has_uvd(cmd_buffer->device->physical_device)) + if (radv_has_uvd(pdev)) radv_uvd_cmd_reset(cmd_buffer); else radv_vcn_cmd_reset(cmd_buffer); @@ -2829,14 +2838,14 @@ static void radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info) { RADV_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radv_video_session *vid = cmd_buffer->video.vid; struct radv_video_session_params *params = cmd_buffer->video.params; unsigned size = sizeof(struct ruvd_msg); void *ptr, *fb_ptr, *it_probs_ptr = NULL; uint32_t out_offset, fb_offset, it_probs_offset = 0; struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL; - unsigned fb_size = - (cmd_buffer->device->physical_device->info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE; + unsigned fb_size = (pdev->info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE; radv_vid_buffer_upload_alloc(cmd_buffer, fb_size, &fb_offset, &fb_ptr); fb_bo = cmd_buffer->upload.upload_bo; @@ -2876,13 +2885,14 @@ radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_probs_bo, it_probs_offset); radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 2); - set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1); + set_reg(cmd_buffer, pdev->vid_dec_reg.cntl, 1); } static void radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info) { RADV_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer); + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); struct radv_video_session *vid = cmd_buffer->video.vid; struct radv_video_session_params *params = cmd_buffer->video.params; unsigned size = 0; @@ -2924,7 +2934,7 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr); msg_bo = cmd_buffer->upload.upload_bo; - if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED) + if (pdev->vid_decode_ip == AMD_IP_VCN_UNIFIED) radv_vcn_sq_start(cmd_buffer); uint32_t slice_offset; @@ -2955,9 +2965,9 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf else if (have_probs(vid)) send_cmd(cmd_buffer, RDECODE_CMD_PROB_TBL_BUFFER, it_probs_bo, it_probs_offset); - if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) { + if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) { radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 2); - set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1); + set_reg(cmd_buffer, pdev->vid_dec_reg.cntl, 1); } else radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq); } @@ -2966,8 +2976,9 @@ VKAPI_ATTR void VKAPI_CALL radv_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer, const VkVideoDecodeInfoKHR *frame_info) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); + struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); - if (radv_has_uvd(cmd_buffer->device->physical_device)) + if (radv_has_uvd(pdev)) radv_uvd_decode_video(cmd_buffer, frame_info); else radv_vcn_decode_video(cmd_buffer, frame_info); diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c index 21d289bcc36..b0433fea036 100644 --- a/src/amd/vulkan/radv_wsi.c +++ b/src/amd/vulkan/radv_wsi.c @@ -54,17 +54,17 @@ static VkQueue radv_wsi_get_prime_blit_queue(VkDevice _device) { RADV_FROM_HANDLE(radv_device, device, _device); + struct radv_physical_device *pdev = radv_device_physical(device); if (device->private_sdma_queue != VK_NULL_HANDLE) return vk_queue_to_handle(&device->private_sdma_queue->vk); - if (device->physical_device->info.gfx_level >= GFX9 && - !(device->physical_device->instance->debug_flags & RADV_DEBUG_NO_DMA_BLIT)) { + if (pdev->info.gfx_level >= GFX9 && !(pdev->instance->debug_flags & RADV_DEBUG_NO_DMA_BLIT)) { - device->physical_device->vk_queue_to_radv[device->physical_device->num_queues++] = RADV_QUEUE_TRANSFER; + pdev->vk_queue_to_radv[pdev->num_queues++] = RADV_QUEUE_TRANSFER; const VkDeviceQueueCreateInfo queue_create = { .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, - .queueFamilyIndex = device->physical_device->num_queues - 1, + .queueFamilyIndex = pdev->num_queues - 1, .queueCount = 1, }; diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index eb3f89b8012..84f083ef4fa 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -73,14 +73,15 @@ radv_write_harvested_raster_configs(struct radv_physical_device *pdev, struct ra void radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs) { - const struct radeon_info *gpu_info = &device->physical_device->info; + const struct radv_physical_device *pdev = radv_device_physical(device); + const struct radeon_info *gpu_info = &pdev->info; radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3); radeon_emit(cs, 0); radeon_emit(cs, 0); radeon_emit(cs, 0); - radeon_set_sh_reg(cs, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(device->physical_device->info.address32_hi >> 8)); + radeon_set_sh_reg(cs, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(pdev->info.address32_hi >> 8)); radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2); /* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1, @@ -90,7 +91,7 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs) radeon_emit(cs, S_00B8AC_SA0_CU_EN(cu_mask) | S_00B8AC_SA1_CU_EN(cu_mask)); } - if (device->physical_device->info.gfx_level >= GFX7) { + if (pdev->info.gfx_level >= GFX7) { /* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */ radeon_set_sh_reg_seq(cs, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2); for (unsigned i = 2; i < 4; ++i) { @@ -107,12 +108,11 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs) } } - if (device->physical_device->info.gfx_level >= GFX9 && device->physical_device->info.gfx_level < GFX11) { - radeon_set_uconfig_reg(cs, R_0301EC_CP_COHER_START_DELAY, - device->physical_device->info.gfx_level >= GFX10 ? 0x20 : 0); + if (pdev->info.gfx_level >= GFX9 && pdev->info.gfx_level < GFX11) { + radeon_set_uconfig_reg(cs, R_0301EC_CP_COHER_START_DELAY, pdev->info.gfx_level >= GFX10 ? 0x20 : 0); } - if (device->physical_device->info.gfx_level >= GFX10) { + if (pdev->info.gfx_level >= GFX10) { radeon_set_sh_reg_seq(cs, R_00B890_COMPUTE_USER_ACCUM_0, 4); radeon_emit(cs, 0); /* R_00B890_COMPUTE_USER_ACCUM_0 */ radeon_emit(cs, 0); /* R_00B894_COMPUTE_USER_ACCUM_1 */ @@ -122,7 +122,7 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs) radeon_set_sh_reg(cs, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0); } - if (device->physical_device->info.gfx_level == GFX6) { + if (pdev->info.gfx_level == GFX6) { if (device->border_color_data.bo) { uint64_t bc_va = radv_buffer_get_va(device->border_color_data.bo); radeon_set_config_reg(cs, R_00950C_TA_CS_BC_BASE_ADDR, bc_va >> 8); @@ -132,7 +132,7 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs) if (device->tma_bo) { uint64_t tba_va, tma_va; - assert(device->physical_device->info.gfx_level == GFX8); + assert(pdev->info.gfx_level == GFX8); tba_va = radv_shader_get_va(device->trap_handler_shader); tma_va = radv_buffer_get_va(device->tma_bo); @@ -144,7 +144,7 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs) radeon_emit(cs, tma_va >> 40); } - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { radeon_set_sh_reg_seq(cs, R_00B8AC_COMPUTE_STATIC_THREAD_MGMT_SE4, 4); /* SE4-SE7 */ for (unsigned i = 4; i < 8; ++i) { @@ -187,7 +187,7 @@ radv_set_raster_config(struct radv_physical_device *pdev, struct radeon_cmdbuf * void radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) { - struct radv_physical_device *pdev = device->physical_device; + struct radv_physical_device *pdev = radv_device_physical(device); bool has_clear_state = pdev->info.has_clear_state; int i; @@ -300,26 +300,19 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0); } - if (device->physical_device->info.gfx_level >= GFX10) { - radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS, - S_00B524_MEM_BASE(device->physical_device->info.address32_hi >> 8)); - radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES, - S_00B324_MEM_BASE(device->physical_device->info.address32_hi >> 8)); - } else if (device->physical_device->info.gfx_level == GFX9) { - radeon_set_sh_reg(cs, R_00B414_SPI_SHADER_PGM_HI_LS, - S_00B414_MEM_BASE(device->physical_device->info.address32_hi >> 8)); - radeon_set_sh_reg(cs, R_00B214_SPI_SHADER_PGM_HI_ES, - S_00B214_MEM_BASE(device->physical_device->info.address32_hi >> 8)); + if (pdev->info.gfx_level >= GFX10) { + radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS, S_00B524_MEM_BASE(pdev->info.address32_hi >> 8)); + radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(pdev->info.address32_hi >> 8)); + } else if (pdev->info.gfx_level == GFX9) { + radeon_set_sh_reg(cs, R_00B414_SPI_SHADER_PGM_HI_LS, S_00B414_MEM_BASE(pdev->info.address32_hi >> 8)); + radeon_set_sh_reg(cs, R_00B214_SPI_SHADER_PGM_HI_ES, S_00B214_MEM_BASE(pdev->info.address32_hi >> 8)); } else { - radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS, - S_00B524_MEM_BASE(device->physical_device->info.address32_hi >> 8)); - radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES, - S_00B324_MEM_BASE(device->physical_device->info.address32_hi >> 8)); + radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS, S_00B524_MEM_BASE(pdev->info.address32_hi >> 8)); + radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(pdev->info.address32_hi >> 8)); } - if (device->physical_device->info.gfx_level < GFX11) - radeon_set_sh_reg(cs, R_00B124_SPI_SHADER_PGM_HI_VS, - S_00B124_MEM_BASE(device->physical_device->info.address32_hi >> 8)); + if (pdev->info.gfx_level < GFX11) + radeon_set_sh_reg(cs, R_00B124_SPI_SHADER_PGM_HI_VS, S_00B124_MEM_BASE(pdev->info.address32_hi >> 8)); unsigned cu_mask_ps = 0xffffffff; @@ -400,8 +393,7 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) /* Enable CMASK/FMASK/HTILE/DCC caching in L2 for small chips. */ unsigned meta_write_policy, meta_read_policy; - unsigned no_alloc = - device->physical_device->info.gfx_level >= GFX11 ? V_02807C_CACHE_NOA_GFX11 : V_02807C_CACHE_NOA_GFX10; + unsigned no_alloc = pdev->info.gfx_level >= GFX11 ? V_02807C_CACHE_NOA_GFX11 : V_02807C_CACHE_NOA_GFX10; /* TODO: investigate whether LRU improves performance on other chips too */ if (pdev->info.max_render_backends <= 4) { @@ -419,7 +411,7 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) S_02807C_S_RD_POLICY(no_alloc) | S_02807C_HTILE_RD_POLICY(meta_read_policy)); uint32_t gl2_cc; - if (device->physical_device->info.gfx_level >= GFX11) { + if (pdev->info.gfx_level >= GFX11) { gl2_cc = S_028410_DCC_WR_POLICY_GFX11(meta_write_policy) | S_028410_COLOR_WR_POLICY_GFX11(V_028410_CACHE_STREAM) | S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_GFX11); @@ -569,7 +561,7 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) if (device->tma_bo) { uint64_t tba_va, tma_va; - assert(device->physical_device->info.gfx_level == GFX8); + assert(pdev->info.gfx_level == GFX8); tba_va = radv_shader_get_va(device->trap_handler_shader); tma_va = radv_buffer_get_va(device->tma_bo); @@ -630,6 +622,7 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) void radv_create_gfx_config(struct radv_device *device) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct radeon_cmdbuf *cs = device->ws->cs_create(device->ws, AMD_IP_GFX, false); if (!cs) return; @@ -639,7 +632,7 @@ radv_create_gfx_config(struct radv_device *device) radv_emit_graphics(device, cs); while (cs->cdw & 7) { - if (device->physical_device->info.gfx_ib_pad_with_type2) + if (pdev->info.gfx_ib_pad_with_type2) radeon_emit(cs, PKT2_NOP_PAD); else radeon_emit(cs, PKT3_NOP_PAD); @@ -817,7 +810,8 @@ radv_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_d bool count_from_stream_output, uint32_t draw_vertex_count, unsigned topology, bool prim_restart_enable, unsigned patch_control_points, unsigned num_tess_patches) { - const struct radeon_info *gpu_info = &cmd_buffer->device->physical_device->info; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + const struct radeon_info *gpu_info = &pdev->info; const unsigned max_primgroup_in_wave = 2; /* SWITCH_ON_EOP(0) is always preferable. */ bool wd_switch_on_eop = false; @@ -839,7 +833,7 @@ radv_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_d /* GS requirement. */ if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_GEOMETRY) && gpu_info->gfx_level <= GFX8) { - unsigned gs_table_depth = cmd_buffer->device->physical_device->gs_table_depth; + unsigned gs_table_depth = pdev->gs_table_depth; if (SI_GS_PER_ES / primgroup_size >= gs_table_depth - 3) partial_es_wave = true; } @@ -1495,6 +1489,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enu void radv_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); bool is_compute = cmd_buffer->qf == RADV_QUEUE_COMPUTE; if (is_compute) @@ -1509,10 +1504,10 @@ radv_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) return; } - radv_cs_emit_cache_flush(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->device->physical_device->info.gfx_level, - &cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va, - radv_cmd_buffer_uses_mec(cmd_buffer), cmd_buffer->state.flush_bits, - &cmd_buffer->state.sqtt_flush_bits, cmd_buffer->gfx9_eop_bug_va); + radv_cs_emit_cache_flush(cmd_buffer->device->ws, cmd_buffer->cs, pdev->info.gfx_level, &cmd_buffer->gfx9_fence_idx, + cmd_buffer->gfx9_fence_va, radv_cmd_buffer_uses_mec(cmd_buffer), + cmd_buffer->state.flush_bits, &cmd_buffer->state.sqtt_flush_bits, + cmd_buffer->gfx9_eop_bug_va); if (radv_device_fault_detection_enabled(cmd_buffer->device)) radv_cmd_buffer_trace_emit(cmd_buffer); @@ -1539,6 +1534,7 @@ radv_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) void radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible, unsigned pred_op, uint64_t va) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); uint32_t op = 0; radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4); @@ -1554,7 +1550,7 @@ radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_vi */ op |= draw_visible ? PREDICATION_DRAW_VISIBLE : PREDICATION_DRAW_NOT_VISIBLE; } - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 2, 0)); radeon_emit(cmd_buffer->cs, op); radeon_emit(cmd_buffer->cs, va); @@ -1569,7 +1565,8 @@ radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_vi void radv_emit_cond_exec(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, uint32_t count) { - const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(device); + const enum amd_gfx_level gfx_level = pdev->info.gfx_level; if (gfx_level >= GFX7) { radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0)); @@ -1619,12 +1616,13 @@ static void radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool predicating, uint64_t dst_va, uint64_t src_va, unsigned size, unsigned flags) { + const struct radv_physical_device *pdev = radv_device_physical(device); uint32_t header = 0, command = 0; - assert(size <= cp_dma_max_byte_count(device->physical_device->info.gfx_level)); + assert(size <= cp_dma_max_byte_count(pdev->info.gfx_level)); radeon_check_space(device->ws, cs, 9); - if (device->physical_device->info.gfx_level >= GFX9) + if (pdev->info.gfx_level >= GFX9) command |= S_415_BYTE_COUNT_GFX9(size); else command |= S_415_BYTE_COUNT_GFX6(size); @@ -1637,7 +1635,7 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool p command |= S_415_RAW_WAIT(1); /* Src and dst flags. */ - if (device->physical_device->info.gfx_level >= GFX9 && !(flags & CP_DMA_CLEAR) && src_va == dst_va) + if (pdev->info.gfx_level >= GFX9 && !(flags & CP_DMA_CLEAR) && src_va == dst_va) header |= S_411_DST_SEL(V_411_NOWHERE); /* prefetch only */ else if (flags & CP_DMA_USE_L2) header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2); @@ -1647,7 +1645,7 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool p else if (flags & CP_DMA_USE_L2) header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2); - if (device->physical_device->info.gfx_level >= GFX7) { + if (pdev->info.gfx_level >= GFX7) { radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, predicating)); radeon_emit(cs, header); radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */ @@ -1699,8 +1697,9 @@ void radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, unsigned size, bool predicating) { + const struct radv_physical_device *pdev = radv_device_physical(device); struct radeon_winsys *ws = device->ws; - enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; + enum amd_gfx_level gfx_level = pdev->info.gfx_level; uint32_t header = 0, command = 0; if (gfx_level >= GFX11) @@ -1784,15 +1783,15 @@ radv_cp_dma_realign_engine(struct radv_cmd_buffer *cmd_buffer, unsigned size) void radv_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va, uint64_t size) { - enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level; + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + enum amd_gfx_level gfx_level = pdev->info.gfx_level; uint64_t main_src_va, main_dest_va; uint64_t skipped_size = 0, realign_size = 0; /* Assume that we are not going to sync after the last DMA operation. */ cmd_buffer->state.dma_is_busy = true; - if (cmd_buffer->device->physical_device->info.family <= CHIP_CARRIZO || - cmd_buffer->device->physical_device->info.family == CHIP_STONEY) { + if (pdev->info.family <= CHIP_CARRIZO || pdev->info.family == CHIP_STONEY) { /* If the size is not aligned, we must add a dummy copy at the end * just to align the internal counter. Otherwise, the DMA engine * would slow down by an order of magnitude for following copies. @@ -1818,7 +1817,7 @@ radv_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uin unsigned dma_flags = 0; unsigned byte_count = MIN2(size, cp_dma_max_byte_count(gfx_level)); - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { /* DMA operations via L2 are coherent and faster. * TODO: GFX7-GFX8 should also support this but it * requires tests/benchmarks. @@ -1858,12 +1857,14 @@ radv_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uin void radv_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, unsigned value) { + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + if (!size) return; assert(va % 4 == 0 && size % 4 == 0); - enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level; + enum amd_gfx_level gfx_level = pdev->info.gfx_level; /* Assume that we are not going to sync after the last DMA operation. */ cmd_buffer->state.dma_is_busy = true; @@ -1872,7 +1873,7 @@ radv_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64 unsigned byte_count = MIN2(size, cp_dma_max_byte_count(gfx_level)); unsigned dma_flags = CP_DMA_CLEAR; - if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) { + if (pdev->info.gfx_level >= GFX9) { /* DMA operations via L2 are coherent and faster. * TODO: GFX7-GFX8 should also support this but it * requires tests/benchmarks. @@ -1895,7 +1896,9 @@ radv_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64 void radv_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer) { - if (cmd_buffer->device->physical_device->info.gfx_level < GFX7) + const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device); + + if (pdev->info.gfx_level < GFX7) return; if (!cmd_buffer->state.dma_is_busy)