radv: remove radv_device::physical_device

Get the logical device object using the base object.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28500>
This commit is contained in:
Samuel Pitoiset 2024-03-28 14:42:10 +01:00 committed by Marge Bot
parent 310597cab6
commit 896c9cf486
55 changed files with 1390 additions and 1035 deletions

View file

@@ -35,7 +35,8 @@
void void
radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv_graphics_pipeline *pipeline) radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv_graphics_pipeline *pipeline)
{ {
const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc; struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc;
struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radeon_cmdbuf *cs = cmd_buffer->cs;
uint64_t va; uint64_t va;
@@ -340,7 +341,8 @@ radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
return; return;
/* Reserve a command buffer ID for SQTT. */ /* Reserve a command buffer ID for SQTT. */
enum amd_ip_type ip_type = radv_queue_family_to_ring(cmd_buffer->device->physical_device, cmd_buffer->qf); const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
enum amd_ip_type ip_type = radv_queue_family_to_ring(pdev, cmd_buffer->qf);
union rgp_sqtt_marker_cb_id cb_id = ac_sqtt_get_next_cmdbuf_id(&cmd_buffer->device->sqtt, ip_type); union rgp_sqtt_marker_cb_id cb_id = ac_sqtt_get_next_cmdbuf_id(&cmd_buffer->device->sqtt, ip_type);
cmd_buffer->sqtt_cb_id = cb_id.all; cmd_buffer->sqtt_cb_id = cb_id.all;
@@ -354,7 +356,7 @@ radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
if (cmd_buffer->qf == RADV_QUEUE_GENERAL) if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT; marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;
if (!radv_sparse_queue_enabled(cmd_buffer->device->physical_device)) if (!radv_sparse_queue_enabled(pdev))
marker.queue_flags |= VK_QUEUE_SPARSE_BINDING_BIT; marker.queue_flags |= VK_QUEUE_SPARSE_BINDING_BIT;
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4); radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
@@ -655,6 +657,7 @@ radv_handle_sqtt(VkQueue _queue)
{ {
RADV_FROM_HANDLE(radv_queue, queue, _queue); RADV_FROM_HANDLE(radv_queue, queue, _queue);
const struct radv_physical_device *pdev = radv_device_physical(queue->device);
bool trigger = queue->device->sqtt_triggered; bool trigger = queue->device->sqtt_triggered;
queue->device->sqtt_triggered = false; queue->device->sqtt_triggered = false;
@@ -673,8 +676,7 @@ radv_handle_sqtt(VkQueue _queue)
if (queue->device->spm.bo) if (queue->device->spm.bo)
ac_spm_get_trace(&queue->device->spm, &spm_trace); ac_spm_get_trace(&queue->device->spm, &spm_trace);
ac_dump_rgp_capture(&queue->device->physical_device->info, &sqtt_trace, ac_dump_rgp_capture(&pdev->info, &sqtt_trace, queue->device->spm.bo ? &spm_trace : NULL);
queue->device->spm.bo ? &spm_trace : NULL);
} else { } else {
/* Trigger a new capture if the driver failed to get /* Trigger a new capture if the driver failed to get
* the trace because the buffer was too small. * the trace because the buffer was too small.
@@ -687,7 +689,7 @@ radv_handle_sqtt(VkQueue _queue)
} }
if (trigger) { if (trigger) {
if (ac_check_profile_state(&queue->device->physical_device->info)) { if (ac_check_profile_state(&pdev->info)) {
fprintf(stderr, "radv: Canceling RGP trace request as a hang condition has been " fprintf(stderr, "radv: Canceling RGP trace request as a hang condition has been "
"detected. Force the GPU into a profiling mode with e.g. " "detected. Force the GPU into a profiling mode with e.g. "
"\"echo profile_peak > " "\"echo profile_peak > "
@@ -1415,7 +1417,7 @@ static void
radv_fill_code_object_record(struct radv_device *device, struct rgp_shader_data *shader_data, radv_fill_code_object_record(struct radv_device *device, struct rgp_shader_data *shader_data,
struct radv_shader *shader, uint64_t va) struct radv_shader *shader, uint64_t va)
{ {
struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned lds_increment = pdev->info.gfx_level >= GFX11 && shader->info.stage == MESA_SHADER_FRAGMENT unsigned lds_increment = pdev->info.gfx_level >= GFX11 && shader->info.stage == MESA_SHADER_FRAGMENT
? 1024 ? 1024
: pdev->info.lds_encode_granularity; : pdev->info.lds_encode_granularity;

View file

@@ -461,6 +461,7 @@ fail:
VkResult VkResult
radv_device_init_meta(struct radv_device *device) radv_device_init_meta(struct radv_device *device)
{ {
struct radv_physical_device *pdev = radv_device_physical(device);
VkResult result; VkResult result;
memset(&device->meta_state, 0, sizeof(device->meta_state)); memset(&device->meta_state, 0, sizeof(device->meta_state));
@@ -521,7 +522,7 @@ radv_device_init_meta(struct radv_device *device)
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
goto fail_resolve_fragment; goto fail_resolve_fragment;
if (device->physical_device->use_fmask) { if (pdev->use_fmask) {
result = radv_device_init_meta_fmask_expand_state(device, on_demand); result = radv_device_init_meta_fmask_expand_state(device, on_demand);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
goto fail_fmask_expand; goto fail_fmask_expand;
@@ -555,11 +556,11 @@ radv_device_init_meta(struct radv_device *device)
/* FIXME: Acceleration structure builds hang when the build shaders are compiled with LLVM. /* FIXME: Acceleration structure builds hang when the build shaders are compiled with LLVM.
* Work around it by forcing ACO for now. * Work around it by forcing ACO for now.
*/ */
bool use_llvm = device->physical_device->use_llvm; bool use_llvm = pdev->use_llvm;
if (loaded_cache || use_llvm) { if (loaded_cache || use_llvm) {
device->physical_device->use_llvm = false; pdev->use_llvm = false;
result = radv_device_init_accel_struct_build_state(device); result = radv_device_init_accel_struct_build_state(device);
device->physical_device->use_llvm = use_llvm; pdev->use_llvm = use_llvm;
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
goto fail_accel_struct; goto fail_accel_struct;
@@ -639,6 +640,7 @@ radv_device_finish_meta(struct radv_device *device)
nir_builder PRINTFLIKE(3, 4) nir_builder PRINTFLIKE(3, 4)
radv_meta_init_shader(struct radv_device *dev, gl_shader_stage stage, const char *name, ...) radv_meta_init_shader(struct radv_device *dev, gl_shader_stage stage, const char *name, ...)
{ {
const struct radv_physical_device *pdev = radv_device_physical(dev);
nir_builder b = nir_builder_init_simple_shader(stage, NULL, NULL); nir_builder b = nir_builder_init_simple_shader(stage, NULL, NULL);
if (name) { if (name) {
va_list args; va_list args;
@@ -647,7 +649,7 @@ nir_builder PRINTFLIKE(3, 4)
va_end(args); va_end(args);
} }
b.shader->options = &dev->physical_device->nir_options[stage]; b.shader->options = &pdev->nir_options[stage];
radv_device_associate_nir(dev, b.shader); radv_device_associate_nir(dev, b.shader);
@@ -684,6 +686,7 @@ void
radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, bool is_integer, int samples, radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, bool is_integer, int samples,
nir_variable *input_img, nir_variable *color, nir_def *img_coord) nir_variable *input_img, nir_variable *color, nir_def *img_coord)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_deref_instr *input_img_deref = nir_build_deref_var(b, input_img); nir_deref_instr *input_img_deref = nir_build_deref_var(b, input_img);
nir_def *sample0 = nir_txf_ms_deref(b, input_img_deref, img_coord, nir_imm_int(b, 0)); nir_def *sample0 = nir_txf_ms_deref(b, input_img_deref, img_coord, nir_imm_int(b, 0));
@@ -692,7 +695,7 @@ radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b,
return; return;
} }
if (device->physical_device->use_fmask) { if (pdev->use_fmask) {
nir_def *all_same = nir_samples_identical_deref(b, input_img_deref, img_coord); nir_def *all_same = nir_samples_identical_deref(b, input_img_deref, img_coord);
nir_push_if(b, nir_inot(b, all_same)); nir_push_if(b, nir_inot(b, all_same));
} }
@@ -706,7 +709,7 @@ radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b,
accum = nir_fdiv_imm(b, accum, samples); accum = nir_fdiv_imm(b, accum, samples);
nir_store_var(b, color, accum, 0xf); nir_store_var(b, color, accum, 0xf);
if (device->physical_device->use_fmask) { if (pdev->use_fmask) {
nir_push_else(b, NULL); nir_push_else(b, NULL);
nir_store_var(b, color, sample0, 0xf); nir_store_var(b, color, sample0, 0xf);
nir_pop_if(b, NULL); nir_pop_if(b, NULL);

View file

@@ -32,9 +32,10 @@
VkResult VkResult
radv_device_init_meta_astc_decode_state(struct radv_device *device, bool on_demand) radv_device_init_meta_astc_decode_state(struct radv_device *device, bool on_demand)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_meta_state *state = &device->meta_state; struct radv_meta_state *state = &device->meta_state;
if (!device->physical_device->emulate_astc) if (!pdev->emulate_astc)
return VK_SUCCESS; return VK_SUCCESS;
return vk_texcompress_astc_init(&device->vk, &state->alloc, state->cache, &state->astc_decode); return vk_texcompress_astc_init(&device->vk, &state->alloc, state->cache, &state->astc_decode);
@@ -43,10 +44,11 @@ radv_device_init_meta_astc_decode_state(struct radv_device *device, bool on_dema
void void
radv_device_finish_meta_astc_decode_state(struct radv_device *device) radv_device_finish_meta_astc_decode_state(struct radv_device *device)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_meta_state *state = &device->meta_state; struct radv_meta_state *state = &device->meta_state;
struct vk_texcompress_astc_state *astc = state->astc_decode; struct vk_texcompress_astc_state *astc = state->astc_decode;
if (!device->physical_device->emulate_astc) if (!pdev->emulate_astc)
return; return;
vk_texcompress_astc_finish(&device->vk, &state->alloc, astc); vk_texcompress_astc_finish(&device->vk, &state->alloc, astc);

View file

@@ -213,9 +213,10 @@ static bool
radv_prefer_compute_dma(const struct radv_device *device, uint64_t size, struct radeon_winsys_bo *src_bo, radv_prefer_compute_dma(const struct radv_device *device, uint64_t size, struct radeon_winsys_bo *src_bo,
struct radeon_winsys_bo *dst_bo) struct radeon_winsys_bo *dst_bo)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
bool use_compute = size >= RADV_BUFFER_OPS_CS_THRESHOLD; bool use_compute = size >= RADV_BUFFER_OPS_CS_THRESHOLD;
if (device->physical_device->info.gfx_level >= GFX10 && device->physical_device->info.has_dedicated_vram) { if (pdev->info.gfx_level >= GFX10 && pdev->info.has_dedicated_vram) {
if ((src_bo && !(src_bo->initial_domain & RADEON_DOMAIN_VRAM)) || if ((src_bo && !(src_bo->initial_domain & RADEON_DOMAIN_VRAM)) ||
(dst_bo && !(dst_bo->initial_domain & RADEON_DOMAIN_VRAM))) { (dst_bo && !(dst_bo->initial_domain & RADEON_DOMAIN_VRAM))) {
/* Prefer CP DMA for GTT on dGPUS due to slow PCIe. */ /* Prefer CP DMA for GTT on dGPUS due to slow PCIe. */

View file

@@ -1174,11 +1174,12 @@ fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_meta_bl
const struct radv_meta_blit2d_surf *img_bsurf, const struct radv_meta_blit2d_rect *rect, const struct radv_meta_blit2d_surf *img_bsurf, const struct radv_meta_blit2d_rect *rect,
bool to_image) bool to_image)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
const unsigned mip_level = img_bsurf->level; const unsigned mip_level = img_bsurf->level;
const struct radv_image *image = img_bsurf->image; const struct radv_image *image = img_bsurf->image;
const struct radeon_surf *surf = &image->planes[0].surface; const struct radeon_surf *surf = &image->planes[0].surface;
struct radv_device *device = cmd_buffer->device; struct radv_device *device = cmd_buffer->device;
const struct radeon_info *gpu_info = &device->physical_device->info; const struct radeon_info *gpu_info = &pdev->info;
struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws); struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws);
struct ac_surf_info surf_info = radv_get_ac_surf_info(device, image); struct ac_surf_info surf_info = radv_get_ac_surf_info(device, image);
@@ -1243,9 +1244,10 @@ fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_meta_bl
static unsigned static unsigned
get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf) get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
unsigned stride; unsigned stride;
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
stride = surf->image->planes[0].surface.u.gfx9.surf_pitch; stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
} else { } else {
stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3; stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;

View file

@@ -1139,10 +1139,11 @@ uint32_t
radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range, radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
uint32_t value) uint32_t value)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
uint64_t offset = image->bindings[0].offset + image->planes[0].surface.cmask_offset; uint64_t offset = image->bindings[0].offset + image->planes[0].surface.cmask_offset;
uint64_t size; uint64_t size;
if (cmd_buffer->device->physical_device->info.gfx_level == GFX9) { if (pdev->info.gfx_level == GFX9) {
/* TODO: clear layers. */ /* TODO: clear layers. */
size = image->planes[0].surface.cmask_size; size = image->planes[0].surface.cmask_size;
} else { } else {
@@ -1178,6 +1179,7 @@ uint32_t
radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range, radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
uint32_t value) uint32_t value)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
uint32_t level_count = vk_image_subresource_level_count(&image->vk, range); uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
uint32_t layer_count = vk_image_subresource_layer_count(&image->vk, range); uint32_t layer_count = vk_image_subresource_layer_count(&image->vk, range);
uint32_t flush_bits = 0; uint32_t flush_bits = 0;
@@ -1190,12 +1192,12 @@ radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, con
uint32_t level = range->baseMipLevel + l; uint32_t level = range->baseMipLevel + l;
uint64_t size; uint64_t size;
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX10) { if (pdev->info.gfx_level >= GFX10) {
/* DCC for mipmaps+layers is currently disabled. */ /* DCC for mipmaps+layers is currently disabled. */
offset += image->planes[0].surface.meta_slice_size * range->baseArrayLayer + offset += image->planes[0].surface.meta_slice_size * range->baseArrayLayer +
image->planes[0].surface.u.gfx9.meta_levels[level].offset; image->planes[0].surface.u.gfx9.meta_levels[level].offset;
size = image->planes[0].surface.u.gfx9.meta_levels[level].size * layer_count; size = image->planes[0].surface.u.gfx9.meta_levels[level].size * layer_count;
} else if (cmd_buffer->device->physical_device->info.gfx_level == GFX9) { } else if (pdev->info.gfx_level == GFX9) {
/* Mipmap levels and layers aren't implemented. */ /* Mipmap levels and layers aren't implemented. */
assert(level == 0); assert(level == 0);
size = image->planes[0].surface.meta_size; size = image->planes[0].surface.meta_size;
@@ -1331,6 +1333,7 @@ uint32_t
radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
const VkImageSubresourceRange *range, uint32_t value) const VkImageSubresourceRange *range, uint32_t value)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
uint32_t level_count = vk_image_subresource_level_count(&image->vk, range); uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
uint32_t flush_bits = 0; uint32_t flush_bits = 0;
uint32_t htile_mask; uint32_t htile_mask;
@@ -1338,7 +1341,7 @@ radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im
htile_mask = radv_get_htile_mask(cmd_buffer->device, image, range->aspectMask); htile_mask = radv_get_htile_mask(cmd_buffer->device, image, range->aspectMask);
if (level_count != image->vk.mip_levels) { if (level_count != image->vk.mip_levels) {
assert(cmd_buffer->device->physical_device->info.gfx_level >= GFX10); assert(pdev->info.gfx_level >= GFX10);
/* Clear individual levels separately. */ /* Clear individual levels separately. */
for (uint32_t l = 0; l < level_count; l++) { for (uint32_t l = 0; l < level_count; l++) {
@ -1398,7 +1401,8 @@ enum {
static uint32_t static uint32_t
radv_dcc_single_clear_value(const struct radv_device *device) radv_dcc_single_clear_value(const struct radv_device *device)
{ {
return device->physical_device->info.gfx_level >= GFX11 ? RADV_DCC_GFX11_CLEAR_SINGLE : RADV_DCC_GFX9_CLEAR_SINGLE; const struct radv_physical_device *pdev = radv_device_physical(device);
return pdev->info.gfx_level >= GFX11 ? RADV_DCC_GFX11_CLEAR_SINGLE : RADV_DCC_GFX9_CLEAR_SINGLE;
} }
static void static void
@@ -1605,6 +1609,7 @@ radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_
VkImageLayout image_layout, const VkClearRect *clear_rect, VkClearColorValue clear_value, VkImageLayout image_layout, const VkClearRect *clear_rect, VkClearColorValue clear_value,
uint32_t view_mask) uint32_t view_mask)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
uint32_t clear_color[2]; uint32_t clear_color[2];
if (!iview || !iview->support_fast_clear) if (!iview || !iview->support_fast_clear)
@@ -1641,7 +1646,7 @@ radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_
bool can_avoid_fast_clear_elim; bool can_avoid_fast_clear_elim;
uint32_t reset_value; uint32_t reset_value;
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
if (!gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value)) if (!gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value))
return false; return false;
} else { } else {
@@ -1650,7 +1655,7 @@ radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_
} }
if (iview->image->vk.mip_levels > 1) { if (iview->image->vk.mip_levels > 1) {
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
uint32_t last_level = iview->vk.base_mip_level + iview->vk.level_count - 1; uint32_t last_level = iview->vk.base_mip_level + iview->vk.level_count - 1;
if (last_level >= iview->image->planes[0].surface.num_meta_levels) { if (last_level >= iview->image->planes[0].surface.num_meta_levels) {
/* Do not fast clear if one level can't be fast cleared. */ /* Do not fast clear if one level can't be fast cleared. */
@@ -1680,6 +1685,7 @@ radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_imag
const VkClearAttachment *clear_att, enum radv_cmd_flush_bits *pre_flush, const VkClearAttachment *clear_att, enum radv_cmd_flush_bits *pre_flush,
enum radv_cmd_flush_bits *post_flush) enum radv_cmd_flush_bits *post_flush)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
VkClearColorValue clear_value = clear_att->clearValue.color; VkClearColorValue clear_value = clear_att->clearValue.color;
uint32_t clear_color[4], flush_bits = 0; uint32_t clear_color[4], flush_bits = 0;
uint32_t cmask_clear_value; uint32_t cmask_clear_value;
@@ -1710,7 +1716,7 @@ radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_imag
uint32_t reset_value; uint32_t reset_value;
bool can_avoid_fast_clear_elim = true; bool can_avoid_fast_clear_elim = true;
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
ASSERTED bool result = gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value); ASSERTED bool result = gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value);
assert(result); assert(result);
} else { } else {
@@ -2074,6 +2080,7 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *imag
const VkClearValue *clear_value, uint32_t range_count, const VkImageSubresourceRange *ranges, const VkClearValue *clear_value, uint32_t range_count, const VkImageSubresourceRange *ranges,
bool cs) bool cs)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
VkFormat format = image->vk.format; VkFormat format = image->vk.format;
VkClearValue internal_clear_value; VkClearValue internal_clear_value;
@@ -2086,8 +2093,8 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *imag
if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) { if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
bool blendable; bool blendable;
if (cs ? !radv_is_storage_image_format_supported(cmd_buffer->device->physical_device, format) if (cs ? !radv_is_storage_image_format_supported(pdev, format)
: !radv_is_colorbuffer_format_supported(cmd_buffer->device->physical_device, format, &blendable)) { : !radv_is_colorbuffer_format_supported(pdev, format, &blendable)) {
format = VK_FORMAT_R32_UINT; format = VK_FORMAT_R32_UINT;
internal_clear_value.color.uint32[0] = float3_to_rgb9e5(clear_value->color.float32); internal_clear_value.color.uint32[0] = float3_to_rgb9e5(clear_value->color.float32);

View file

@@ -239,13 +239,14 @@ radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToIm
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer); RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
RADV_FROM_HANDLE(radv_image, dst_image, pCopyBufferToImageInfo->dstImage); RADV_FROM_HANDLE(radv_image, dst_image, pCopyBufferToImageInfo->dstImage);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) { for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
copy_buffer_to_image(cmd_buffer, src_buffer, dst_image, pCopyBufferToImageInfo->dstImageLayout, copy_buffer_to_image(cmd_buffer, src_buffer, dst_image, pCopyBufferToImageInfo->dstImageLayout,
&pCopyBufferToImageInfo->pRegions[r]); &pCopyBufferToImageInfo->pRegions[r]);
} }
if (radv_is_format_emulated(cmd_buffer->device->physical_device, dst_image->vk.format)) { if (radv_is_format_emulated(pdev, dst_image->vk.format)) {
cmd_buffer->state.flush_bits |= cmd_buffer->state.flush_bits |=
RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, dst_image) | radv_src_access_flush(cmd_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, dst_image) |
@@ -422,6 +423,8 @@ static void
copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkImageLayout src_image_layout, copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkImageLayout src_image_layout,
struct radv_image *dst_image, VkImageLayout dst_image_layout, const VkImageCopy2 *region) struct radv_image *dst_image, VkImageLayout dst_image_layout, const VkImageCopy2 *region)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) { if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
transfer_copy_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout, region); transfer_copy_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout, region);
return; return;
@@ -499,9 +502,9 @@ copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkI
src_image_layout, src_queue_mask); src_image_layout, src_queue_mask);
bool need_dcc_sign_reinterpret = false; bool need_dcc_sign_reinterpret = false;
if (!src_compressed || (radv_dcc_formats_compatible(cmd_buffer->device->physical_device->info.gfx_level, if (!src_compressed ||
b_src.format, b_dst.format, &need_dcc_sign_reinterpret) && (radv_dcc_formats_compatible(pdev->info.gfx_level, b_src.format, b_dst.format, &need_dcc_sign_reinterpret) &&
!need_dcc_sign_reinterpret)) { !need_dcc_sign_reinterpret)) {
b_src.format = b_dst.format; b_src.format = b_dst.format;
} else if (!dst_compressed) { } else if (!dst_compressed) {
b_dst.format = b_src.format; b_dst.format = b_src.format;
@@ -613,13 +616,14 @@ radv_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyI
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_image, src_image, pCopyImageInfo->srcImage); RADV_FROM_HANDLE(radv_image, src_image, pCopyImageInfo->srcImage);
RADV_FROM_HANDLE(radv_image, dst_image, pCopyImageInfo->dstImage); RADV_FROM_HANDLE(radv_image, dst_image, pCopyImageInfo->dstImage);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) { for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
copy_image(cmd_buffer, src_image, pCopyImageInfo->srcImageLayout, dst_image, pCopyImageInfo->dstImageLayout, copy_image(cmd_buffer, src_image, pCopyImageInfo->srcImageLayout, dst_image, pCopyImageInfo->dstImageLayout,
&pCopyImageInfo->pRegions[r]); &pCopyImageInfo->pRegions[r]);
} }
if (radv_is_format_emulated(cmd_buffer->device->physical_device, dst_image->vk.format)) { if (radv_is_format_emulated(pdev, dst_image->vk.format)) {
cmd_buffer->state.flush_bits |= cmd_buffer->state.flush_bits |=
RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, dst_image) | radv_src_access_flush(cmd_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, dst_image) |

View file

@@ -43,6 +43,7 @@ radv_device_finish_meta_copy_vrs_htile_state(struct radv_device *device)
static nir_shader * static nir_shader *
build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf) build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "meta_copy_vrs_htile"); nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "meta_copy_vrs_htile");
b.shader->info.workgroup_size[0] = 8; b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8; b.shader->info.workgroup_size[1] = 8;
@@ -64,8 +65,8 @@ build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf
/* Get the HTILE addr from coordinates. */ /* Get the HTILE addr from coordinates. */
nir_def *zero = nir_imm_int(&b, 0); nir_def *zero = nir_imm_int(&b, 0);
nir_def *htile_addr = nir_def *htile_addr =
ac_nir_htile_addr_from_coord(&b, &device->physical_device->info, &surf->u.gfx9.zs.htile_equation, htile_pitch, ac_nir_htile_addr_from_coord(&b, &pdev->info, &surf->u.gfx9.zs.htile_equation, htile_pitch, htile_slice_size,
htile_slice_size, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero); nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero);
/* Set up the input VRS image descriptor. */ /* Set up the input VRS image descriptor. */
const struct glsl_type *vrs_sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT); const struct glsl_type *vrs_sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT);

View file

@@ -31,6 +31,7 @@
static nir_shader * static nir_shader *
build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *surf) build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *surf)
{ {
const struct radv_physical_device *pdev = radv_device_physical(dev);
enum glsl_sampler_dim dim = GLSL_SAMPLER_DIM_BUF; enum glsl_sampler_dim dim = GLSL_SAMPLER_DIM_BUF;
const struct glsl_type *buf_type = glsl_image_type(dim, false, GLSL_TYPE_UINT); const struct glsl_type *buf_type = glsl_image_type(dim, false, GLSL_TYPE_UINT);
nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "dcc_retile_compute"); nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "dcc_retile_compute");
@@ -60,12 +61,12 @@ build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *sur
coord = coord =
nir_imul(&b, coord, nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, surf->u.gfx9.color.dcc_block_height)); nir_imul(&b, coord, nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, surf->u.gfx9.color.dcc_block_height));
nir_def *src = ac_nir_dcc_addr_from_coord(&b, &dev->physical_device->info, surf->bpe, nir_def *src = ac_nir_dcc_addr_from_coord(&b, &pdev->info, surf->bpe, &surf->u.gfx9.color.dcc_equation,
&surf->u.gfx9.color.dcc_equation, src_dcc_pitch, src_dcc_height, zero, src_dcc_pitch, src_dcc_height, zero, nir_channel(&b, coord, 0),
nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero, zero); nir_channel(&b, coord, 1), zero, zero, zero);
nir_def *dst = ac_nir_dcc_addr_from_coord( nir_def *dst = ac_nir_dcc_addr_from_coord(&b, &pdev->info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation,
&b, &dev->physical_device->info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation, dst_dcc_pitch, dst_dcc_pitch, dst_dcc_height, zero, nir_channel(&b, coord, 0),
dst_dcc_height, zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero, zero); nir_channel(&b, coord, 1), zero, zero, zero);
nir_def *dcc_val = nir_image_deref_load(&b, 1, 32, input_dcc_ref, nir_vec4(&b, src, src, src, src), nir_def *dcc_val = nir_image_deref_load(&b, 1, 32, input_dcc_ref, nir_vec4(&b, src, src, src, src),
nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = dim); nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = dim);

View file

@@ -34,13 +34,14 @@
VkResult VkResult
radv_device_init_meta_etc_decode_state(struct radv_device *device, bool on_demand) radv_device_init_meta_etc_decode_state(struct radv_device *device, bool on_demand)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_meta_state *state = &device->meta_state; struct radv_meta_state *state = &device->meta_state;
if (!device->physical_device->emulate_etc2) if (!pdev->emulate_etc2)
return VK_SUCCESS; return VK_SUCCESS;
state->etc_decode.allocator = &state->alloc; state->etc_decode.allocator = &state->alloc;
state->etc_decode.nir_options = &device->physical_device->nir_options[MESA_SHADER_COMPUTE]; state->etc_decode.nir_options = &pdev->nir_options[MESA_SHADER_COMPUTE];
state->etc_decode.pipeline_cache = state->cache; state->etc_decode.pipeline_cache = state->cache;
vk_texcompress_etc2_init(&device->vk, &state->etc_decode); vk_texcompress_etc2_init(&device->vk, &state->etc_decode);

View file

@ -155,6 +155,7 @@ create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
static VkResult static VkResult
create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipelineLayout layout) create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipelineLayout layout)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
VkResult result; VkResult result;
VkDevice device_h = radv_device_to_handle(device); VkDevice device_h = radv_device_to_handle(device);
@ -363,8 +364,8 @@ create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipeli
}, },
&(struct radv_graphics_pipeline_create_info){ &(struct radv_graphics_pipeline_create_info){
.use_rectlist = true, .use_rectlist = true,
.custom_blend_mode = device->physical_device->info.gfx_level >= GFX11 ? V_028808_CB_DCC_DECOMPRESS_GFX11 .custom_blend_mode =
: V_028808_CB_DCC_DECOMPRESS_GFX8, pdev->info.gfx_level >= GFX11 ? V_028808_CB_DCC_DECOMPRESS_GFX11 : V_028808_CB_DCC_DECOMPRESS_GFX8,
}, },
&device->meta_state.alloc, &device->meta_state.fast_clear_flush.dcc_decompress_pipeline); &device->meta_state.alloc, &device->meta_state.fast_clear_flush.dcc_decompress_pipeline);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)

View file

@ -232,8 +232,10 @@ radv_can_use_fmask_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_im
const struct radv_image *dst_image, unsigned num_rects, const struct radv_image *dst_image, unsigned num_rects,
const struct radv_meta_blit2d_rect *rects) const struct radv_meta_blit2d_rect *rects)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
/* TODO: Test on pre GFX10 chips. */ /* TODO: Test on pre GFX10 chips. */
if (cmd_buffer->device->physical_device->info.gfx_level < GFX10) if (pdev->info.gfx_level < GFX10)
return false; return false;
/* TODO: Add support for layers. */ /* TODO: Add support for layers. */

View file

@ -253,7 +253,8 @@ enum radv_resolve_method {
static bool static bool
image_hw_resolve_compat(const struct radv_device *device, struct radv_image *src_image, struct radv_image *dst_image) image_hw_resolve_compat(const struct radv_device *device, struct radv_image *src_image, struct radv_image *dst_image)
{ {
if (device->physical_device->info.gfx_level >= GFX9) { const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level >= GFX9) {
return dst_image->planes[0].surface.u.gfx9.swizzle_mode == src_image->planes[0].surface.u.gfx9.swizzle_mode; return dst_image->planes[0].surface.u.gfx9.swizzle_mode == src_image->planes[0].surface.u.gfx9.swizzle_mode;
} else { } else {
return dst_image->planes[0].surface.micro_tile_mode == src_image->planes[0].surface.micro_tile_mode; return dst_image->planes[0].surface.micro_tile_mode == src_image->planes[0].surface.micro_tile_mode;
@ -506,9 +507,9 @@ radv_CmdResolveImage2(VkCommandBuffer commandBuffer, const VkResolveImageInfo2 *
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_image, src_image, pResolveImageInfo->srcImage); RADV_FROM_HANDLE(radv_image, src_image, pResolveImageInfo->srcImage);
RADV_FROM_HANDLE(radv_image, dst_image, pResolveImageInfo->dstImage); RADV_FROM_HANDLE(radv_image, dst_image, pResolveImageInfo->dstImage);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
VkImageLayout src_image_layout = pResolveImageInfo->srcImageLayout; VkImageLayout src_image_layout = pResolveImageInfo->srcImageLayout;
VkImageLayout dst_image_layout = pResolveImageInfo->dstImageLayout; VkImageLayout dst_image_layout = pResolveImageInfo->dstImageLayout;
const struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
enum radv_resolve_method resolve_method = pdev->info.gfx_level >= GFX11 ? RESOLVE_FRAGMENT : RESOLVE_HW; enum radv_resolve_method resolve_method = pdev->info.gfx_level >= GFX11 ? RESOLVE_FRAGMENT : RESOLVE_HW;
/* we can use the hw resolve only for single full resolves */ /* we can use the hw resolve only for single full resolves */
@ -622,7 +623,7 @@ radv_cmd_buffer_resolve_rendering_hw(struct radv_cmd_buffer *cmd_buffer, struct
void void
radv_cmd_buffer_resolve_rendering(struct radv_cmd_buffer *cmd_buffer) radv_cmd_buffer_resolve_rendering(struct radv_cmd_buffer *cmd_buffer)
{ {
const struct radv_physical_device *pdev = cmd_buffer->device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
const struct radv_rendering_state *render = &cmd_buffer->state.render; const struct radv_rendering_state *render = &cmd_buffer->state.render;
enum radv_resolve_method resolve_method = pdev->info.gfx_level >= GFX11 ? RESOLVE_FRAGMENT : RESOLVE_HW; enum radv_resolve_method resolve_method = pdev->info.gfx_level >= GFX11 ? RESOLVE_FRAGMENT : RESOLVE_HW;

View file

@ -501,12 +501,13 @@ apply_layout_to_tex(nir_builder *b, apply_layout_state *state, nir_tex_instr *te
void void
radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device, const struct radv_shader_stage *stage) radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device, const struct radv_shader_stage *stage)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
apply_layout_state state = { apply_layout_state state = {
.gfx_level = device->physical_device->info.gfx_level, .gfx_level = pdev->info.gfx_level,
.address32_hi = device->physical_device->info.address32_hi, .address32_hi = pdev->info.address32_hi,
.disable_aniso_single_level = device->instance->drirc.disable_aniso_single_level, .disable_aniso_single_level = device->instance->drirc.disable_aniso_single_level,
.has_image_load_dcc_bug = device->physical_device->info.has_image_load_dcc_bug, .has_image_load_dcc_bug = pdev->info.has_image_load_dcc_bug,
.disable_tg4_trunc_coord = !device->physical_device->info.conformant_trunc_coord && !device->disable_trunc_coord, .disable_tg4_trunc_coord = !pdev->info.conformant_trunc_coord && !device->disable_trunc_coord,
.args = &stage->args, .args = &stage->args,
.info = &stage->info, .info = &stage->info,
.layout = &stage->layout, .layout = &stage->layout,

View file

@ -72,6 +72,8 @@ radv_nir_lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask)
void void
radv_nir_lower_io(struct radv_device *device, nir_shader *nir) radv_nir_lower_io(struct radv_device *device, nir_shader *nir)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (nir->info.stage == MESA_SHADER_FRAGMENT) { if (nir->info.stage == MESA_SHADER_FRAGMENT) {
nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, MESA_SHADER_FRAGMENT); nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, MESA_SHADER_FRAGMENT);
} }
@ -89,7 +91,7 @@ radv_nir_lower_io(struct radv_device *device, nir_shader *nir)
NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out); NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out);
if (device->physical_device->use_ngg_streamout && nir->xfb_info) { if (pdev->use_ngg_streamout && nir->xfb_info) {
NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info); NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info);
/* The total number of shader outputs is required for computing the pervertex LDS size for /* The total number of shader outputs is required for computing the pervertex LDS size for
@ -133,6 +135,7 @@ radv_map_io_driver_location(unsigned semantic)
bool bool
radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *stage) radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *stage)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader_info *info = &stage->info; const struct radv_shader_info *info = &stage->info;
ac_nir_map_io_driver_location map_input = info->inputs_linked ? NULL : radv_map_io_driver_location; ac_nir_map_io_driver_location map_input = info->inputs_linked ? NULL : radv_map_io_driver_location;
ac_nir_map_io_driver_location map_output = info->outputs_linked ? NULL : radv_map_io_driver_location; ac_nir_map_io_driver_location map_output = info->outputs_linked ? NULL : radv_map_io_driver_location;
@ -144,35 +147,33 @@ radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *s
info->vs.tcs_temp_only_input_mask); info->vs.tcs_temp_only_input_mask);
return true; return true;
} else if (info->vs.as_es) { } else if (info->vs.as_es) {
NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, device->physical_device->info.gfx_level, NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize);
info->esgs_itemsize);
return true; return true;
} }
} else if (nir->info.stage == MESA_SHADER_TESS_CTRL) { } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, map_input, info->vs.tcs_in_out_eq); NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, map_input, info->vs.tcs_in_out_eq);
NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, map_output, device->physical_device->info.gfx_level, NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, map_output, pdev->info.gfx_level, info->tcs.tes_inputs_read,
info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read, info->tcs.num_linked_outputs, info->tcs.tes_patch_inputs_read, info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs,
info->tcs.num_linked_patch_outputs, info->wave_size, false, false); info->wave_size, false, false);
return true; return true;
} else if (nir->info.stage == MESA_SHADER_TESS_EVAL) { } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
NIR_PASS_V(nir, ac_nir_lower_tes_inputs_to_mem, map_input); NIR_PASS_V(nir, ac_nir_lower_tes_inputs_to_mem, map_input);
if (info->tes.as_es) { if (info->tes.as_es) {
NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, device->physical_device->info.gfx_level, NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize);
info->esgs_itemsize);
} }
return true; return true;
} else if (nir->info.stage == MESA_SHADER_GEOMETRY) { } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
NIR_PASS_V(nir, ac_nir_lower_gs_inputs_to_mem, map_input, device->physical_device->info.gfx_level, false); NIR_PASS_V(nir, ac_nir_lower_gs_inputs_to_mem, map_input, pdev->info.gfx_level, false);
return true; return true;
} else if (nir->info.stage == MESA_SHADER_TASK) { } else if (nir->info.stage == MESA_SHADER_TASK) {
ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, device->physical_device->task_info.num_entries, ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, pdev->task_info.num_entries,
info->cs.has_query); info->cs.has_query);
return true; return true;
} else if (nir->info.stage == MESA_SHADER_MESH) { } else if (nir->info.stage == MESA_SHADER_MESH) {
ac_nir_lower_mesh_inputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, device->physical_device->task_info.num_entries); ac_nir_lower_mesh_inputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, pdev->task_info.num_entries);
return true; return true;
} }

View file

@ -648,6 +648,7 @@ lower_rq_terminate(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr, s
bool bool
radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device) radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
bool progress = false; bool progress = false;
struct hash_table *query_ht = _mesa_pointer_hash_table_create(NULL); struct hash_table *query_ht = _mesa_pointer_hash_table_create(NULL);
@ -655,7 +656,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
if (!var->data.ray_query) if (!var->data.ray_query)
continue; continue;
lower_ray_query(shader, var, query_ht, device->physical_device->max_shared_size); lower_ray_query(shader, var, query_ht, pdev->max_shared_size);
progress = true; progress = true;
} }
@ -670,7 +671,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
if (!var->data.ray_query) if (!var->data.ray_query)
continue; continue;
lower_ray_query(shader, var, query_ht, device->physical_device->max_shared_size); lower_ray_query(shader, var, query_ht, pdev->max_shared_size);
progress = true; progress = true;
} }

View file

@ -297,11 +297,12 @@ build_addr_to_node(nir_builder *b, nir_def *addr)
static nir_def * static nir_def *
build_node_to_addr(struct radv_device *device, nir_builder *b, nir_def *node, bool skip_type_and) build_node_to_addr(struct radv_device *device, nir_builder *b, nir_def *node, bool skip_type_and)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_def *addr = skip_type_and ? node : nir_iand_imm(b, node, ~7ull); nir_def *addr = skip_type_and ? node : nir_iand_imm(b, node, ~7ull);
addr = nir_ishl_imm(b, addr, 3); addr = nir_ishl_imm(b, addr, 3);
/* Assumes everything is in the top half of address space, which is true in /* Assumes everything is in the top half of address space, which is true in
* GFX9+ for now. */ * GFX9+ for now. */
return device->physical_device->info.gfx_level >= GFX9 ? nir_ior_imm(b, addr, 0xffffull << 48) : addr; return pdev->info.gfx_level >= GFX9 ? nir_ior_imm(b, addr, 0xffffull << 48) : addr;
} }
nir_def * nir_def *
@ -477,6 +478,7 @@ radv_test_flag(nir_builder *b, const struct radv_ray_traversal_args *args, uint3
nir_def * nir_def *
radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args) radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete"); nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete");
nir_store_var(b, incomplete, nir_imm_true(b), 0x1); nir_store_var(b, incomplete, nir_imm_true(b), 0x1);
@ -568,7 +570,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
nir_def *global_bvh_node = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node)); nir_def *global_bvh_node = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node));
nir_def *intrinsic_result = NULL; nir_def *intrinsic_result = NULL;
if (!radv_emulate_rt(device->physical_device)) { if (!radv_emulate_rt(pdev)) {
intrinsic_result = intrinsic_result =
nir_bvh64_intersect_ray_amd(b, 32, desc, nir_unpack_64_2x32(b, global_bvh_node), nir_bvh64_intersect_ray_amd(b, 32, desc, nir_unpack_64_2x32(b, global_bvh_node),
nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin),

View file

@ -1524,6 +1524,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, bool monolithic, nir_builder *b, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, bool monolithic, nir_builder *b,
struct rt_variables *vars, bool ignore_cull_mask, struct radv_ray_tracing_stage_info *info) struct rt_variables *vars, bool ignore_cull_mask, struct radv_ray_tracing_stage_info *info)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_variable *barycentrics = nir_variable *barycentrics =
nir_variable_create(b->shader, nir_var_ray_hit_attrib, glsl_vector_type(GLSL_TYPE_FLOAT, 2), "barycentrics"); nir_variable_create(b->shader, nir_var_ray_hit_attrib, glsl_vector_type(GLSL_TYPE_FLOAT, 2), "barycentrics");
barycentrics->data.driver_location = 0; barycentrics->data.driver_location = 0;
@ -1602,7 +1603,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
.tmin = nir_load_var(b, vars->tmin), .tmin = nir_load_var(b, vars->tmin),
.dir = nir_load_var(b, vars->direction), .dir = nir_load_var(b, vars->direction),
.vars = trav_vars_args, .vars = trav_vars_args,
.stack_stride = device->physical_device->rt_wave_size * sizeof(uint32_t), .stack_stride = pdev->rt_wave_size * sizeof(uint32_t),
.stack_entries = MAX_STACK_ENTRY_COUNT, .stack_entries = MAX_STACK_ENTRY_COUNT,
.stack_base = 0, .stack_base = 0,
.ignore_cull_mask = ignore_cull_mask, .ignore_cull_mask = ignore_cull_mask,
@ -1638,7 +1639,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
hit_attribs[i] = hit_attribs[i] =
nir_local_variable_create(nir_shader_get_entrypoint(b->shader), glsl_uint_type(), "ahit_attrib"); nir_local_variable_create(nir_shader_get_entrypoint(b->shader), glsl_uint_type(), "ahit_attrib");
lower_hit_attribs(b->shader, hit_attribs, device->physical_device->rt_wave_size); lower_hit_attribs(b->shader, hit_attribs, pdev->rt_wave_size);
} }
/* Initialize follow-up shader. */ /* Initialize follow-up shader. */
@ -1702,6 +1703,7 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
struct radv_ray_tracing_stage_info *info) struct radv_ray_tracing_stage_info *info)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
const VkPipelineCreateFlagBits2KHR create_flags = vk_rt_pipeline_create_flags(pCreateInfo); const VkPipelineCreateFlagBits2KHR create_flags = vk_rt_pipeline_create_flags(pCreateInfo);
/* Create the traversal shader as an intersection shader to prevent validation failures due to /* Create the traversal shader as an intersection shader to prevent validation failures due to
@ -1709,8 +1711,8 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_
nir_builder b = radv_meta_init_shader(device, MESA_SHADER_INTERSECTION, "rt_traversal"); nir_builder b = radv_meta_init_shader(device, MESA_SHADER_INTERSECTION, "rt_traversal");
b.shader->info.internal = false; b.shader->info.internal = false;
b.shader->info.workgroup_size[0] = 8; b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = device->physical_device->rt_wave_size == 64 ? 8 : 4; b.shader->info.workgroup_size[1] = pdev->rt_wave_size == 64 ? 8 : 4;
b.shader->info.shared_size = device->physical_device->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t); b.shader->info.shared_size = pdev->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t);
struct rt_variables vars = create_rt_variables(b.shader, device, create_flags, false); struct rt_variables vars = create_rt_variables(b.shader, device, create_flags, false);
if (info->tmin.state == RADV_RT_CONST_ARG_STATE_VALID) if (info->tmin.state == RADV_RT_CONST_ARG_STATE_VALID)
@ -1773,6 +1775,7 @@ lower_rt_instruction_monolithic(nir_builder *b, nir_instr *instr, void *data)
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
struct lower_rt_instruction_monolithic_state *state = data; struct lower_rt_instruction_monolithic_state *state = data;
const struct radv_physical_device *pdev = radv_device_physical(state->device);
struct rt_variables *vars = state->vars; struct rt_variables *vars = state->vars;
switch (intr->intrinsic) { switch (intr->intrinsic) {
@ -1800,8 +1803,8 @@ lower_rt_instruction_monolithic(nir_builder *b, nir_instr *instr, void *data)
nir_store_var(b, vars->stack_ptr, nir_iadd_imm(b, stack_ptr, b->shader->scratch_size), 0x1); nir_store_var(b, vars->stack_ptr, nir_iadd_imm(b, stack_ptr, b->shader->scratch_size), 0x1);
radv_build_traversal(state->device, state->pipeline, state->pCreateInfo, true, b, vars, ignore_cull_mask, NULL); radv_build_traversal(state->device, state->pipeline, state->pCreateInfo, true, b, vars, ignore_cull_mask, NULL);
b->shader->info.shared_size = MAX2(b->shader->info.shared_size, state->device->physical_device->rt_wave_size * b->shader->info.shared_size =
MAX_STACK_ENTRY_COUNT * sizeof(uint32_t)); MAX2(b->shader->info.shared_size, pdev->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t));
nir_store_var(b, vars->stack_ptr, stack_ptr, 0x1); nir_store_var(b, vars->stack_ptr, stack_ptr, 0x1);

View file

@ -424,7 +424,9 @@ cleanup:
VkResult VkResult
radv_device_init_null_accel_struct(struct radv_device *device) radv_device_init_null_accel_struct(struct radv_device *device)
{ {
if (device->physical_device->memory_properties.memoryTypeCount == 0) const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->memory_properties.memoryTypeCount == 0)
return VK_SUCCESS; /* Exit in the case of null winsys. */ return VK_SUCCESS; /* Exit in the case of null winsys. */
VkDevice _device = radv_device_to_handle(device); VkDevice _device = radv_device_to_handle(device);
@ -465,9 +467,9 @@ radv_device_init_null_accel_struct(struct radv_device *device)
VkMemoryAllocateInfo alloc_info = { VkMemoryAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = mem_req.memoryRequirements.size, .allocationSize = mem_req.memoryRequirements.size,
.memoryTypeIndex = radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | .memoryTypeIndex =
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT), VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
}; };
result = radv_AllocateMemory(_device, &alloc_info, &device->meta_state.alloc, &memory); result = radv_AllocateMemory(_device, &alloc_info, &device->meta_state.alloc, &memory);
@ -1537,9 +1539,9 @@ radv_GetDeviceAccelerationStructureCompatibilityKHR(VkDevice _device,
VkAccelerationStructureCompatibilityKHR *pCompatibility) VkAccelerationStructureCompatibilityKHR *pCompatibility)
{ {
RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_device, device, _device);
bool compat = const struct radv_physical_device *pdev = radv_device_physical(device);
memcmp(pVersionInfo->pVersionData, device->physical_device->driver_uuid, VK_UUID_SIZE) == 0 && bool compat = memcmp(pVersionInfo->pVersionData, pdev->driver_uuid, VK_UUID_SIZE) == 0 &&
memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, device->physical_device->cache_uuid, VK_UUID_SIZE) == 0; memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, pdev->cache_uuid, VK_UUID_SIZE) == 0;
*pCompatibility = compat ? VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR *pCompatibility = compat ? VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR
: VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR; : VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
} }
@ -1601,6 +1603,7 @@ radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src); RADV_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
RADV_FROM_HANDLE(radv_buffer, src_buffer, src->buffer); RADV_FROM_HANDLE(radv_buffer, src_buffer, src->buffer);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radv_meta_saved_state saved_state; struct radv_meta_saved_state saved_state;
VkResult result = radv_device_init_accel_struct_copy_state(cmd_buffer->device); VkResult result = radv_device_init_accel_struct_copy_state(cmd_buffer->device);
@ -1634,8 +1637,8 @@ radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
/* Set the header of the serialized data. */ /* Set the header of the serialized data. */
uint8_t header_data[2 * VK_UUID_SIZE]; uint8_t header_data[2 * VK_UUID_SIZE];
memcpy(header_data, cmd_buffer->device->physical_device->driver_uuid, VK_UUID_SIZE); memcpy(header_data, pdev->driver_uuid, VK_UUID_SIZE);
memcpy(header_data + VK_UUID_SIZE, cmd_buffer->device->physical_device->cache_uuid, VK_UUID_SIZE); memcpy(header_data + VK_UUID_SIZE, pdev->cache_uuid, VK_UUID_SIZE);
radv_update_buffer_cp(cmd_buffer, pInfo->dst.deviceAddress, header_data, sizeof(header_data)); radv_update_buffer_cp(cmd_buffer, pInfo->dst.deviceAddress, header_data, sizeof(header_data));
} }

View file

@ -114,6 +114,7 @@ radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info,
{ {
RADV_FROM_HANDLE(radv_device, device, device_h); RADV_FROM_HANDLE(radv_device, device, device_h);
const struct radv_physical_device *pdev = radv_device_physical(device);
VkImage image_h = VK_NULL_HANDLE; VkImage image_h = VK_NULL_HANDLE;
struct radv_image *image = NULL; struct radv_image *image = NULL;
VkResult result; VkResult result;
@ -141,10 +142,9 @@ radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info,
/* Find the first VRAM memory type, or GART for PRIME images. */ /* Find the first VRAM memory type, or GART for PRIME images. */
int memory_type_index = -1; int memory_type_index = -1;
for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) { for (int i = 0; i < pdev->memory_properties.memoryTypeCount; ++i) {
bool is_local = !!(device->physical_device->memory_properties.memoryTypes[i].propertyFlags & bool is_local = !!(pdev->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); bool is_32bit = !!(pdev->memory_types_32bit & (1u << i));
bool is_32bit = !!(device->physical_device->memory_types_32bit & (1u << i));
if (is_local && !is_32bit) { if (is_local && !is_32bit) {
memory_type_index = i; memory_type_index = i;
break; break;
@ -217,7 +217,7 @@ radv_GetSwapchainGrallocUsageANDROID(VkDevice device_h, VkFormat format, VkImage
int *grallocUsage) int *grallocUsage)
{ {
RADV_FROM_HANDLE(radv_device, device, device_h); RADV_FROM_HANDLE(radv_device, device, device_h);
struct radv_physical_device *pdev = device->physical_device; struct radv_physical_device *pdev = radv_device_physical(device);
VkPhysicalDevice pdev_h = radv_physical_device_to_handle(pdev); VkPhysicalDevice pdev_h = radv_physical_device_to_handle(pdev);
VkResult result; VkResult result;
@ -298,7 +298,7 @@ radv_GetSwapchainGrallocUsage2ANDROID(VkDevice device_h, VkFormat format, VkImag
* vkGetSwapchainGrallocUsageANDROID. */ * vkGetSwapchainGrallocUsageANDROID. */
#if ANDROID_API_LEVEL >= 26 #if ANDROID_API_LEVEL >= 26
RADV_FROM_HANDLE(radv_device, device, device_h); RADV_FROM_HANDLE(radv_device, device, device_h);
struct radv_physical_device *pdev = device->physical_device; struct radv_physical_device *pdev = radv_device_physical(device);
VkPhysicalDevice pdev_h = radv_physical_device_to_handle(pdev); VkPhysicalDevice pdev_h = radv_physical_device_to_handle(pdev);
VkResult result; VkResult result;
@ -408,6 +408,7 @@ get_ahb_buffer_format_properties(VkDevice device_h, const struct AHardwareBuffer
VkAndroidHardwareBufferFormatPropertiesANDROID *pProperties) VkAndroidHardwareBufferFormatPropertiesANDROID *pProperties)
{ {
RADV_FROM_HANDLE(radv_device, device, device_h); RADV_FROM_HANDLE(radv_device, device, device_h);
struct radv_physical_device *pdev = radv_device_physical(device);
/* Get a description of buffer contents . */ /* Get a description of buffer contents . */
AHardwareBuffer_Desc desc; AHardwareBuffer_Desc desc;
@ -431,8 +432,7 @@ get_ahb_buffer_format_properties(VkDevice device_h, const struct AHardwareBuffer
VkFormatProperties2 format_properties = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2}; VkFormatProperties2 format_properties = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2};
radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(device->physical_device), p->format, radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(pdev), p->format, &format_properties);
&format_properties);
if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER) if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER)
p->formatFeatures = format_properties.formatProperties.linearTilingFeatures; p->formatFeatures = format_properties.formatProperties.linearTilingFeatures;
@ -481,6 +481,7 @@ get_ahb_buffer_format_properties2(VkDevice device_h, const struct AHardwareBuffe
VkAndroidHardwareBufferFormatProperties2ANDROID *pProperties) VkAndroidHardwareBufferFormatProperties2ANDROID *pProperties)
{ {
RADV_FROM_HANDLE(radv_device, device, device_h); RADV_FROM_HANDLE(radv_device, device, device_h);
struct radv_physical_device *pdev = radv_device_physical(device);
/* Get a description of buffer contents . */ /* Get a description of buffer contents . */
AHardwareBuffer_Desc desc; AHardwareBuffer_Desc desc;
@ -504,8 +505,7 @@ get_ahb_buffer_format_properties2(VkDevice device_h, const struct AHardwareBuffe
VkFormatProperties2 format_properties = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2}; VkFormatProperties2 format_properties = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2};
radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(device->physical_device), p->format, radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(pdev), p->format, &format_properties);
&format_properties);
if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER) if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER)
p->formatFeatures = format_properties.formatProperties.linearTilingFeatures; p->formatFeatures = format_properties.formatProperties.linearTilingFeatures;
@ -554,7 +554,7 @@ radv_GetAndroidHardwareBufferPropertiesANDROID(VkDevice device_h, const struct A
VkAndroidHardwareBufferPropertiesANDROID *pProperties) VkAndroidHardwareBufferPropertiesANDROID *pProperties)
{ {
RADV_FROM_HANDLE(radv_device, dev, device_h); RADV_FROM_HANDLE(radv_device, dev, device_h);
struct radv_physical_device *pdev = dev->physical_device; struct radv_physical_device *pdev = radv_device_physical(dev);
VkAndroidHardwareBufferFormatPropertiesANDROID *format_prop = VkAndroidHardwareBufferFormatPropertiesANDROID *format_prop =
vk_find_struct(pProperties->pNext, ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_ANDROID); vk_find_struct(pProperties->pNext, ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_ANDROID);

View file

@ -176,9 +176,10 @@ static void
radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize size, VkBufferCreateFlags flags, radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize size, VkBufferCreateFlags flags,
VkBufferUsageFlags2KHR usage, VkMemoryRequirements2 *pMemoryRequirements) VkBufferUsageFlags2KHR usage, VkMemoryRequirements2 *pMemoryRequirements)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
pMemoryRequirements->memoryRequirements.memoryTypeBits = pMemoryRequirements->memoryRequirements.memoryTypeBits =
((1u << device->physical_device->memory_properties.memoryTypeCount) - 1u) & ((1u << pdev->memory_properties.memoryTypeCount) - 1u) & ~pdev->memory_types_32bit;
~device->physical_device->memory_types_32bit;
/* Allow 32-bit address-space for DGC usage, as this buffer will contain /* Allow 32-bit address-space for DGC usage, as this buffer will contain
* cmd buffer upload buffers, and those get passed to shaders through 32-bit * cmd buffer upload buffers, and those get passed to shaders through 32-bit
@ -190,14 +191,14 @@ radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize siz
* intersection is non-zero at least) * intersection is non-zero at least)
*/ */
if ((usage & VK_BUFFER_USAGE_2_INDIRECT_BUFFER_BIT_KHR) && radv_uses_device_generated_commands(device)) if ((usage & VK_BUFFER_USAGE_2_INDIRECT_BUFFER_BIT_KHR) && radv_uses_device_generated_commands(device))
pMemoryRequirements->memoryRequirements.memoryTypeBits |= device->physical_device->memory_types_32bit; pMemoryRequirements->memoryRequirements.memoryTypeBits |= pdev->memory_types_32bit;
/* Force 32-bit address-space for descriptor buffers usage because they are passed to shaders /* Force 32-bit address-space for descriptor buffers usage because they are passed to shaders
* through 32-bit pointers. * through 32-bit pointers.
*/ */
if (usage & if (usage &
(VK_BUFFER_USAGE_2_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | VK_BUFFER_USAGE_2_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT)) (VK_BUFFER_USAGE_2_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | VK_BUFFER_USAGE_2_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT))
pMemoryRequirements->memoryRequirements.memoryTypeBits = device->physical_device->memory_types_32bit; pMemoryRequirements->memoryRequirements.memoryTypeBits = pdev->memory_types_32bit;
if (flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) if (flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
pMemoryRequirements->memoryRequirements.alignment = 4096; pMemoryRequirements->memoryRequirements.alignment = 4096;

View file

@ -34,6 +34,7 @@ void
radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFormat vk_format, unsigned offset, radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFormat vk_format, unsigned offset,
unsigned range, uint32_t *state) unsigned range, uint32_t *state)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct util_format_description *desc; const struct util_format_description *desc;
unsigned stride; unsigned stride;
unsigned num_format, data_format; unsigned num_format, data_format;
@ -49,16 +50,15 @@ radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFor
va += offset; va += offset;
if (device->physical_device->info.gfx_level != GFX8 && stride) { if (pdev->info.gfx_level != GFX8 && stride) {
range /= stride; range /= stride;
} }
rsrc_word3 = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) | rsrc_word3 = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3])); S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));
if (device->physical_device->info.gfx_level >= GFX10) { if (pdev->info.gfx_level >= GFX10) {
const struct gfx10_format *fmt = const struct gfx10_format *fmt = &ac_get_gfx10_format_table(&pdev->info)[vk_format_to_pipe_format(vk_format)];
&ac_get_gfx10_format_table(&device->physical_device->info)[vk_format_to_pipe_format(vk_format)];
/* OOB_SELECT chooses the out-of-bounds check. /* OOB_SELECT chooses the out-of-bounds check.
* *
@ -81,7 +81,7 @@ radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFor
* offset+payload > NUM_RECORDS * offset+payload > NUM_RECORDS
*/ */
rsrc_word3 |= S_008F0C_FORMAT(fmt->img_format) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) | rsrc_word3 |= S_008F0C_FORMAT(fmt->img_format) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
S_008F0C_RESOURCE_LEVEL(device->physical_device->info.gfx_level < GFX11); S_008F0C_RESOURCE_LEVEL(pdev->info.gfx_level < GFX11);
} else { } else {
num_format = radv_translate_buffer_numformat(desc, first_non_void); num_format = radv_translate_buffer_numformat(desc, first_non_void);
data_format = radv_translate_buffer_dataformat(desc, first_non_void); data_format = radv_translate_buffer_dataformat(desc, first_non_void);

File diff suppressed because it is too large Load diff

View file

@ -38,8 +38,9 @@ radv_set_context_reg_array(struct radeon_cmdbuf *cs, unsigned reg, unsigned num,
VkResult VkResult
radv_create_shadow_regs_preamble(struct radv_device *device, struct radv_queue_state *queue_state) radv_create_shadow_regs_preamble(struct radv_device *device, struct radv_queue_state *queue_state)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_winsys *ws = device->ws; struct radeon_winsys *ws = device->ws;
const struct radeon_info *gpu_info = &device->physical_device->info; const struct radeon_info *gpu_info = &pdev->info;
VkResult result; VkResult result;
struct radeon_cmdbuf *cs = ws->cs_create(ws, AMD_IP_GFX, false); struct radeon_cmdbuf *cs = ws->cs_create(ws, AMD_IP_GFX, false);
@ -125,7 +126,8 @@ radv_emit_shadow_regs_preamble(struct radeon_cmdbuf *cs, const struct radv_devic
VkResult VkResult
radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct radv_queue *queue) radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct radv_queue *queue)
{ {
const struct radeon_info *gpu_info = &device->physical_device->info; const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
struct radeon_winsys *ws = device->ws; struct radeon_winsys *ws = device->ws;
struct radeon_cmdbuf *cs; struct radeon_cmdbuf *cs;
VkResult result; VkResult result;

View file

@ -108,17 +108,19 @@ radv_dump_trace(const struct radv_device *device, struct radeon_cmdbuf *cs, FILE
static void static void
radv_dump_mmapped_reg(const struct radv_device *device, FILE *f, unsigned offset) radv_dump_mmapped_reg(const struct radv_device *device, FILE *f, unsigned offset)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_winsys *ws = device->ws; struct radeon_winsys *ws = device->ws;
uint32_t value; uint32_t value;
if (ws->read_registers(ws, offset, 1, &value)) if (ws->read_registers(ws, offset, 1, &value))
ac_dump_reg(f, device->physical_device->info.gfx_level, device->physical_device->info.family, offset, value, ~0); ac_dump_reg(f, pdev->info.gfx_level, pdev->info.family, offset, value, ~0);
} }
static void static void
radv_dump_debug_registers(const struct radv_device *device, FILE *f) radv_dump_debug_registers(const struct radv_device *device, FILE *f)
{ {
const struct radeon_info *gpu_info = &device->physical_device->info; const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
fprintf(f, "Memory-mapped registers:\n"); fprintf(f, "Memory-mapped registers:\n");
radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS); radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
@ -190,8 +192,9 @@ radv_dump_combined_image_sampler_descriptor(enum amd_gfx_level gfx_level, enum r
static void static void
radv_dump_descriptor_set(const struct radv_device *device, const struct radv_descriptor_set *set, unsigned id, FILE *f) radv_dump_descriptor_set(const struct radv_device *device, const struct radv_descriptor_set *set, unsigned id, FILE *f)
{ {
enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
enum radeon_family family = device->physical_device->info.family; enum amd_gfx_level gfx_level = pdev->info.gfx_level;
enum radeon_family family = pdev->info.family;
const struct radv_descriptor_set_layout *layout; const struct radv_descriptor_set_layout *layout;
int i; int i;
@ -376,6 +379,8 @@ static void
radv_dump_shader(struct radv_device *device, struct radv_pipeline *pipeline, struct radv_shader *shader, radv_dump_shader(struct radv_device *device, struct radv_pipeline *pipeline, struct radv_shader *shader,
gl_shader_stage stage, const char *dump_dir, FILE *f) gl_shader_stage stage, const char *dump_dir, FILE *f)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!shader) if (!shader)
return; return;
@ -400,7 +405,7 @@ radv_dump_shader(struct radv_device *device, struct radv_pipeline *pipeline, str
fprintf(f, "NIR:\n%s\n", shader->nir_string); fprintf(f, "NIR:\n%s\n", shader->nir_string);
} }
fprintf(f, "%s IR:\n%s\n", device->physical_device->use_llvm ? "LLVM" : "ACO", shader->ir_string); fprintf(f, "%s IR:\n%s\n", pdev->use_llvm ? "LLVM" : "ACO", shader->ir_string);
fprintf(f, "DISASM:\n%s\n", shader->disasm_string); fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
radv_dump_shader_stats(device, pipeline, shader, stage, f); radv_dump_shader_stats(device, pipeline, shader, stage, f);
@ -504,9 +509,10 @@ radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f)
} }
if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR)) { if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR)) {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]; struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; enum amd_gfx_level gfx_level = pdev->info.gfx_level;
unsigned num_waves = ac_get_wave_info(gfx_level, &device->physical_device->info, waves); unsigned num_waves = ac_get_wave_info(gfx_level, &pdev->info, waves);
fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves); fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);
@ -633,21 +639,22 @@ radv_dump_app_info(const struct radv_device *device, FILE *f)
static void static void
radv_dump_device_name(const struct radv_device *device, FILE *f) radv_dump_device_name(const struct radv_device *device, FILE *f)
{ {
const struct radeon_info *gpu_info = &device->physical_device->info; const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
#ifndef _WIN32 #ifndef _WIN32
char kernel_version[128] = {0}; char kernel_version[128] = {0};
struct utsname uname_data; struct utsname uname_data;
#endif #endif
#ifdef _WIN32 #ifdef _WIN32
fprintf(f, "Device name: %s (DRM %i.%i.%i)\n\n", device->physical_device->marketing_name, gpu_info->drm_major, fprintf(f, "Device name: %s (DRM %i.%i.%i)\n\n", pdev->marketing_name, gpu_info->drm_major, gpu_info->drm_minor,
gpu_info->drm_minor, gpu_info->drm_patchlevel); gpu_info->drm_patchlevel);
#else #else
if (uname(&uname_data) == 0) if (uname(&uname_data) == 0)
snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release); snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release);
fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", device->physical_device->marketing_name, gpu_info->drm_major, fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", pdev->marketing_name, gpu_info->drm_major, gpu_info->drm_minor,
gpu_info->drm_minor, gpu_info->drm_patchlevel, kernel_version); gpu_info->drm_patchlevel, kernel_version);
#endif #endif
} }
@ -655,18 +662,16 @@ static void
radv_dump_umr_ring(const struct radv_queue *queue, FILE *f) radv_dump_umr_ring(const struct radv_queue *queue, FILE *f)
{ {
#ifndef _WIN32 #ifndef _WIN32
const struct radv_physical_device *pdev = radv_device_physical(queue->device);
const enum amd_ip_type ring = radv_queue_ring(queue); const enum amd_ip_type ring = radv_queue_ring(queue);
const struct radv_device *device = queue->device;
char cmd[256]; char cmd[256];
/* TODO: Dump compute ring. */ /* TODO: Dump compute ring. */
if (ring != AMD_IP_GFX) if (ring != AMD_IP_GFX)
return; return;
sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -RS %s 2>&1", device->physical_device->bus_info.domain, sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -RS %s 2>&1", pdev->bus_info.domain, pdev->bus_info.bus,
device->physical_device->bus_info.bus, device->physical_device->bus_info.dev, pdev->bus_info.dev, pdev->bus_info.func, pdev->info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
device->physical_device->bus_info.func,
device->physical_device->info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
fprintf(f, "\nUMR GFX ring:\n\n"); fprintf(f, "\nUMR GFX ring:\n\n");
radv_dump_cmd(cmd, f); radv_dump_cmd(cmd, f);
#endif #endif
@ -676,18 +681,17 @@ static void
radv_dump_umr_waves(struct radv_queue *queue, FILE *f) radv_dump_umr_waves(struct radv_queue *queue, FILE *f)
{ {
#ifndef _WIN32 #ifndef _WIN32
const struct radv_physical_device *pdev = radv_device_physical(queue->device);
enum amd_ip_type ring = radv_queue_ring(queue); enum amd_ip_type ring = radv_queue_ring(queue);
struct radv_device *device = queue->device;
char cmd[256]; char cmd[256];
/* TODO: Dump compute ring. */ /* TODO: Dump compute ring. */
if (ring != AMD_IP_GFX) if (ring != AMD_IP_GFX)
return; return;
sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -O bits,halt_waves -go 0 -wa %s -go 1 2>&1", sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -O bits,halt_waves -go 0 -wa %s -go 1 2>&1", pdev->bus_info.domain,
device->physical_device->bus_info.domain, device->physical_device->bus_info.bus, pdev->bus_info.bus, pdev->bus_info.dev, pdev->bus_info.func,
device->physical_device->bus_info.dev, device->physical_device->bus_info.func, pdev->info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
device->physical_device->info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
fprintf(f, "\nUMR GFX waves:\n\n"); fprintf(f, "\nUMR GFX waves:\n\n");
radv_dump_cmd(cmd, f); radv_dump_cmd(cmd, f);
#endif #endif
@ -707,7 +711,9 @@ radv_gpu_hang_occurred(struct radv_queue *queue, enum amd_ip_type ring)
bool bool
radv_vm_fault_occurred(struct radv_device *device, struct radv_winsys_gpuvm_fault_info *fault_info) radv_vm_fault_occurred(struct radv_device *device, struct radv_winsys_gpuvm_fault_info *fault_info)
{ {
if (!device->physical_device->info.has_gpuvm_fault_query) const struct radv_physical_device *pdev = radv_device_physical(device);
if (!pdev->info.has_gpuvm_fault_query)
return false; return false;
return device->ws->query_gpuvm_fault(device->ws, fault_info); return device->ws->query_gpuvm_fault(device->ws, fault_info);
@ -742,6 +748,7 @@ radv_check_gpu_hangs(struct radv_queue *queue, const struct radv_winsys_submit_i
fprintf(stderr, "radv: GPU hang detected...\n"); fprintf(stderr, "radv: GPU hang detected...\n");
#ifndef _WIN32 #ifndef _WIN32
const struct radv_physical_device *pdev = radv_device_physical(queue->device);
const bool save_hang_report = !queue->device->vk.enabled_features.deviceFaultVendorBinary; const bool save_hang_report = !queue->device->vk.enabled_features.deviceFaultVendorBinary;
struct radv_winsys_gpuvm_fault_info fault_info = {0}; struct radv_winsys_gpuvm_fault_info fault_info = {0};
struct radv_device *device = queue->device; struct radv_device *device = queue->device;
@ -822,7 +829,7 @@ radv_check_gpu_hangs(struct radv_queue *queue, const struct radv_winsys_submit_i
if (vm_fault_occurred) { if (vm_fault_occurred) {
fprintf(f, "VM fault report.\n\n"); fprintf(f, "VM fault report.\n\n");
fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n", fault_info.addr); fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n", fault_info.addr);
ac_print_gpuvm_fault_status(f, device->physical_device->info.gfx_level, fault_info.status); ac_print_gpuvm_fault_status(f, pdev->info.gfx_level, fault_info.status);
} }
break; break;
case RADV_DEVICE_FAULT_CHUNK_APP_INFO: case RADV_DEVICE_FAULT_CHUNK_APP_INFO:
@ -830,7 +837,7 @@ radv_check_gpu_hangs(struct radv_queue *queue, const struct radv_winsys_submit_i
break; break;
case RADV_DEVICE_FAULT_CHUNK_GPU_INFO: case RADV_DEVICE_FAULT_CHUNK_GPU_INFO:
radv_dump_device_name(device, f); radv_dump_device_name(device, f);
ac_print_gpu_info(&device->physical_device->info, f); ac_print_gpu_info(&pdev->info, f);
break; break;
case RADV_DEVICE_FAULT_CHUNK_DMESG: case RADV_DEVICE_FAULT_CHUNK_DMESG:
radv_dump_dmesg(f); radv_dump_dmesg(f);
@ -1010,12 +1017,13 @@ struct radv_sq_hw_reg {
static void static void
radv_dump_sq_hw_regs(struct radv_device *device) radv_dump_sq_hw_regs(struct radv_device *device)
{ {
enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
enum radeon_family family = device->physical_device->info.family; enum amd_gfx_level gfx_level = pdev->info.gfx_level;
enum radeon_family family = pdev->info.family;
struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6]; struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];
fprintf(stderr, "\nHardware registers:\n"); fprintf(stderr, "\nHardware registers:\n");
if (device->physical_device->info.gfx_level >= GFX10) { if (pdev->info.gfx_level >= GFX10) {
ac_dump_reg(stderr, gfx_level, family, R_000408_SQ_WAVE_STATUS, regs->status, ~0); ac_dump_reg(stderr, gfx_level, family, R_000408_SQ_WAVE_STATUS, regs->status, ~0);
ac_dump_reg(stderr, gfx_level, family, R_00040C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0); ac_dump_reg(stderr, gfx_level, family, R_00040C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0);
ac_dump_reg(stderr, gfx_level, family, R_00045C_SQ_WAVE_HW_ID1, regs->hw_id, ~0); ac_dump_reg(stderr, gfx_level, family, R_00045C_SQ_WAVE_HW_ID1, regs->hw_id, ~0);
@ -1084,6 +1092,7 @@ radv_GetDeviceFaultInfoEXT(VkDevice _device, VkDeviceFaultCountsEXT *pFaultCount
&pFaultCounts->addressInfoCount); &pFaultCounts->addressInfoCount);
struct radv_winsys_gpuvm_fault_info fault_info = {0}; struct radv_winsys_gpuvm_fault_info fault_info = {0};
RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_device, device, _device);
const struct radv_physical_device *pdev = radv_device_physical(device);
bool vm_fault_occurred = false; bool vm_fault_occurred = false;
/* Query if a GPUVM fault happened. */ /* Query if a GPUVM fault happened. */
@ -1094,8 +1103,6 @@ radv_GetDeviceFaultInfoEXT(VkDevice _device, VkDeviceFaultCountsEXT *pFaultCount
pFaultCounts->vendorBinarySize = 0; pFaultCounts->vendorBinarySize = 0;
if (device->gpu_hang_report) { if (device->gpu_hang_report) {
const struct radv_physical_device *pdev = device->physical_device;
VkDeviceFaultVendorBinaryHeaderVersionOneEXT hdr; VkDeviceFaultVendorBinaryHeaderVersionOneEXT hdr;
hdr.headerSize = sizeof(VkDeviceFaultVendorBinaryHeaderVersionOneEXT); hdr.headerSize = sizeof(VkDeviceFaultVendorBinaryHeaderVersionOneEXT);
@ -1127,7 +1134,7 @@ radv_GetDeviceFaultInfoEXT(VkDevice _device, VkDeviceFaultCountsEXT *pFaultCount
if (pFaultInfo) if (pFaultInfo)
strncpy(pFaultInfo->description, "A GPUVM fault has been detected", sizeof(pFaultInfo->description)); strncpy(pFaultInfo->description, "A GPUVM fault has been detected", sizeof(pFaultInfo->description));
if (device->physical_device->info.gfx_level >= GFX10) { if (pdev->info.gfx_level >= GFX10) {
addr_fault_info.addressType = G_00A130_RW(fault_info.status) ? VK_DEVICE_FAULT_ADDRESS_TYPE_WRITE_INVALID_EXT addr_fault_info.addressType = G_00A130_RW(fault_info.status) ? VK_DEVICE_FAULT_ADDRESS_TYPE_WRITE_INVALID_EXT
: VK_DEVICE_FAULT_ADDRESS_TYPE_READ_INVALID_EXT; : VK_DEVICE_FAULT_ADDRESS_TYPE_READ_INVALID_EXT;
} else { } else {

View file

@ -1074,6 +1074,8 @@ write_texel_buffer_descriptor(struct radv_device *device, struct radv_cmd_buffer
static ALWAYS_INLINE void static ALWAYS_INLINE void
write_buffer_descriptor(struct radv_device *device, unsigned *dst, uint64_t va, uint64_t range) write_buffer_descriptor(struct radv_device *device, unsigned *dst, uint64_t va, uint64_t range)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!va) { if (!va) {
memset(dst, 0, 4 * 4); memset(dst, 0, 4 * 4);
return; return;
@ -1082,9 +1084,9 @@ write_buffer_descriptor(struct radv_device *device, unsigned *dst, uint64_t va,
uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (device->physical_device->info.gfx_level >= GFX10) { } else if (pdev->info.gfx_level >= GFX10) {
rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
S_008F0C_RESOURCE_LEVEL(1); S_008F0C_RESOURCE_LEVEL(1);
} else { } else {

View file

@ -97,10 +97,10 @@ radv_GetMemoryHostPointerPropertiesEXT(VkDevice _device, VkExternalMemoryHandleT
VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties) VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
{ {
RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_device, device, _device);
const struct radv_physical_device *pdev = radv_device_physical(device);
switch (handleType) { switch (handleType) {
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: { case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
const struct radv_physical_device *pdev = device->physical_device;
uint32_t memoryTypeBits = 0; uint32_t memoryTypeBits = 0;
for (int i = 0; i < pdev->memory_properties.memoryTypeCount; i++) { for (int i = 0; i < pdev->memory_properties.memoryTypeCount; i++) {
if (pdev->memory_domains[i] == RADEON_DOMAIN_GTT && !(pdev->memory_flags[i] & RADEON_FLAG_GTT_WC)) { if (pdev->memory_domains[i] == RADEON_DOMAIN_GTT && !(pdev->memory_flags[i] & RADEON_FLAG_GTT_WC)) {
@ -186,8 +186,10 @@ static struct radv_shader_part_cache_ops vs_prolog_ops = {
static VkResult static VkResult
radv_device_init_vs_prologs(struct radv_device *device) radv_device_init_vs_prologs(struct radv_device *device)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!radv_shader_part_cache_init(&device->vs_prologs, &vs_prolog_ops)) if (!radv_shader_part_cache_init(&device->vs_prologs, &vs_prolog_ops))
return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); return vk_error(pdev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
/* don't pre-compile prologs if we want to print them */ /* don't pre-compile prologs if we want to print them */
if (device->instance->debug_flags & RADV_DEBUG_DUMP_PROLOGS) if (device->instance->debug_flags & RADV_DEBUG_DUMP_PROLOGS)
@ -196,9 +198,9 @@ radv_device_init_vs_prologs(struct radv_device *device)
struct radv_vs_prolog_key key; struct radv_vs_prolog_key key;
memset(&key, 0, sizeof(key)); memset(&key, 0, sizeof(key));
key.as_ls = false; key.as_ls = false;
key.is_ngg = device->physical_device->use_ngg; key.is_ngg = pdev->use_ngg;
key.next_stage = MESA_SHADER_VERTEX; key.next_stage = MESA_SHADER_VERTEX;
key.wave32 = device->physical_device->ge_wave_size == 32; key.wave32 = pdev->ge_wave_size == 32;
for (unsigned i = 1; i <= MAX_VERTEX_ATTRIBS; i++) { for (unsigned i = 1; i <= MAX_VERTEX_ATTRIBS; i++) {
key.instance_rate_inputs = 0; key.instance_rate_inputs = 0;
@ -206,7 +208,7 @@ radv_device_init_vs_prologs(struct radv_device *device)
device->simple_vs_prologs[i - 1] = radv_create_vs_prolog(device, &key); device->simple_vs_prologs[i - 1] = radv_create_vs_prolog(device, &key);
if (!device->simple_vs_prologs[i - 1]) if (!device->simple_vs_prologs[i - 1])
return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); return vk_error(pdev->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
} }
unsigned idx = 0; unsigned idx = 0;
@ -218,7 +220,7 @@ radv_device_init_vs_prologs(struct radv_device *device)
struct radv_shader_part *prolog = radv_create_vs_prolog(device, &key); struct radv_shader_part *prolog = radv_create_vs_prolog(device, &key);
if (!prolog) if (!prolog)
return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY); return vk_error(pdev->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
assert(idx == radv_instance_rate_prolog_index(num_attributes, key.instance_rate_inputs)); assert(idx == radv_instance_rate_prolog_index(num_attributes, key.instance_rate_inputs));
device->instance_rate_vs_prologs[idx++] = prolog; device->instance_rate_vs_prologs[idx++] = prolog;
@ -638,11 +640,11 @@ capture_trace(VkQueue _queue)
static void static void
radv_device_init_cache_key(struct radv_device *device) radv_device_init_cache_key(struct radv_device *device)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_device_cache_key *key = &device->cache_key; struct radv_device_cache_key *key = &device->cache_key;
key->disable_trunc_coord = device->disable_trunc_coord; key->disable_trunc_coord = device->disable_trunc_coord;
key->image_2d_view_of_3d = key->image_2d_view_of_3d = device->vk.enabled_features.image2DViewOf3D && pdev->info.gfx_level == GFX9;
device->vk.enabled_features.image2DViewOf3D && device->physical_device->info.gfx_level == GFX9;
key->mesh_shader_queries = device->vk.enabled_features.meshShaderQueries; key->mesh_shader_queries = device->vk.enabled_features.meshShaderQueries;
key->primitives_generated_query = radv_uses_primitives_generated_query(device); key->primitives_generated_query = radv_uses_primitives_generated_query(device);
@ -655,7 +657,7 @@ radv_device_init_cache_key(struct radv_device *device)
* enabled, regardless of what features are actually enabled on the logical device. * enabled, regardless of what features are actually enabled on the logical device.
*/ */
if (device->vk.enabled_features.shaderObject) { if (device->vk.enabled_features.shaderObject) {
key->image_2d_view_of_3d = device->physical_device->info.gfx_level == GFX9; key->image_2d_view_of_3d = pdev->info.gfx_level == GFX9;
key->primitives_generated_query = true; key->primitives_generated_query = true;
} }
@ -701,7 +703,6 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
device->vk.command_buffer_ops = &radv_cmd_buffer_ops; device->vk.command_buffer_ops = &radv_cmd_buffer_ops;
device->instance = pdev->instance; device->instance = pdev->instance;
device->physical_device = pdev;
init_dispatch_tables(device, pdev); init_dispatch_tables(device, pdev);
@ -782,13 +783,12 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
device->shader_use_invisible_vram = (device->instance->perftest_flags & RADV_PERFTEST_DMA_SHADERS) && device->shader_use_invisible_vram = (device->instance->perftest_flags & RADV_PERFTEST_DMA_SHADERS) &&
/* SDMA buffer copy is only implemented for GFX7+. */ /* SDMA buffer copy is only implemented for GFX7+. */
device->physical_device->info.gfx_level >= GFX7; pdev->info.gfx_level >= GFX7;
result = radv_init_shader_upload_queue(device); result = radv_init_shader_upload_queue(device);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
goto fail; goto fail;
device->pbb_allowed = device->pbb_allowed = pdev->info.gfx_level >= GFX9 && !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
device->physical_device->info.gfx_level >= GFX9 && !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
device->disable_trunc_coord = device->instance->drirc.disable_trunc_coord; device->disable_trunc_coord = device->instance->drirc.disable_trunc_coord;
@ -818,13 +818,13 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1); device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
if (device->physical_device->info.gfx_level >= GFX7) { if (pdev->info.gfx_level >= GFX7) {
/* If the KMD allows it (there is a KMD hw register for it), /* If the KMD allows it (there is a KMD hw register for it),
* allow launching waves out-of-order. * allow launching waves out-of-order.
*/ */
device->dispatch_initiator |= S_00B800_ORDER_MODE(1); device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
} }
if (device->physical_device->info.gfx_level >= GFX10) { if (pdev->info.gfx_level >= GFX10) {
/* Enable asynchronous compute tunneling. The KMD restricts this feature /* Enable asynchronous compute tunneling. The KMD restricts this feature
* to high-priority compute queues, so setting the bit on any other queue * to high-priority compute queues, so setting the bit on any other queue
* is a no-op. PAL always sets this bit as well. * is a no-op. PAL always sets this bit as well.
@ -862,7 +862,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
} }
if (device->instance->vk.trace_mode & RADV_TRACE_MODE_RGP) { if (device->instance->vk.trace_mode & RADV_TRACE_MODE_RGP) {
if (device->physical_device->info.gfx_level < GFX8 || device->physical_device->info.gfx_level > GFX11) { if (pdev->info.gfx_level < GFX8 || pdev->info.gfx_level > GFX11) {
fprintf(stderr, "GPU hardware not supported: refer to " fprintf(stderr, "GPU hardware not supported: refer to "
"the RGP documentation for the list of " "the RGP documentation for the list of "
"supported GPUs!\n"); "supported GPUs!\n");
@ -882,13 +882,13 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
radv_sqtt_queue_events_enabled() ? "enabled" : "disabled"); radv_sqtt_queue_events_enabled() ? "enabled" : "disabled");
if (radv_spm_trace_enabled(device->instance)) { if (radv_spm_trace_enabled(device->instance)) {
if (device->physical_device->info.gfx_level >= GFX10) { if (pdev->info.gfx_level >= GFX10) {
if (!radv_spm_init(device)) { if (!radv_spm_init(device)) {
result = VK_ERROR_INITIALIZATION_FAILED; result = VK_ERROR_INITIALIZATION_FAILED;
goto fail; goto fail;
} }
} else { } else {
fprintf(stderr, "radv: SPM isn't supported for this GPU (%s)!\n", device->physical_device->name); fprintf(stderr, "radv: SPM isn't supported for this GPU (%s)!\n", pdev->name);
} }
} }
} }
@ -905,7 +905,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
if (getenv("RADV_TRAP_HANDLER")) { if (getenv("RADV_TRAP_HANDLER")) {
/* TODO: Add support for more hardware. */ /* TODO: Add support for more hardware. */
assert(device->physical_device->info.gfx_level == GFX8); assert(pdev->info.gfx_level == GFX8);
fprintf(stderr, "**********************************************************************\n"); fprintf(stderr, "**********************************************************************\n");
fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n"); fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n");
@ -922,7 +922,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
} }
} }
if (device->physical_device->info.gfx_level == GFX10_3) { if (pdev->info.gfx_level == GFX10_3) {
if (getenv("RADV_FORCE_VRS_CONFIG_FILE")) { if (getenv("RADV_FORCE_VRS_CONFIG_FILE")) {
const char *file = radv_get_force_vrs_config_file(); const char *file = radv_get_force_vrs_config_file();
@ -942,7 +942,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
} }
/* PKT3_LOAD_SH_REG_INDEX is supported on GFX8+, but it hangs with compute queues until GFX10.3. */ /* PKT3_LOAD_SH_REG_INDEX is supported on GFX8+, but it hangs with compute queues until GFX10.3. */
device->load_grid_size_from_user_sgpr = device->physical_device->info.gfx_level >= GFX10_3; device->load_grid_size_from_user_sgpr = pdev->info.gfx_level >= GFX10_3;
device->keep_shader_info = keep_shader_info; device->keep_shader_info = keep_shader_info;
@ -1009,7 +1009,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
goto fail_cache; goto fail_cache;
} }
if (!device->physical_device->ac_perfcounters.blocks) { if (!pdev->ac_perfcounters.blocks) {
result = VK_ERROR_INITIALIZATION_FAILED; result = VK_ERROR_INITIALIZATION_FAILED;
goto fail_cache; goto fail_cache;
} }
@ -1029,7 +1029,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
goto fail_cache; goto fail_cache;
if (device->physical_device->info.gfx_level == GFX11 && device->physical_device->info.has_dedicated_vram && if (pdev->info.gfx_level == GFX11 && pdev->info.has_dedicated_vram &&
device->instance->drirc.force_pstate_peak_gfx11_dgpu) { device->instance->drirc.force_pstate_peak_gfx11_dgpu) {
if (!radv_device_acquire_performance_counters(device)) if (!radv_device_acquire_performance_counters(device))
fprintf(stderr, "radv: failed to set pstate to profile_peak.\n"); fprintf(stderr, "radv: failed to set pstate to profile_peak.\n");
@ -1197,10 +1197,10 @@ radv_GetImageMemoryRequirements2(VkDevice _device, const VkImageMemoryRequiremen
{ {
RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_image, image, pInfo->image); RADV_FROM_HANDLE(radv_image, image, pInfo->image);
const struct radv_physical_device *pdev = radv_device_physical(device);
pMemoryRequirements->memoryRequirements.memoryTypeBits = pMemoryRequirements->memoryRequirements.memoryTypeBits =
((1u << device->physical_device->memory_properties.memoryTypeCount) - 1u) & ((1u << pdev->memory_properties.memoryTypeCount) - 1u) & ~pdev->memory_types_32bit;
~device->physical_device->memory_types_32bit;
pMemoryRequirements->memoryRequirements.size = image->size; pMemoryRequirements->memoryRequirements.size = image->size;
pMemoryRequirements->memoryRequirements.alignment = image->alignment; pMemoryRequirements->memoryRequirements.alignment = image->alignment;
@ -1254,7 +1254,9 @@ radv_surface_max_layer_count(struct radv_image_view *iview)
unsigned unsigned
radv_get_dcc_max_uncompressed_block_size(const struct radv_device *device, const struct radv_image *image) radv_get_dcc_max_uncompressed_block_size(const struct radv_device *device, const struct radv_image *image)
{ {
if (device->physical_device->info.gfx_level < GFX10 && image->vk.samples > 1) { const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level < GFX10 && image->vk.samples > 1) {
if (image->planes[0].surface.bpe == 1) if (image->planes[0].surface.bpe == 1)
return V_028C78_MAX_BLOCK_SIZE_64B; return V_028C78_MAX_BLOCK_SIZE_64B;
else if (image->planes[0].surface.bpe == 2) else if (image->planes[0].surface.bpe == 2)
@ -1267,7 +1269,9 @@ radv_get_dcc_max_uncompressed_block_size(const struct radv_device *device, const
static unsigned static unsigned
get_dcc_min_compressed_block_size(const struct radv_device *device) get_dcc_min_compressed_block_size(const struct radv_device *device)
{ {
if (!device->physical_device->info.has_dedicated_vram) { const struct radv_physical_device *pdev = radv_device_physical(device);
if (!pdev->info.has_dedicated_vram) {
/* amdvlk: [min-compressed-block-size] should be set to 32 for /* amdvlk: [min-compressed-block-size] should be set to 32 for
* dGPU and 64 for APU because all of our APUs to date use * dGPU and 64 for APU because all of our APUs to date use
* DIMMs which have a request granularity size of 64B while all * DIMMs which have a request granularity size of 64B while all
@ -1282,6 +1286,7 @@ get_dcc_min_compressed_block_size(const struct radv_device *device)
static uint32_t static uint32_t
radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iview) radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iview)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned max_uncompressed_block_size = radv_get_dcc_max_uncompressed_block_size(device, iview->image); unsigned max_uncompressed_block_size = radv_get_dcc_max_uncompressed_block_size(device, iview->image);
unsigned min_compressed_block_size = get_dcc_min_compressed_block_size(device); unsigned min_compressed_block_size = get_dcc_min_compressed_block_size(device);
unsigned max_compressed_block_size; unsigned max_compressed_block_size;
@ -1293,7 +1298,7 @@ radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iv
/* For GFX9+ ac_surface computes values for us (except min_compressed /* For GFX9+ ac_surface computes values for us (except min_compressed
* and max_uncompressed) */ * and max_uncompressed) */
if (device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
max_compressed_block_size = iview->image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size; max_compressed_block_size = iview->image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size;
independent_128b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_128B_blocks; independent_128b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_128B_blocks;
independent_64b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_64B_blocks; independent_64b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_64B_blocks;
@ -1322,12 +1327,12 @@ radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iv
S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) | S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks); S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
result |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX11(independent_128b_blocks) | result |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX11(independent_128b_blocks) |
S_028C78_DISABLE_CONSTANT_ENCODE_REG(1) | S_028C78_DISABLE_CONSTANT_ENCODE_REG(1) |
S_028C78_FDCC_ENABLE(radv_dcc_enabled(iview->image, iview->vk.base_mip_level)); S_028C78_FDCC_ENABLE(radv_dcc_enabled(iview->image, iview->vk.base_mip_level));
if (device->physical_device->info.family >= CHIP_GFX1103_R2) { if (pdev->info.family >= CHIP_GFX1103_R2) {
result |= S_028C78_ENABLE_MAX_COMP_FRAG_OVERRIDE(1) | S_028C78_MAX_COMP_FRAGS(iview->image->vk.samples >= 4); result |= S_028C78_ENABLE_MAX_COMP_FRAG_OVERRIDE(1) | S_028C78_MAX_COMP_FRAGS(iview->image->vk.samples >= 4);
} }
} else { } else {
@ -1341,6 +1346,7 @@ void
radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb, radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
struct radv_image_view *iview) struct radv_image_view *iview)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct util_format_description *desc; const struct util_format_description *desc;
unsigned ntype, format, swap, endian; unsigned ntype, format, swap, endian;
unsigned blend_clamp = 0, blend_bypass = 0; unsigned blend_clamp = 0, blend_bypass = 0;
@ -1354,7 +1360,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
memset(cb, 0, sizeof(*cb)); memset(cb, 0, sizeof(*cb));
/* Intensity is implemented as Red, so treat it that way. */ /* Intensity is implemented as Red, so treat it that way. */
if (device->physical_device->info.gfx_level >= GFX11) if (pdev->info.gfx_level >= GFX11)
cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX11(desc->swizzle[3] == PIPE_SWIZZLE_1); cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX11(desc->swizzle[3] == PIPE_SWIZZLE_1);
else else
cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX6(desc->swizzle[3] == PIPE_SWIZZLE_1); cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX6(desc->swizzle[3] == PIPE_SWIZZLE_1);
@ -1369,11 +1375,11 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
cb->cb_color_base = va >> 8; cb->cb_color_base = va >> 8;
if (device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) | cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned); S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);
} else if (device->physical_device->info.gfx_level >= GFX10) { } else if (pdev->info.gfx_level >= GFX10) {
cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) | cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
S_028EE0_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) | S_028EE0_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
S_028EE0_CMASK_PIPE_ALIGNED(1) | S_028EE0_CMASK_PIPE_ALIGNED(1) |
@ -1414,13 +1420,13 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index); cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
if (radv_image_has_fmask(iview->image)) { if (radv_image_has_fmask(iview->image)) {
if (device->physical_device->info.gfx_level >= GFX7) if (pdev->info.gfx_level >= GFX7)
cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1); cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1);
cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.color.fmask.tiling_index); cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.color.fmask.tiling_index);
cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.color.fmask.slice_tile_max); cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.color.fmask.slice_tile_max);
} else { } else {
/* This must be set for fast clear to work without FMASK. */ /* This must be set for fast clear to work without FMASK. */
if (device->physical_device->info.gfx_level >= GFX7) if (pdev->info.gfx_level >= GFX7)
cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max); cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max); cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
@ -1435,7 +1441,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset; va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset;
va += surf->meta_offset; va += surf->meta_offset;
if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && device->physical_device->info.gfx_level <= GFX8) if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && pdev->info.gfx_level <= GFX8)
va += plane->surface.u.legacy.color.dcc_level[iview->vk.base_mip_level].dcc_offset; va += plane->surface.u.legacy.color.dcc_level[iview->vk.base_mip_level].dcc_offset;
unsigned dcc_tile_swizzle = tile_swizzle; unsigned dcc_tile_swizzle = tile_swizzle;
@ -1452,7 +1458,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
if (iview->image->vk.samples > 1) { if (iview->image->vk.samples > 1) {
unsigned log_samples = util_logbase2(iview->image->vk.samples); unsigned log_samples = util_logbase2(iview->image->vk.samples);
if (device->physical_device->info.gfx_level >= GFX11) if (pdev->info.gfx_level >= GFX11)
cb->cb_color_attrib |= S_028C74_NUM_FRAGMENTS_GFX11(log_samples); cb->cb_color_attrib |= S_028C74_NUM_FRAGMENTS_GFX11(log_samples);
else else
cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS_GFX6(log_samples); cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS_GFX6(log_samples);
@ -1467,7 +1473,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
} }
ntype = ac_get_cb_number_type(desc->format); ntype = ac_get_cb_number_type(desc->format);
format = ac_get_cb_format(device->physical_device->info.gfx_level, desc->format); format = ac_get_cb_format(pdev->info.gfx_level, desc->format);
assert(format != V_028C70_COLOR_INVALID); assert(format != V_028C70_COLOR_INVALID);
swap = radv_translate_colorswap(iview->vk.format, false); swap = radv_translate_colorswap(iview->vk.format, false);
@ -1498,14 +1504,14 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
format != V_028C70_COLOR_24_8) | format != V_028C70_COLOR_24_8) |
S_028C70_NUMBER_TYPE(ntype); S_028C70_NUMBER_TYPE(ntype);
if (device->physical_device->info.gfx_level >= GFX11) if (pdev->info.gfx_level >= GFX11)
cb->cb_color_info |= S_028C70_FORMAT_GFX11(format); cb->cb_color_info |= S_028C70_FORMAT_GFX11(format);
else else
cb->cb_color_info |= S_028C70_FORMAT_GFX6(format) | S_028C70_ENDIAN(endian); cb->cb_color_info |= S_028C70_FORMAT_GFX6(format) | S_028C70_ENDIAN(endian);
if (radv_image_has_fmask(iview->image)) { if (radv_image_has_fmask(iview->image)) {
cb->cb_color_info |= S_028C70_COMPRESSION(1); cb->cb_color_info |= S_028C70_COMPRESSION(1);
if (device->physical_device->info.gfx_level == GFX6) { if (pdev->info.gfx_level == GFX6) {
unsigned fmask_bankh = util_logbase2(surf->u.legacy.color.fmask.bankh); unsigned fmask_bankh = util_logbase2(surf->u.legacy.color.fmask.bankh);
cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
} }
@ -1514,7 +1520,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
/* Allow the texture block to read FMASK directly without decompressing it. */ /* Allow the texture block to read FMASK directly without decompressing it. */
cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1); cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
if (device->physical_device->info.gfx_level == GFX8) { if (pdev->info.gfx_level == GFX8) {
/* Set CMASK into a tiling format that allows /* Set CMASK into a tiling format that allows
* the texture block to read it. * the texture block to read it.
*/ */
@ -1527,25 +1533,25 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
cb->cb_color_info |= S_028C70_FAST_CLEAR(1); cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && !iview->disable_dcc_mrt && if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && !iview->disable_dcc_mrt &&
device->physical_device->info.gfx_level < GFX11) pdev->info.gfx_level < GFX11)
cb->cb_color_info |= S_028C70_DCC_ENABLE(1); cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview); cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
/* This must be set for fast clear to work without FMASK. */ /* This must be set for fast clear to work without FMASK. */
if (!radv_image_has_fmask(iview->image) && device->physical_device->info.gfx_level == GFX6) { if (!radv_image_has_fmask(iview->image) && pdev->info.gfx_level == GFX6) {
unsigned bankh = util_logbase2(surf->u.legacy.bankh); unsigned bankh = util_logbase2(surf->u.legacy.bankh);
cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
} }
if (device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
unsigned mip0_depth = iview->image->vk.image_type == VK_IMAGE_TYPE_3D ? (iview->extent.depth - 1) unsigned mip0_depth = iview->image->vk.image_type == VK_IMAGE_TYPE_3D ? (iview->extent.depth - 1)
: (iview->image->vk.array_layers - 1); : (iview->image->vk.array_layers - 1);
unsigned width = vk_format_get_plane_width(iview->image->vk.format, iview->plane_id, iview->extent.width); unsigned width = vk_format_get_plane_width(iview->image->vk.format, iview->plane_id, iview->extent.width);
unsigned height = vk_format_get_plane_height(iview->image->vk.format, iview->plane_id, iview->extent.height); unsigned height = vk_format_get_plane_height(iview->image->vk.format, iview->plane_id, iview->extent.height);
unsigned max_mip = iview->image->vk.mip_levels - 1; unsigned max_mip = iview->image->vk.mip_levels - 1;
if (device->physical_device->info.gfx_level >= GFX10) { if (pdev->info.gfx_level >= GFX10) {
unsigned base_level = iview->vk.base_mip_level; unsigned base_level = iview->vk.base_mip_level;
if (iview->nbc_view.valid) { if (iview->nbc_view.valid) {
@ -1556,7 +1562,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(base_level); cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(base_level);
cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) | S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) | cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) | S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
S_028EE0_RESOURCE_LEVEL(device->physical_device->info.gfx_level >= GFX11 ? 0 : 1); S_028EE0_RESOURCE_LEVEL(pdev->info.gfx_level >= GFX11 ? 0 : 1);
} else { } else {
cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->vk.base_mip_level); cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->vk.base_mip_level);
cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type); cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
@ -1567,7 +1573,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
* *
* We set the pitch in MIP0_WIDTH. * We set the pitch in MIP0_WIDTH.
*/ */
if (device->physical_device->info.gfx_level && iview->image->vk.image_type == VK_IMAGE_TYPE_2D && if (pdev->info.gfx_level && iview->image->vk.image_type == VK_IMAGE_TYPE_2D &&
iview->image->vk.array_layers == 1 && plane->surface.is_linear) { iview->image->vk.array_layers == 1 && plane->surface.is_linear) {
assert((plane->surface.u.gfx9.surf_pitch * plane->surface.bpe) % 256 == 0); assert((plane->surface.u.gfx9.surf_pitch * plane->surface.bpe) % 256 == 0);
@ -1586,11 +1592,12 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
static unsigned static unsigned
radv_calc_decompress_on_z_planes(const struct radv_device *device, struct radv_image_view *iview) radv_calc_decompress_on_z_planes(const struct radv_device *device, struct radv_image_view *iview)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned max_zplanes = 0; unsigned max_zplanes = 0;
assert(radv_image_is_tc_compat_htile(iview->image)); assert(radv_image_is_tc_compat_htile(iview->image));
if (device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
/* Default value for 32-bit depth surfaces. */ /* Default value for 32-bit depth surfaces. */
max_zplanes = 4; max_zplanes = 4;
@ -1598,9 +1605,8 @@ radv_calc_decompress_on_z_planes(const struct radv_device *device, struct radv_i
max_zplanes = 2; max_zplanes = 2;
/* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */ /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */
if (device->physical_device->info.has_two_planes_iterate256_bug && if (pdev->info.has_two_planes_iterate256_bug && radv_image_get_iterate256(device, iview->image) &&
radv_image_get_iterate256(device, iview->image) && !radv_image_tile_stencil_disabled(device, iview->image) && !radv_image_tile_stencil_disabled(device, iview->image) && iview->image->vk.samples == 4) {
iview->image->vk.samples == 4) {
max_zplanes = 1; max_zplanes = 1;
} }
@ -1650,6 +1656,7 @@ void
radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buffer_info *ds, radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buffer_info *ds,
struct radv_image_view *iview, VkImageAspectFlags ds_aspects) struct radv_image_view *iview, VkImageAspectFlags ds_aspects)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned level = iview->vk.base_mip_level; unsigned level = iview->vk.base_mip_level;
unsigned format, stencil_format; unsigned format, stencil_format;
uint64_t va, s_offs, z_offs; uint64_t va, s_offs, z_offs;
@ -1668,7 +1675,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
ds->db_depth_view = S_028008_SLICE_START(iview->vk.base_array_layer) | S_028008_SLICE_MAX(max_slice) | ds->db_depth_view = S_028008_SLICE_START(iview->vk.base_array_layer) | S_028008_SLICE_MAX(max_slice) |
S_028008_Z_READ_ONLY(!(ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) | S_028008_Z_READ_ONLY(!(ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) |
S_028008_STENCIL_READ_ONLY(!(ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT)); S_028008_STENCIL_READ_ONLY(!(ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT));
if (device->physical_device->info.gfx_level >= GFX10) { if (pdev->info.gfx_level >= GFX10) {
ds->db_depth_view |= ds->db_depth_view |=
S_028008_SLICE_START_HI(iview->vk.base_array_layer >> 11) | S_028008_SLICE_MAX_HI(max_slice >> 11); S_028008_SLICE_START_HI(iview->vk.base_array_layer >> 11) | S_028008_SLICE_MAX_HI(max_slice >> 11);
} }
@ -1681,20 +1688,19 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
/* Recommended value for better performance with 4x and 8x. */ /* Recommended value for better performance with 4x and 8x. */
ds->db_render_override2 = S_028010_DECOMPRESS_Z_ON_FLUSH(iview->image->vk.samples >= 4) | ds->db_render_override2 = S_028010_DECOMPRESS_Z_ON_FLUSH(iview->image->vk.samples >= 4) |
S_028010_CENTROID_COMPUTATION_MODE(device->physical_device->info.gfx_level >= GFX10_3); S_028010_CENTROID_COMPUTATION_MODE(pdev->info.gfx_level >= GFX10_3);
if (device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
assert(surf->u.gfx9.surf_offset == 0); assert(surf->u.gfx9.surf_offset == 0);
s_offs += surf->u.gfx9.zs.stencil_offset; s_offs += surf->u.gfx9.zs.stencil_offset;
ds->db_z_info = S_028038_FORMAT(format) | S_028038_NUM_SAMPLES(util_logbase2(iview->image->vk.samples)) | ds->db_z_info = S_028038_FORMAT(format) | S_028038_NUM_SAMPLES(util_logbase2(iview->image->vk.samples)) |
S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) | S_028038_MAXMIP(iview->image->vk.mip_levels - 1) | S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) | S_028038_MAXMIP(iview->image->vk.mip_levels - 1) |
S_028038_ZRANGE_PRECISION(1) | S_028038_ZRANGE_PRECISION(1) | S_028040_ITERATE_256(pdev->info.gfx_level >= GFX11);
S_028040_ITERATE_256(device->physical_device->info.gfx_level >= GFX11);
ds->db_stencil_info = S_02803C_FORMAT(stencil_format) | S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode) | ds->db_stencil_info = S_02803C_FORMAT(stencil_format) | S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode) |
S_028044_ITERATE_256(device->physical_device->info.gfx_level >= GFX11); S_028044_ITERATE_256(pdev->info.gfx_level >= GFX11);
if (device->physical_device->info.gfx_level == GFX9) { if (pdev->info.gfx_level == GFX9) {
ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.epitch); ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.epitch);
ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.zs.stencil_epitch); ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.zs.stencil_epitch);
} }
@ -1711,7 +1717,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes); ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
if (device->physical_device->info.gfx_level >= GFX10) { if (pdev->info.gfx_level >= GFX10) {
bool iterate256 = radv_image_get_iterate256(device, iview->image); bool iterate256 = radv_image_get_iterate256(device, iview->image);
ds->db_z_info |= S_028040_ITERATE_FLUSH(1); ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
@ -1732,7 +1738,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
ds->db_htile_data_base = va >> 8; ds->db_htile_data_base = va >> 8;
ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1); ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1);
if (device->physical_device->info.gfx_level == GFX9) { if (pdev->info.gfx_level == GFX9) {
ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1); ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
} }
@ -1741,7 +1747,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
} }
} }
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
radv_gfx11_set_db_render_control(device, iview->image->vk.samples, &ds->db_render_control); radv_gfx11_set_db_render_control(device, iview->image->vk.samples, &ds->db_render_control);
} }
} else { } else {
@ -1760,8 +1766,8 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
if (iview->image->vk.samples > 1) if (iview->image->vk.samples > 1)
ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->vk.samples)); ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->vk.samples));
if (device->physical_device->info.gfx_level >= GFX7) { if (pdev->info.gfx_level >= GFX7) {
const struct radeon_info *gpu_info = &device->physical_device->info; const struct radeon_info *gpu_info = &pdev->info;
unsigned tiling_index = surf->u.legacy.tiling_index[level]; unsigned tiling_index = surf->u.legacy.tiling_index[level];
unsigned stencil_index = surf->u.legacy.zs.stencil_tiling_index[level]; unsigned stencil_index = surf->u.legacy.zs.stencil_tiling_index[level];
unsigned macro_index = surf->u.legacy.macro_tile_index; unsigned macro_index = surf->u.legacy.macro_tile_index;
@ -1820,7 +1826,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
void void
radv_gfx11_set_db_render_control(const struct radv_device *device, unsigned num_samples, unsigned *db_render_control) radv_gfx11_set_db_render_control(const struct radv_device *device, unsigned num_samples, unsigned *db_render_control)
{ {
const struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned max_allowed_tiles_in_wave = 0; unsigned max_allowed_tiles_in_wave = 0;
if (pdev->info.has_dedicated_vram) { if (pdev->info.has_dedicated_vram) {
@ -1911,6 +1917,7 @@ radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBi
VkMemoryFdPropertiesKHR *pMemoryFdProperties) VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{ {
RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_physical_device *pdev = radv_device_physical(device);
switch (handleType) { switch (handleType) {
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: { case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: {
@ -1919,7 +1926,7 @@ radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBi
if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags)) if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))
return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE); return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
pMemoryFdProperties->memoryTypeBits = radv_compute_valid_memory_types(device->physical_device, domains, flags); pMemoryFdProperties->memoryTypeBits = radv_compute_valid_memory_types(pdev, domains, flags);
return VK_SUCCESS; return VK_SUCCESS;
} }
default: default:
@ -1941,7 +1948,8 @@ radv_GetCalibratedTimestampsKHR(VkDevice _device, uint32_t timestampCount,
{ {
#ifndef _WIN32 #ifndef _WIN32
RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_device, device, _device);
uint32_t clock_crystal_freq = device->physical_device->info.clock_crystal_freq; const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t clock_crystal_freq = pdev->info.clock_crystal_freq;
int d; int d;
uint64_t begin, end; uint64_t begin, end;
uint64_t max_clock_period = 0; uint64_t max_clock_period = 0;
@ -1992,10 +2000,11 @@ radv_GetCalibratedTimestampsKHR(VkDevice _device, uint32_t timestampCount,
bool bool
radv_device_set_pstate(struct radv_device *device, bool enable) radv_device_set_pstate(struct radv_device *device, bool enable)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_winsys *ws = device->ws; struct radeon_winsys *ws = device->ws;
enum radeon_ctx_pstate pstate = enable ? RADEON_CTX_PSTATE_PEAK : RADEON_CTX_PSTATE_NONE; enum radeon_ctx_pstate pstate = enable ? RADEON_CTX_PSTATE_PEAK : RADEON_CTX_PSTATE_NONE;
if (device->physical_device->info.has_stable_pstate) { if (pdev->info.has_stable_pstate) {
/* pstate is per-device; setting it for one ctx is sufficient. /* pstate is per-device; setting it for one ctx is sufficient.
* We pick the first initialized one below. */ * We pick the first initialized one below. */
for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++)

View file

@ -35,6 +35,7 @@ radv_get_sequence_size_compute(const struct radv_indirect_command_layout *layout
const struct radv_compute_pipeline *pipeline, uint32_t *cmd_size) const struct radv_compute_pipeline *pipeline, uint32_t *cmd_size)
{ {
const struct radv_device *device = container_of(layout->base.device, struct radv_device, vk); const struct radv_device *device = container_of(layout->base.device, struct radv_device, vk);
const struct radv_physical_device *pdev = radv_device_physical(device);
/* dispatch */ /* dispatch */
*cmd_size += 5 * 4; *cmd_size += 5 * 4;
@ -55,7 +56,7 @@ radv_get_sequence_size_compute(const struct radv_indirect_command_layout *layout
/* COMPUTE_PGM_{LO,RSRC1,RSRC2} */ /* COMPUTE_PGM_{LO,RSRC1,RSRC2} */
*cmd_size += 7 * 4; *cmd_size += 7 * 4;
if (device->physical_device->info.gfx_level >= GFX10) { if (pdev->info.gfx_level >= GFX10) {
/* COMPUTE_PGM_RSRC3 */ /* COMPUTE_PGM_RSRC3 */
*cmd_size += 3 * 4; *cmd_size += 3 * 4;
} }
@ -87,6 +88,7 @@ radv_get_sequence_size_graphics(const struct radv_indirect_command_layout *layou
uint32_t *upload_size) uint32_t *upload_size)
{ {
const struct radv_device *device = container_of(layout->base.device, struct radv_device, vk); const struct radv_device *device = container_of(layout->base.device, struct radv_device, vk);
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader *vs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX); const struct radv_shader *vs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX);
if (layout->bind_vbo_mask) { if (layout->bind_vbo_mask) {
@ -115,7 +117,7 @@ radv_get_sequence_size_graphics(const struct radv_indirect_command_layout *layou
} else { } else {
if (layout->draw_mesh_tasks) { if (layout->draw_mesh_tasks) {
/* userdata writes + instance count + non-indexed draw */ /* userdata writes + instance count + non-indexed draw */
*cmd_size += (6 + 2 + (device->physical_device->mesh_fast_launch_2 ? 5 : 3)) * 4; *cmd_size += (6 + 2 + (pdev->mesh_fast_launch_2 ? 5 : 3)) * 4;
} else { } else {
/* userdata writes + instance count + non-indexed draw */ /* userdata writes + instance count + non-indexed draw */
*cmd_size += (5 + 2 + 3) * 4; *cmd_size += (5 + 2 + 3) * 4;
@ -189,7 +191,8 @@ radv_get_sequence_size(const struct radv_indirect_command_layout *layout, struct
static uint32_t static uint32_t
radv_align_cmdbuf_size(const struct radv_device *device, uint32_t size, enum amd_ip_type ip_type) radv_align_cmdbuf_size(const struct radv_device *device, uint32_t size, enum amd_ip_type ip_type)
{ {
const uint32_t ib_alignment = device->physical_device->info.ip[ip_type].ib_alignment; const struct radv_physical_device *pdev = radv_device_physical(device);
const uint32_t ib_alignment = pdev->info.ip[ip_type].ib_alignment;
return align(size, ib_alignment); return align(size, ib_alignment);
} }
@ -365,7 +368,9 @@ nir_pkt3(nir_builder *b, unsigned op, nir_def *len)
static nir_def * static nir_def *
dgc_get_nop_packet(nir_builder *b, const struct radv_device *device) dgc_get_nop_packet(nir_builder *b, const struct radv_device *device)
{ {
if (device->physical_device->info.gfx_ib_pad_with_type2) { const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_ib_pad_with_type2) {
return nir_imm_int(b, PKT2_NOP_PAD); return nir_imm_int(b, PKT2_NOP_PAD);
} else { } else {
return nir_imm_int(b, PKT3_NOP_PAD); return nir_imm_int(b, PKT3_NOP_PAD);
@ -691,6 +696,8 @@ dgc_main_cmd_buf_offset(nir_builder *b, const struct radv_device *device)
static void static void
build_dgc_buffer_tail(nir_builder *b, nir_def *sequence_count, const struct radv_device *device) build_dgc_buffer_tail(nir_builder *b, nir_def *sequence_count, const struct radv_device *device)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_def *global_id = get_global_ids(b, 1); nir_def *global_id = get_global_ids(b, 1);
nir_def *cmd_buf_stride = load_param32(b, cmd_buf_stride); nir_def *cmd_buf_stride = load_param32(b, cmd_buf_stride);
@ -718,7 +725,7 @@ build_dgc_buffer_tail(nir_builder *b, nir_def *sequence_count, const struct radv
nir_def *packet, *packet_size; nir_def *packet, *packet_size;
if (device->physical_device->info.gfx_ib_pad_with_type2) { if (pdev->info.gfx_ib_pad_with_type2) {
packet_size = nir_imm_int(b, 4); packet_size = nir_imm_int(b, 4);
packet = nir_imm_int(b, PKT2_NOP_PAD); packet = nir_imm_int(b, PKT2_NOP_PAD);
} else { } else {
@ -741,6 +748,8 @@ build_dgc_buffer_tail(nir_builder *b, nir_def *sequence_count, const struct radv
static void static void
build_dgc_buffer_preamble(nir_builder *b, nir_def *sequence_count, const struct radv_device *device) build_dgc_buffer_preamble(nir_builder *b, nir_def *sequence_count, const struct radv_device *device)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_def *global_id = get_global_ids(b, 1); nir_def *global_id = get_global_ids(b, 1);
nir_def *use_preamble = nir_ine_imm(b, load_param8(b, use_preamble), 0); nir_def *use_preamble = nir_ine_imm(b, load_param8(b, use_preamble), 0);
@ -778,7 +787,7 @@ build_dgc_buffer_preamble(nir_builder *b, nir_def *sequence_count, const struct
nir_def *chain_packets[] = { nir_def *chain_packets[] = {
nir_imm_int(b, PKT3(PKT3_INDIRECT_BUFFER, 2, 0)), nir_imm_int(b, PKT3(PKT3_INDIRECT_BUFFER, 2, 0)),
addr, addr,
nir_imm_int(b, device->physical_device->info.address32_hi), nir_imm_int(b, pdev->info.address32_hi),
nir_ior_imm(b, words, S_3F2_CHAIN(1) | S_3F2_VALID(1) | S_3F2_PRE_ENA(false)), nir_ior_imm(b, words, S_3F2_CHAIN(1) | S_3F2_VALID(1) | S_3F2_PRE_ENA(false)),
}; };
@ -861,6 +870,8 @@ dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf
nir_def *index_buffer_offset, nir_def *ibo_type_32, nir_def *ibo_type_8, nir_def *index_buffer_offset, nir_def *ibo_type_32, nir_def *ibo_type_8,
nir_variable *max_index_count_var, const struct radv_device *device) nir_variable *max_index_count_var, const struct radv_device *device)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_def *index_stream_offset = nir_iadd(b, index_buffer_offset, stream_base); nir_def *index_stream_offset = nir_iadd(b, index_buffer_offset, stream_base);
nir_def *data = nir_load_ssbo(b, 4, 32, stream_buf, index_stream_offset); nir_def *data = nir_load_ssbo(b, 4, 32, stream_buf, index_stream_offset);
@ -876,10 +887,9 @@ dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf
nir_def *cmd_values[3 + 2 + 3]; nir_def *cmd_values[3 + 2 + 3];
if (device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX; unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
if (device->physical_device->info.gfx_level < GFX9 || if (pdev->info.gfx_level < GFX9 || (pdev->info.gfx_level == GFX9 && pdev->info.me_fw_version < 26))
(device->physical_device->info.gfx_level == GFX9 && device->physical_device->info.me_fw_version < 26))
opcode = PKT3_SET_UCONFIG_REG; opcode = PKT3_SET_UCONFIG_REG;
cmd_values[0] = nir_imm_int(b, PKT3(opcode, 1, 0)); cmd_values[0] = nir_imm_int(b, PKT3(opcode, 1, 0));
cmd_values[1] = nir_imm_int(b, (R_03090C_VGT_INDEX_TYPE - CIK_UCONFIG_REG_OFFSET) >> 2 | (2u << 28)); cmd_values[1] = nir_imm_int(b, (R_03090C_VGT_INDEX_TYPE - CIK_UCONFIG_REG_OFFSET) >> 2 | (2u << 28));
@ -1186,6 +1196,8 @@ static void
dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base, dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
nir_def *vbo_bind_mask, nir_variable *upload_offset, const struct radv_device *device) nir_def *vbo_bind_mask, nir_variable *upload_offset, const struct radv_device *device)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_def *vbo_cnt = load_param8(b, vbo_cnt); nir_def *vbo_cnt = load_param8(b, vbo_cnt);
nir_variable *vbo_idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "vbo_idx"); nir_variable *vbo_idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "vbo_idx");
nir_store_var(b, vbo_idx, nir_imm_int(b, 0), 0x1); nir_store_var(b, vbo_idx, nir_imm_int(b, 0), 0x1);
@ -1252,9 +1264,9 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
nir_pop_if(b, NULL); nir_pop_if(b, NULL);
nir_def *convert_cond = nir_ine_imm(b, nir_load_var(b, num_records), 0); nir_def *convert_cond = nir_ine_imm(b, nir_load_var(b, num_records), 0);
if (device->physical_device->info.gfx_level == GFX9) if (pdev->info.gfx_level == GFX9)
convert_cond = nir_imm_false(b); convert_cond = nir_imm_false(b);
else if (device->physical_device->info.gfx_level != GFX8) else if (pdev->info.gfx_level != GFX8)
convert_cond = nir_iand(b, convert_cond, nir_ieq_imm(b, stride, 0)); convert_cond = nir_iand(b, convert_cond, nir_ieq_imm(b, stride, 0));
nir_def *new_records = nir_def *new_records =
@ -1264,7 +1276,7 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
} }
nir_push_else(b, NULL); nir_push_else(b, NULL);
{ {
if (device->physical_device->info.gfx_level != GFX8) { if (pdev->info.gfx_level != GFX8) {
nir_push_if(b, nir_ine_imm(b, stride, 0)); nir_push_if(b, nir_ine_imm(b, stride, 0));
{ {
nir_def *r = nir_iadd(b, nir_load_var(b, num_records), nir_iadd_imm(b, stride, -1)); nir_def *r = nir_iadd(b, nir_load_var(b, num_records), nir_iadd_imm(b, stride, -1));
@ -1276,7 +1288,7 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
nir_pop_if(b, NULL); nir_pop_if(b, NULL);
nir_def *rsrc_word3 = nir_channel(b, nir_load_var(b, vbo_data), 3); nir_def *rsrc_word3 = nir_channel(b, nir_load_var(b, vbo_data), 3);
if (device->physical_device->info.gfx_level >= GFX10) { if (pdev->info.gfx_level >= GFX10) {
nir_def *oob_select = nir_bcsel(b, nir_ieq_imm(b, stride, 0), nir_imm_int(b, V_008F0C_OOB_SELECT_RAW), nir_def *oob_select = nir_bcsel(b, nir_ieq_imm(b, stride, 0), nir_imm_int(b, V_008F0C_OOB_SELECT_RAW),
nir_imm_int(b, V_008F0C_OOB_SELECT_STRUCTURED)); nir_imm_int(b, V_008F0C_OOB_SELECT_STRUCTURED));
rsrc_word3 = nir_iand_imm(b, rsrc_word3, C_008F0C_OOB_SELECT); rsrc_word3 = nir_iand_imm(b, rsrc_word3, C_008F0C_OOB_SELECT);
@ -1408,6 +1420,8 @@ static void
dgc_emit_draw_mesh_tasks(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base, dgc_emit_draw_mesh_tasks(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
nir_def *draw_params_offset, nir_def *sequence_id, const struct radv_device *device) nir_def *draw_params_offset, nir_def *sequence_id, const struct radv_device *device)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr); nir_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
nir_def *stream_offset = nir_iadd(b, draw_params_offset, stream_base); nir_def *stream_offset = nir_iadd(b, draw_params_offset, stream_base);
@ -1424,7 +1438,7 @@ dgc_emit_draw_mesh_tasks(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_
dgc_emit_userdata_mesh(b, cs, vtx_base_sgpr, x, y, z, sequence_id, device); dgc_emit_userdata_mesh(b, cs, vtx_base_sgpr, x, y, z, sequence_id, device);
dgc_emit_instance_count(b, cs, nir_imm_int(b, 1)); dgc_emit_instance_count(b, cs, nir_imm_int(b, 1));
if (device->physical_device->mesh_fast_launch_2) { if (pdev->mesh_fast_launch_2) {
dgc_emit_dispatch_mesh_direct(b, cs, x, y, z); dgc_emit_dispatch_mesh_direct(b, cs, x, y, z);
} else { } else {
nir_def *vertex_count = nir_imul(b, x, nir_imul(b, y, z)); nir_def *vertex_count = nir_imul(b, x, nir_imul(b, y, z));
@ -1454,6 +1468,8 @@ static void
dgc_emit_bind_pipeline(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base, dgc_emit_bind_pipeline(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
nir_def *pipeline_params_offset, const struct radv_device *device) nir_def *pipeline_params_offset, const struct radv_device *device)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base); nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset); nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);
@ -1465,7 +1481,7 @@ dgc_emit_bind_pipeline(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
dgc_emit1(b, cs, load_metadata32(b, rsrc1)); dgc_emit1(b, cs, load_metadata32(b, rsrc1));
dgc_emit1(b, cs, load_metadata32(b, rsrc2)); dgc_emit1(b, cs, load_metadata32(b, rsrc2));
if (device->physical_device->info.gfx_level >= GFX10) { if (pdev->info.gfx_level >= GFX10) {
dgc_emit_set_sh_reg_seq(b, cs, R_00B8A0_COMPUTE_PGM_RSRC3, 1); dgc_emit_set_sh_reg_seq(b, cs, R_00B8A0_COMPUTE_PGM_RSRC3, 1);
dgc_emit1(b, cs, load_metadata32(b, rsrc3)); dgc_emit1(b, cs, load_metadata32(b, rsrc3));
} }
@ -1504,6 +1520,7 @@ dgc_is_cond_render_enabled(nir_builder *b)
static nir_shader * static nir_shader *
build_dgc_prepare_shader(struct radv_device *dev) build_dgc_prepare_shader(struct radv_device *dev)
{ {
const struct radv_physical_device *pdev = radv_device_physical(dev);
nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_dgc_prepare"); nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_dgc_prepare");
b.shader->info.workgroup_size[0] = 64; b.shader->info.workgroup_size[0] = 64;
@ -1554,7 +1571,7 @@ build_dgc_prepare_shader(struct radv_device *dev)
struct dgc_cmdbuf cmd_buf = { struct dgc_cmdbuf cmd_buf = {
.descriptor = radv_meta_load_descriptor(&b, 0, DGC_DESC_PREPARE), .descriptor = radv_meta_load_descriptor(&b, 0, DGC_DESC_PREPARE),
.offset = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "cmd_buf_offset"), .offset = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "cmd_buf_offset"),
.gfx_level = dev->physical_device->info.gfx_level, .gfx_level = pdev->info.gfx_level,
.sqtt_enabled = !!dev->sqtt.bo, .sqtt_enabled = !!dev->sqtt.bo,
}; };
nir_store_var(&b, cmd_buf.offset, nir_iadd(&b, nir_imul(&b, global_id, cmd_buf_stride), cmd_buf_base_offset), 1); nir_store_var(&b, cmd_buf.offset, nir_iadd(&b, nir_imul(&b, global_id, cmd_buf_stride), cmd_buf_base_offset), 1);
@ -1647,7 +1664,7 @@ build_dgc_prepare_shader(struct radv_device *dev)
/* Pad the cmdbuffer if we did not use the whole stride */ /* Pad the cmdbuffer if we did not use the whole stride */
nir_push_if(&b, nir_ine(&b, nir_load_var(&b, cmd_buf.offset), cmd_buf_end)); nir_push_if(&b, nir_ine(&b, nir_load_var(&b, cmd_buf.offset), cmd_buf_end));
{ {
if (dev->physical_device->info.gfx_ib_pad_with_type2) { if (pdev->info.gfx_ib_pad_with_type2) {
nir_push_loop(&b); nir_push_loop(&b);
{ {
nir_def *curr_offset = nir_load_var(&b, cmd_buf.offset); nir_def *curr_offset = nir_load_var(&b, cmd_buf.offset);
@ -1872,6 +1889,7 @@ radv_GetGeneratedCommandsMemoryRequirementsNV(VkDevice _device,
VkMemoryRequirements2 *pMemoryRequirements) VkMemoryRequirements2 *pMemoryRequirements)
{ {
RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_device, device, _device);
const struct radv_physical_device *pdev = radv_device_physical(device);
VK_FROM_HANDLE(radv_indirect_command_layout, layout, pInfo->indirectCommandsLayout); VK_FROM_HANDLE(radv_indirect_command_layout, layout, pInfo->indirectCommandsLayout);
VK_FROM_HANDLE(radv_pipeline, pipeline, pInfo->pipeline); VK_FROM_HANDLE(radv_pipeline, pipeline, pInfo->pipeline);
@ -1882,10 +1900,9 @@ radv_GetGeneratedCommandsMemoryRequirementsNV(VkDevice _device,
radv_dgc_preamble_cmdbuf_size(device); radv_dgc_preamble_cmdbuf_size(device);
VkDeviceSize upload_buf_size = upload_stride * pInfo->maxSequencesCount; VkDeviceSize upload_buf_size = upload_stride * pInfo->maxSequencesCount;
pMemoryRequirements->memoryRequirements.memoryTypeBits = device->physical_device->memory_types_32bit; pMemoryRequirements->memoryRequirements.memoryTypeBits = pdev->memory_types_32bit;
pMemoryRequirements->memoryRequirements.alignment = pMemoryRequirements->memoryRequirements.alignment =
MAX2(device->physical_device->info.ip[AMD_IP_GFX].ib_alignment, MAX2(pdev->info.ip[AMD_IP_GFX].ib_alignment, pdev->info.ip[AMD_IP_COMPUTE].ib_alignment);
device->physical_device->info.ip[AMD_IP_COMPUTE].ib_alignment);
pMemoryRequirements->memoryRequirements.size = pMemoryRequirements->memoryRequirements.size =
align(cmd_buf_size + upload_buf_size, pMemoryRequirements->memoryRequirements.alignment); align(cmd_buf_size + upload_buf_size, pMemoryRequirements->memoryRequirements.alignment);
} }
@ -2051,6 +2068,7 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
{ {
VK_FROM_HANDLE(radv_indirect_command_layout, layout, pGeneratedCommandsInfo->indirectCommandsLayout); VK_FROM_HANDLE(radv_indirect_command_layout, layout, pGeneratedCommandsInfo->indirectCommandsLayout);
VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline); VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
*upload_size = MAX2(*upload_size, 16); *upload_size = MAX2(*upload_size, 16);
@ -2074,7 +2092,7 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
struct radv_shader *cs = radv_get_shader(compute_pipeline->base.shaders, MESA_SHADER_COMPUTE); struct radv_shader *cs = radv_get_shader(compute_pipeline->base.shaders, MESA_SHADER_COMPUTE);
if (cs->info.wave_size == 32) { if (cs->info.wave_size == 32) {
assert(cmd_buffer->device->physical_device->info.gfx_level >= GFX10); assert(pdev->info.gfx_level >= GFX10);
params->dispatch_initiator |= S_00B800_CS_W32_EN(1); params->dispatch_initiator |= S_00B800_CS_W32_EN(1);
} }
@ -2276,9 +2294,9 @@ radv_GetPipelineIndirectMemoryRequirementsNV(VkDevice _device, const VkComputePi
VkMemoryRequirements *reqs = &pMemoryRequirements->memoryRequirements; VkMemoryRequirements *reqs = &pMemoryRequirements->memoryRequirements;
const uint32_t size = sizeof(struct radv_compute_pipeline_metadata); const uint32_t size = sizeof(struct radv_compute_pipeline_metadata);
RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_device, device, _device);
const struct radv_physical_device *pdev = radv_device_physical(device);
reqs->memoryTypeBits = ((1u << device->physical_device->memory_properties.memoryTypeCount) - 1u) & reqs->memoryTypeBits = ((1u << pdev->memory_properties.memoryTypeCount) - 1u) & ~pdev->memory_types_32bit;
~device->physical_device->memory_types_32bit;
reqs->alignment = 4; reqs->alignment = 4;
reqs->size = align(size, reqs->alignment); reqs->size = align(size, reqs->alignment);
} }

View file

@ -195,12 +195,13 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc
mem->user_ptr = host_ptr_info->pHostPointer; mem->user_ptr = host_ptr_info->pHostPointer;
} }
} else { } else {
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096); uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
uint32_t heap_index; uint32_t heap_index;
heap_index = device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex].heapIndex; heap_index = pdev->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex].heapIndex;
domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex]; domain = pdev->memory_domains[pAllocateInfo->memoryTypeIndex];
flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex]; flags |= pdev->memory_flags[pAllocateInfo->memoryTypeIndex];
if (export_info && export_info->handleTypes) { if (export_info && export_info->handleTypes) {
/* Setting RADEON_FLAG_GTT_WC in case the bo is spilled to GTT. This is important when the /* Setting RADEON_FLAG_GTT_WC in case the bo is spilled to GTT. This is important when the
@ -226,7 +227,7 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc
flags |= RADEON_FLAG_ZERO_VRAM; flags |= RADEON_FLAG_ZERO_VRAM;
if (device->overallocation_disallowed) { if (device->overallocation_disallowed) {
uint64_t total_size = device->physical_device->memory_properties.memoryHeaps[heap_index].size; uint64_t total_size = pdev->memory_properties.memoryHeaps[heap_index].size;
mtx_lock(&device->overallocation_mutex); mtx_lock(&device->overallocation_mutex);
if (device->allocated_memory_size[heap_index] + alloc_size > total_size) { if (device->allocated_memory_size[heap_index] + alloc_size > total_size) {
@ -238,8 +239,8 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc
mtx_unlock(&device->overallocation_mutex); mtx_unlock(&device->overallocation_mutex);
} }
result = radv_bo_create(device, alloc_size, device->physical_device->info.max_alignment, domain, flags, priority, result = radv_bo_create(device, alloc_size, pdev->info.max_alignment, domain, flags, priority, replay_address,
replay_address, is_internal, &mem->bo); is_internal, &mem->bo);
if (result != VK_SUCCESS) { if (result != VK_SUCCESS) {
if (device->overallocation_disallowed) { if (device->overallocation_disallowed) {

View file

@ -1881,6 +1881,7 @@ radv_GetImageSparseMemoryRequirements2(VkDevice _device, const VkImageSparseMemo
{ {
RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_image, image, pInfo->image); RADV_FROM_HANDLE(radv_image, image, pInfo->image);
struct radv_physical_device *pdev = radv_device_physical(device);
if (!(image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)) { if (!(image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)) {
*pSparseMemoryRequirementCount = 0; *pSparseMemoryRequirementCount = 0;
@ -1892,12 +1893,12 @@ radv_GetImageSparseMemoryRequirements2(VkDevice _device, const VkImageSparseMemo
vk_outarray_append_typed(VkSparseImageMemoryRequirements2, &out, req) vk_outarray_append_typed(VkSparseImageMemoryRequirements2, &out, req)
{ {
fill_sparse_image_format_properties(device->physical_device, image->vk.image_type, image->vk.format, fill_sparse_image_format_properties(pdev, image->vk.image_type, image->vk.format,
&req->memoryRequirements.formatProperties); &req->memoryRequirements.formatProperties);
req->memoryRequirements.imageMipTailFirstLod = image->planes[0].surface.first_mip_tail_level; req->memoryRequirements.imageMipTailFirstLod = image->planes[0].surface.first_mip_tail_level;
if (req->memoryRequirements.imageMipTailFirstLod < image->vk.mip_levels) { if (req->memoryRequirements.imageMipTailFirstLod < image->vk.mip_levels) {
if (device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
/* The tail is always a single tile per layer. */ /* The tail is always a single tile per layer. */
req->memoryRequirements.imageMipTailSize = 65536; req->memoryRequirements.imageMipTailSize = 65536;
req->memoryRequirements.imageMipTailOffset = req->memoryRequirements.imageMipTailOffset =

View file

@ -42,6 +42,8 @@
static unsigned static unsigned
radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format) radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) { if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
assert(pCreateInfo->samples <= 1); assert(pCreateInfo->samples <= 1);
return RADEON_SURF_MODE_LINEAR_ALIGNED; return RADEON_SURF_MODE_LINEAR_ALIGNED;
@ -54,8 +56,7 @@ radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateI
if (pCreateInfo->samples > 1) if (pCreateInfo->samples > 1)
return RADEON_SURF_MODE_2D; return RADEON_SURF_MODE_2D;
if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) && if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) && pdev->info.gfx_level <= GFX8) {
device->physical_device->info.gfx_level <= GFX8) {
/* this causes hangs in some VK CTS tests on GFX9. */ /* this causes hangs in some VK CTS tests on GFX9. */
/* Textures with a very small height are recommended to be linear. */ /* Textures with a very small height are recommended to be linear. */
if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D || if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
@ -71,14 +72,16 @@ radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateI
static bool static bool
radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format) radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
/* TC-compat HTILE is only available for GFX8+. */ /* TC-compat HTILE is only available for GFX8+. */
if (device->physical_device->info.gfx_level < GFX8) if (pdev->info.gfx_level < GFX8)
return false; return false;
/* TC-compat HTILE looks broken on Tonga (and Iceland is the same design) and the documented bug /* TC-compat HTILE looks broken on Tonga (and Iceland is the same design) and the documented bug
* workarounds don't help. * workarounds don't help.
*/ */
if (device->physical_device->info.family == CHIP_TONGA || device->physical_device->info.family == CHIP_ICELAND) if (pdev->info.family == CHIP_TONGA || pdev->info.family == CHIP_ICELAND)
return false; return false;
if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
@ -91,7 +94,7 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCrea
(VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT))) (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
return false; return false;
if (device->physical_device->info.gfx_level < GFX9) { if (pdev->info.gfx_level < GFX9) {
/* TC-compat HTILE for MSAA depth/stencil images is broken /* TC-compat HTILE for MSAA depth/stencil images is broken
* on GFX8 because the tiling doesn't match. * on GFX8 because the tiling doesn't match.
*/ */
@ -114,7 +117,7 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCrea
} }
/* GFX9 has issues when the sample count is 4 and the format is D16 */ /* GFX9 has issues when the sample count is 4 and the format is D16 */
if (device->physical_device->info.gfx_level == GFX9 && pCreateInfo->samples == 4 && format == VK_FORMAT_D16_UNORM) if (pdev->info.gfx_level == GFX9 && pCreateInfo->samples == 4 && format == VK_FORMAT_D16_UNORM)
return false; return false;
return true; return true;
@ -123,8 +126,10 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCrea
static bool static bool
radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info) radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (info->bo_metadata) { if (info->bo_metadata) {
if (device->physical_device->info.gfx_level >= GFX9) if (pdev->info.gfx_level >= GFX9)
return info->bo_metadata->u.gfx9.scanout; return info->bo_metadata->u.gfx9.scanout;
else else
return info->bo_metadata->u.legacy.scanout; return info->bo_metadata->u.legacy.scanout;
@ -237,8 +242,10 @@ static bool
radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image, const VkImageCreateInfo *pCreateInfo, radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image, const VkImageCreateInfo *pCreateInfo,
VkFormat format, bool *sign_reinterpret) VkFormat format, bool *sign_reinterpret)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
/* DCC (Delta Color Compression) is only available for GFX8+. */ /* DCC (Delta Color Compression) is only available for GFX8+. */
if (device->physical_device->info.gfx_level < GFX8) if (pdev->info.gfx_level < GFX8)
return false; return false;
const VkImageCompressionControlEXT *compression = const VkImageCompressionControlEXT *compression =
@ -260,7 +267,7 @@ radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *imag
* decompressing a lot anyway we might as well not have DCC. * decompressing a lot anyway we might as well not have DCC.
*/ */
if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) && if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
(device->physical_device->info.gfx_level < GFX10 || (pdev->info.gfx_level < GFX10 ||
radv_formats_is_atomic_allowed(device, pCreateInfo->pNext, format, pCreateInfo->flags))) radv_formats_is_atomic_allowed(device, pCreateInfo->pNext, format, pCreateInfo->flags)))
return false; return false;
@ -278,24 +285,22 @@ radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *imag
if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1) if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
return false; return false;
if (device->physical_device->info.gfx_level < GFX10) { if (pdev->info.gfx_level < GFX10) {
/* TODO: Add support for DCC MSAA on GFX8-9. */ /* TODO: Add support for DCC MSAA on GFX8-9. */
if (pCreateInfo->samples > 1 && !device->physical_device->dcc_msaa_allowed) if (pCreateInfo->samples > 1 && !pdev->dcc_msaa_allowed)
return false; return false;
/* TODO: Add support for DCC layers/mipmaps on GFX9. */ /* TODO: Add support for DCC layers/mipmaps on GFX9. */
if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) && if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) && pdev->info.gfx_level == GFX9)
device->physical_device->info.gfx_level == GFX9)
return false; return false;
} }
/* DCC MSAA can't work on GFX10.3 and earlier without FMASK. */ /* DCC MSAA can't work on GFX10.3 and earlier without FMASK. */
if (pCreateInfo->samples > 1 && device->physical_device->info.gfx_level < GFX11 && if (pCreateInfo->samples > 1 && pdev->info.gfx_level < GFX11 &&
(device->instance->debug_flags & RADV_DEBUG_NO_FMASK)) (device->instance->debug_flags & RADV_DEBUG_NO_FMASK))
return false; return false;
return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format, pCreateInfo->flags, return radv_are_formats_dcc_compatible(pdev, pCreateInfo->pNext, format, pCreateInfo->flags, sign_reinterpret);
sign_reinterpret);
} }
static bool static bool
@ -331,7 +336,9 @@ radv_use_dcc_for_image_late(struct radv_device *device, struct radv_image *image
bool bool
radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image) radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image)
{ {
return ac_surface_supports_dcc_image_stores(device->physical_device->info.gfx_level, &image->planes[0].surface); const struct radv_physical_device *pdev = radv_device_physical(device);
return ac_surface_supports_dcc_image_stores(pdev->info.gfx_level, &image->planes[0].surface);
} }
/* /*
@ -347,12 +354,14 @@ radv_image_use_dcc_predication(const struct radv_device *device, const struct ra
static inline bool static inline bool
radv_use_fmask_for_image(const struct radv_device *device, const struct radv_image *image) radv_use_fmask_for_image(const struct radv_device *device, const struct radv_image *image)
{ {
if (device->physical_device->info.gfx_level == GFX9 && image->vk.array_layers > 1) { const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level == GFX9 && image->vk.array_layers > 1) {
/* On GFX9, FMASK can be interleaved with layers and this isn't properly supported. */ /* On GFX9, FMASK can be interleaved with layers and this isn't properly supported. */
return false; return false;
} }
return device->physical_device->use_fmask && image->vk.samples > 1 && return pdev->use_fmask && image->vk.samples > 1 &&
((image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) || ((image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
(device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)); (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
} }
@ -361,7 +370,8 @@ static inline bool
radv_use_htile_for_image(const struct radv_device *device, const struct radv_image *image, radv_use_htile_for_image(const struct radv_device *device, const struct radv_image *image,
const VkImageCreateInfo *pCreateInfo) const VkImageCreateInfo *pCreateInfo)
{ {
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
const VkImageCompressionControlEXT *compression = const VkImageCompressionControlEXT *compression =
vk_find_struct_const(pCreateInfo->pNext, IMAGE_COMPRESSION_CONTROL_EXT); vk_find_struct_const(pCreateInfo->pNext, IMAGE_COMPRESSION_CONTROL_EXT);
@ -374,11 +384,10 @@ radv_use_htile_for_image(const struct radv_device *device, const struct radv_ima
* - Investigate about mips+layers. * - Investigate about mips+layers.
* - Enable on other gens. * - Enable on other gens.
*/ */
bool use_htile_for_mips = image->vk.array_layers == 1 && device->physical_device->info.gfx_level >= GFX10; bool use_htile_for_mips = image->vk.array_layers == 1 && pdev->info.gfx_level >= GFX10;
/* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */ /* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */
if (device->physical_device->info.gfx_level == GFX10 && image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT && if (pdev->info.gfx_level == GFX10 && image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT && image->vk.mip_levels > 1)
image->vk.mip_levels > 1)
return false; return false;
/* Do not enable HTILE for very small images because it seems less performant but make sure it's /* Do not enable HTILE for very small images because it seems less performant but make sure it's
@ -395,19 +404,21 @@ radv_use_htile_for_image(const struct radv_device *device, const struct radv_ima
static bool static bool
radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image) radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
/* TC-compat CMASK is only available for GFX8+. */ /* TC-compat CMASK is only available for GFX8+. */
if (device->physical_device->info.gfx_level < GFX8) if (pdev->info.gfx_level < GFX8)
return false; return false;
/* GFX9 has issues when sample count is greater than 2 */ /* GFX9 has issues when sample count is greater than 2 */
if (device->physical_device->info.gfx_level == GFX9 && image->vk.samples > 2) if (pdev->info.gfx_level == GFX9 && image->vk.samples > 2)
return false; return false;
if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK) if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
return false; return false;
/* TC-compat CMASK with storage images is supported on GFX10+. */ /* TC-compat CMASK with storage images is supported on GFX10+. */
if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && device->physical_device->info.gfx_level < GFX10) if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && pdev->info.gfx_level < GFX10)
return false; return false;
/* Do not enable TC-compatible if the image isn't readable by a shader /* Do not enable TC-compatible if the image isn't readable by a shader
@ -427,7 +438,9 @@ radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image
static uint32_t static uint32_t
radv_get_bo_metadata_word1(const struct radv_device *device) radv_get_bo_metadata_word1(const struct radv_device *device)
{ {
return (ATI_VENDOR_ID << 16) | device->physical_device->info.pci_id; const struct radv_physical_device *pdev = radv_device_physical(device);
return (ATI_VENDOR_ID << 16) | pdev->info.pci_id;
} }
static bool static bool
@ -446,9 +459,11 @@ static void
radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf *surface, radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf *surface,
const struct radeon_bo_metadata *md) const struct radeon_bo_metadata *md)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
surface->flags = RADEON_SURF_CLR(surface->flags, MODE); surface->flags = RADEON_SURF_CLR(surface->flags, MODE);
if (device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
if (md->u.gfx9.swizzle_mode > 0) if (md->u.gfx9.swizzle_mode > 0)
surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE); surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
else else
@ -476,6 +491,7 @@ static VkResult
radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image, radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image,
const struct radv_image_create_info *create_info, struct ac_surf_info *image_info) const struct radv_image_create_info *create_info, struct ac_surf_info *image_info)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned width = image->vk.extent.width; unsigned width = image->vk.extent.width;
unsigned height = image->vk.extent.height; unsigned height = image->vk.extent.height;
@ -489,7 +505,7 @@ radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image
if (create_info->bo_metadata && radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) { if (create_info->bo_metadata && radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
const struct radeon_bo_metadata *md = create_info->bo_metadata; const struct radeon_bo_metadata *md = create_info->bo_metadata;
if (device->physical_device->info.gfx_level >= GFX10) { if (pdev->info.gfx_level >= GFX10) {
width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1; width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
height = G_00A008_HEIGHT(md->metadata[4]) + 1; height = G_00A008_HEIGHT(md->metadata[4]) + 1;
} else { } else {
@ -509,7 +525,7 @@ radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image
"(internal dimensions: %d x %d, external dimensions: %d x %d)\n", "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
image->vk.extent.width, image->vk.extent.height, width, height); image->vk.extent.width, image->vk.extent.height, width, height);
return VK_ERROR_INVALID_EXTERNAL_HANDLE; return VK_ERROR_INVALID_EXTERNAL_HANDLE;
} else if (device->physical_device->info.gfx_level >= GFX10) { } else if (pdev->info.gfx_level >= GFX10) {
fprintf(stderr, fprintf(stderr,
"Tried to import an image with inconsistent width on GFX10.\n" "Tried to import an image with inconsistent width on GFX10.\n"
"As GFX10 has no separate stride fields we cannot cope with\n" "As GFX10 has no separate stride fields we cannot cope with\n"
@ -535,6 +551,8 @@ static VkResult
radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image, radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image,
const struct radv_image_create_info *create_info, struct ac_surf_info *image_info) const struct radv_image_create_info *create_info, struct ac_surf_info *image_info)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info); VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
return result; return result;
@ -552,7 +570,7 @@ radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *
image_info->surf_index = NULL; image_info->surf_index = NULL;
} }
if (create_info->prime_blit_src && !device->physical_device->info.sdma_supports_compression) { if (create_info->prime_blit_src && !pdev->info.sdma_supports_compression) {
/* Older SDMA hw can't handle DCC */ /* Older SDMA hw can't handle DCC */
image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC; image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
} }
@ -579,9 +597,10 @@ static uint64_t
radv_get_surface_flags(struct radv_device *device, struct radv_image *image, unsigned plane_id, radv_get_surface_flags(struct radv_device *device, struct radv_image *image, unsigned plane_id,
const VkImageCreateInfo *pCreateInfo, VkFormat image_format) const VkImageCreateInfo *pCreateInfo, VkFormat image_format)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t flags; uint64_t flags;
unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format); unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
VkFormat format = radv_image_get_plane_format(device->physical_device, image, plane_id); VkFormat format = radv_image_get_plane_format(pdev, image, plane_id);
const struct util_format_description *desc = vk_format_description(format); const struct util_format_description *desc = vk_format_description(format);
bool is_depth, is_stencil; bool is_depth, is_stencil;
@ -616,7 +635,7 @@ radv_get_surface_flags(struct radv_device *device, struct radv_image *image, uns
if (is_depth) { if (is_depth) {
flags |= RADEON_SURF_ZBUFFER; flags |= RADEON_SURF_ZBUFFER;
if (is_depth && is_stencil && device->physical_device->info.gfx_level <= GFX8) { if (is_depth && is_stencil && pdev->info.gfx_level <= GFX8) {
if (!(pCreateInfo->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) if (!(pCreateInfo->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT))
flags |= RADEON_SURF_NO_RENDER_TARGET; flags |= RADEON_SURF_NO_RENDER_TARGET;
@ -641,7 +660,7 @@ radv_get_surface_flags(struct radv_device *device, struct radv_image *image, uns
if (is_stencil) if (is_stencil)
flags |= RADEON_SURF_SBUFFER; flags |= RADEON_SURF_SBUFFER;
if (device->physical_device->info.gfx_level >= GFX9 && pCreateInfo->imageType == VK_IMAGE_TYPE_3D && if (pdev->info.gfx_level >= GFX9 && pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format)) vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format))
flags |= RADEON_SURF_NO_RENDER_TARGET; flags |= RADEON_SURF_NO_RENDER_TARGET;
@ -656,7 +675,7 @@ radv_get_surface_flags(struct radv_device *device, struct radv_image *image, uns
} }
if (image->queue_family_mask & BITFIELD_BIT(RADV_QUEUE_TRANSFER)) { if (image->queue_family_mask & BITFIELD_BIT(RADV_QUEUE_TRANSFER)) {
if (!device->physical_device->info.sdma_supports_compression) if (!pdev->info.sdma_supports_compression)
flags |= RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_HTILE; flags |= RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_HTILE;
} }
@ -720,12 +739,14 @@ radv_compose_swizzle(const struct util_format_description *desc, const VkCompone
bool bool
vi_alpha_is_on_msb(const struct radv_device *device, const VkFormat format) vi_alpha_is_on_msb(const struct radv_device *device, const VkFormat format)
{ {
if (device->physical_device->info.gfx_level >= GFX11) const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level >= GFX11)
return false; return false;
const struct util_format_description *desc = vk_format_description(format); const struct util_format_description *desc = vk_format_description(format);
if (device->physical_device->info.gfx_level >= GFX10 && desc->nr_channels == 1) if (pdev->info.gfx_level >= GFX10 && desc->nr_channels == 1)
return desc->swizzle[3] == PIPE_SWIZZLE_X; return desc->swizzle[3] == PIPE_SWIZZLE_X;
return radv_translate_colorswap(format, false) <= 1; return radv_translate_colorswap(format, false) <= 1;
@ -735,13 +756,13 @@ static void
radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image, unsigned plane_id, radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image, unsigned plane_id,
struct radeon_bo_metadata *md) struct radeon_bo_metadata *md)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
static const VkComponentMapping fixedmapping; static const VkComponentMapping fixedmapping;
const VkFormat plane_format = radv_image_get_plane_format(device->physical_device, image, plane_id); const VkFormat plane_format = radv_image_get_plane_format(pdev, image, plane_id);
const unsigned plane_width = vk_format_get_plane_width(image->vk.format, plane_id, image->vk.extent.width); const unsigned plane_width = vk_format_get_plane_width(image->vk.format, plane_id, image->vk.extent.width);
const unsigned plane_height = vk_format_get_plane_height(image->vk.format, plane_id, image->vk.extent.height); const unsigned plane_height = vk_format_get_plane_height(image->vk.format, plane_id, image->vk.extent.height);
struct radeon_surf *surface = &image->planes[plane_id].surface; struct radeon_surf *surface = &image->planes[plane_id].surface;
const struct legacy_surf_level *base_level_info = const struct legacy_surf_level *base_level_info = pdev->info.gfx_level <= GFX8 ? &surface->u.legacy.level[0] : NULL;
device->physical_device->info.gfx_level <= GFX8 ? &surface->u.legacy.level[0] : NULL;
uint32_t desc[8]; uint32_t desc[8];
radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->vk.image_type, plane_format, radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->vk.image_type, plane_format,
@ -751,21 +772,22 @@ radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image,
radv_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, 0, 0, surface->blk_w, false, false, false, radv_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, 0, 0, surface->blk_w, false, false, false,
false, desc, NULL); false, desc, NULL);
ac_surface_compute_umd_metadata(&device->physical_device->info, surface, image->vk.mip_levels, desc, ac_surface_compute_umd_metadata(&pdev->info, surface, image->vk.mip_levels, desc, &md->size_metadata, md->metadata,
&md->size_metadata, md->metadata,
device->instance->debug_flags & RADV_DEBUG_EXTRA_MD); device->instance->debug_flags & RADV_DEBUG_EXTRA_MD);
} }
void void
radv_init_metadata(struct radv_device *device, struct radv_image *image, struct radeon_bo_metadata *metadata) radv_init_metadata(struct radv_device *device, struct radv_image *image, struct radeon_bo_metadata *metadata)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
/* use plane 0, even when there are multiple planes, to follow radeonsi */ /* use plane 0, even when there are multiple planes, to follow radeonsi */
const unsigned plane_id = 0; const unsigned plane_id = 0;
struct radeon_surf *surface = &image->planes[plane_id].surface; struct radeon_surf *surface = &image->planes[plane_id].surface;
memset(metadata, 0, sizeof(*metadata)); memset(metadata, 0, sizeof(*metadata));
if (device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
uint64_t dcc_offset = uint64_t dcc_offset =
image->bindings[0].offset + (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset); image->bindings[0].offset + (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset);
metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode; metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode;
@ -796,7 +818,8 @@ void
radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image, uint64_t offset, radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image, uint64_t offset,
uint32_t stride) uint32_t stride)
{ {
ac_surface_override_offset_stride(&device->physical_device->info, &image->planes[0].surface, image->vk.array_layers, const struct radv_physical_device *pdev = radv_device_physical(device);
ac_surface_override_offset_stride(&pdev->info, &image->planes[0].surface, image->vk.array_layers,
image->vk.mip_levels, offset, stride); image->vk.mip_levels, offset, stride);
} }
@ -819,6 +842,8 @@ radv_image_alloc_single_sample_cmask(const struct radv_device *device, const str
static void static void
radv_image_alloc_values(const struct radv_device *device, struct radv_image *image) radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
/* images with modifiers can be potentially imported */ /* images with modifiers can be potentially imported */
if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
return; return;
@ -839,7 +864,7 @@ radv_image_alloc_values(const struct radv_device *device, struct radv_image *ima
image->size += 8 * image->vk.mip_levels; image->size += 8 * image->vk.mip_levels;
} }
if (radv_image_is_tc_compat_htile(image) && device->physical_device->info.has_tc_compat_zrange_bug) { if (radv_image_is_tc_compat_htile(image) && pdev->info.has_tc_compat_zrange_bug) {
/* Metadata for the TC-compatible HTILE hardware bug which /* Metadata for the TC-compatible HTILE hardware bug which
* have to be fixed by updating ZRANGE_PRECISION when doing * have to be fixed by updating ZRANGE_PRECISION when doing
* fast depth clears to 0.0f. * fast depth clears to 0.0f.
@ -855,13 +880,14 @@ radv_image_alloc_values(const struct radv_device *device, struct radv_image *ima
static bool static bool
radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image) radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
{ {
const struct radeon_info *gpu_info = &device->physical_device->info; const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
int log2_samples = util_logbase2(image->vk.samples); int log2_samples = util_logbase2(image->vk.samples);
assert(gpu_info->gfx_level >= GFX10); assert(gpu_info->gfx_level >= GFX10);
for (unsigned i = 0; i < image->plane_count; ++i) { for (unsigned i = 0; i < image->plane_count; ++i) {
VkFormat fmt = radv_image_get_plane_format(device->physical_device, image, i); VkFormat fmt = radv_image_get_plane_format(pdev, image, i);
int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt)); int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
int log2_bpp_and_samples; int log2_bpp_and_samples;
@ -903,9 +929,11 @@ radv_image_is_pipe_misaligned(const struct radv_device *device, const struct rad
static bool static bool
radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image) radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
{ {
if (device->physical_device->info.gfx_level >= GFX10) { const struct radv_physical_device *pdev = radv_device_physical(device);
return !device->physical_device->info.tcc_rb_non_coherent && !radv_image_is_pipe_misaligned(device, image);
} else if (device->physical_device->info.gfx_level == GFX9) { if (pdev->info.gfx_level >= GFX10) {
return !pdev->info.tcc_rb_non_coherent && !radv_image_is_pipe_misaligned(device, image);
} else if (pdev->info.gfx_level == GFX9) {
if (image->vk.samples == 1 && if (image->vk.samples == 1 &&
(image->vk.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) && (image->vk.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
!vk_format_has_stencil(image->vk.format)) { !vk_format_has_stencil(image->vk.format)) {
@ -926,6 +954,8 @@ radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_im
bool bool
radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image) radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS) if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
return false; return false;
@ -934,7 +964,7 @@ radv_image_can_fast_clear(const struct radv_device *device, const struct radv_im
return false; return false;
/* RB+ doesn't work with CMASK fast clear on Stoney. */ /* RB+ doesn't work with CMASK fast clear on Stoney. */
if (!radv_image_has_dcc(image) && device->physical_device->info.family == CHIP_STONEY) if (!radv_image_has_dcc(image) && pdev->info.family == CHIP_STONEY)
return false; return false;
/* Fast-clears with CMASK aren't supported for 128-bit formats. */ /* Fast-clears with CMASK aren't supported for 128-bit formats. */
@ -958,8 +988,10 @@ radv_image_can_fast_clear(const struct radv_device *device, const struct radv_im
static bool static bool
radv_image_use_comp_to_single(const struct radv_device *device, const struct radv_image *image) radv_image_use_comp_to_single(const struct radv_device *device, const struct radv_image *image)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
/* comp-to-single is only available for GFX10+. */ /* comp-to-single is only available for GFX10+. */
if (device->physical_device->info.gfx_level < GFX10) if (pdev->info.gfx_level < GFX10)
return false; return false;
/* If the image can't be fast cleared, comp-to-single can't be used. */ /* If the image can't be fast cleared, comp-to-single can't be used. */
@ -972,7 +1004,7 @@ radv_image_use_comp_to_single(const struct radv_device *device, const struct rad
/* It seems 8bpp and 16bpp require RB+ to work. */ /* It seems 8bpp and 16bpp require RB+ to work. */
unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk.format); unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk.format);
if (bytes_per_pixel <= 2 && !device->physical_device->info.rbplus_allowed) if (bytes_per_pixel <= 2 && !pdev->info.rbplus_allowed)
return false; return false;
return true; return true;
@ -1049,6 +1081,8 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info, const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
const struct VkVideoProfileListInfoKHR *profile_list, struct radv_image *image) const struct VkVideoProfileListInfoKHR *profile_list, struct radv_image *image)
{ {
struct radv_physical_device *pdev = radv_device_physical(device);
/* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
* common internal case. */ * common internal case. */
create_info.vk_info = NULL; create_info.vk_info = NULL;
@ -1060,7 +1094,7 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count); assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count);
radv_image_reset_layout(device->physical_device, image); radv_image_reset_layout(pdev, image);
/* /*
* Due to how the decoder works, the user can't supply an oversized image, because if it attempts * Due to how the decoder works, the user can't supply an oversized image, because if it attempts
@ -1070,17 +1104,17 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
if (image->vk.usage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR)) { if (image->vk.usage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR)) {
assert(profile_list); assert(profile_list);
uint32_t width_align, height_align; uint32_t width_align, height_align;
radv_video_get_profile_alignments(device->physical_device, profile_list, &width_align, &height_align); radv_video_get_profile_alignments(pdev, profile_list, &width_align, &height_align);
image_info.width = align(image_info.width, width_align); image_info.width = align(image_info.width, width_align);
image_info.height = align(image_info.height, height_align); image_info.height = align(image_info.height, height_align);
if (radv_has_uvd(device->physical_device) && image->vk.usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) { if (radv_has_uvd(pdev) && image->vk.usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) {
/* UVD and kernel demand a full DPB allocation. */ /* UVD and kernel demand a full DPB allocation. */
image_info.array_size = MIN2(16, image_info.array_size); image_info.array_size = MIN2(16, image_info.array_size);
} }
} }
unsigned plane_count = radv_get_internal_plane_count(device->physical_device, image->vk.format); unsigned plane_count = radv_get_internal_plane_count(pdev, image->vk.format);
for (unsigned plane = 0; plane < plane_count; ++plane) { for (unsigned plane = 0; plane < plane_count; ++plane) {
struct ac_surf_info info = image_info; struct ac_surf_info info = image_info;
uint64_t offset; uint64_t offset;
@ -1101,9 +1135,9 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
} }
if (create_info.bo_metadata && !mod_info && if (create_info.bo_metadata && !mod_info &&
!ac_surface_apply_umd_metadata(&device->physical_device->info, &image->planes[plane].surface, !ac_surface_apply_umd_metadata(&pdev->info, &image->planes[plane].surface, image->vk.samples,
image->vk.samples, image->vk.mip_levels, image->vk.mip_levels, create_info.bo_metadata->size_metadata,
create_info.bo_metadata->size_metadata, create_info.bo_metadata->metadata)) create_info.bo_metadata->metadata))
return VK_ERROR_INVALID_EXTERNAL_HANDLE; return VK_ERROR_INVALID_EXTERNAL_HANDLE;
if (!create_info.no_metadata_planes && !create_info.bo_metadata && plane_count == 1 && !mod_info) if (!create_info.no_metadata_planes && !create_info.bo_metadata && plane_count == 1 && !mod_info)
@ -1121,8 +1155,8 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
stride = 0; /* 0 means no override */ stride = 0; /* 0 means no override */
} }
if (!ac_surface_override_offset_stride(&device->physical_device->info, &image->planes[plane].surface, if (!ac_surface_override_offset_stride(&pdev->info, &image->planes[plane].surface, image->vk.array_layers,
image->vk.array_layers, image->vk.mip_levels, offset, stride)) image->vk.mip_levels, offset, stride))
return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT; return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
/* Validate DCC offsets in modifier layout. */ /* Validate DCC offsets in modifier layout. */
@ -1132,8 +1166,8 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT; return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
for (unsigned i = 1; i < mem_planes; ++i) { for (unsigned i = 1; i < mem_planes; ++i) {
if (ac_surface_get_plane_offset(device->physical_device->info.gfx_level, &image->planes[plane].surface, i, if (ac_surface_get_plane_offset(pdev->info.gfx_level, &image->planes[plane].surface, i, 0) !=
0) != mod_info->pPlaneLayouts[i].offset) mod_info->pPlaneLayouts[i].offset)
return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT; return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
} }
} }
@ -1141,7 +1175,7 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size); image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
image->alignment = MAX2(image->alignment, 1 << image->planes[plane].surface.alignment_log2); image->alignment = MAX2(image->alignment, 1 << image->planes[plane].surface.alignment_log2);
image->planes[plane].format = radv_image_get_plane_format(device->physical_device, image, plane); image->planes[plane].format = radv_image_get_plane_format(pdev, image, plane);
} }
image->tc_compatible_cmask = radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image); image->tc_compatible_cmask = radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image);
@ -1177,6 +1211,8 @@ radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAll
static void static void
radv_image_print_info(struct radv_device *device, struct radv_image *image) radv_image_print_info(struct radv_device *device, struct radv_image *image)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
fprintf(stderr, "Image:\n"); fprintf(stderr, "Image:\n");
fprintf(stderr, fprintf(stderr,
" Info: size=%" PRIu64 ", alignment=%" PRIu32 ", " " Info: size=%" PRIu64 ", alignment=%" PRIu32 ", "
@ -1188,11 +1224,11 @@ radv_image_print_info(struct radv_device *device, struct radv_image *image)
const struct radv_image_plane *plane = &image->planes[i]; const struct radv_image_plane *plane = &image->planes[i];
const struct radeon_surf *surf = &plane->surface; const struct radeon_surf *surf = &plane->surface;
const struct util_format_description *desc = vk_format_description(plane->format); const struct util_format_description *desc = vk_format_description(plane->format);
uint64_t offset = ac_surface_get_plane_offset(device->physical_device->info.gfx_level, &plane->surface, 0, 0); uint64_t offset = ac_surface_get_plane_offset(pdev->info.gfx_level, &plane->surface, 0, 0);
fprintf(stderr, " Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset); fprintf(stderr, " Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset);
ac_surface_print_info(stderr, &device->physical_device->info, surf); ac_surface_print_info(stderr, &pdev->info, surf);
} }
} }
@ -1200,7 +1236,7 @@ static uint64_t
radv_select_modifier(const struct radv_device *dev, VkFormat format, radv_select_modifier(const struct radv_device *dev, VkFormat format,
const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list) const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list)
{ {
const struct radv_physical_device *pdev = dev->physical_device; const struct radv_physical_device *pdev = radv_device_physical(dev);
unsigned mod_count; unsigned mod_count;
assert(mod_list->drmFormatModifierCount); assert(mod_list->drmFormatModifierCount);
@ -1238,6 +1274,7 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_
const VkAllocationCallbacks *alloc, VkImage *pImage, bool is_internal) const VkAllocationCallbacks *alloc, VkImage *pImage, bool is_internal)
{ {
RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_device, device, _device);
const struct radv_physical_device *pdev = radv_device_physical(device);
const VkImageCreateInfo *pCreateInfo = create_info->vk_info; const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
uint64_t modifier = DRM_FORMAT_MOD_INVALID; uint64_t modifier = DRM_FORMAT_MOD_INVALID;
struct radv_image *image = NULL; struct radv_image *image = NULL;
@ -1250,7 +1287,7 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_
const struct VkVideoProfileListInfoKHR *profile_list = const struct VkVideoProfileListInfoKHR *profile_list =
vk_find_struct_const(pCreateInfo->pNext, VIDEO_PROFILE_LIST_INFO_KHR); vk_find_struct_const(pCreateInfo->pNext, VIDEO_PROFILE_LIST_INFO_KHR);
unsigned plane_count = radv_get_internal_plane_count(device->physical_device, format); unsigned plane_count = radv_get_internal_plane_count(pdev, format);
const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count; const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;
@ -1270,8 +1307,7 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_
pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT) pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u; image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
else else
image->queue_family_mask |= image->queue_family_mask |= 1u << vk_queue_to_radv(pdev, pCreateInfo->pQueueFamilyIndices[i]);
1u << vk_queue_to_radv(device->physical_device, pCreateInfo->pQueueFamilyIndices[i]);
/* This queue never really accesses the image. */ /* This queue never really accesses the image. */
image->queue_family_mask &= ~(1u << RADV_QUEUE_SPARSE); image->queue_family_mask &= ~(1u << RADV_QUEUE_SPARSE);
@ -1375,10 +1411,12 @@ bool
radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image, VkImageLayout layout, radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image, VkImageLayout layout,
unsigned queue_mask) unsigned queue_mask)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
/* Don't compress exclusive images used on transfer queues when SDMA doesn't support HTILE. /* Don't compress exclusive images used on transfer queues when SDMA doesn't support HTILE.
* Note that HTILE is already disabled on concurrent images when not supported. * Note that HTILE is already disabled on concurrent images when not supported.
*/ */
if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !device->physical_device->info.sdma_supports_compression) if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !pdev->info.sdma_supports_compression)
return false; return false;
switch (layout) { switch (layout) {
@ -1452,6 +1490,8 @@ bool
radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image, unsigned level, radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image, unsigned level,
VkImageLayout layout, unsigned queue_mask) VkImageLayout layout, unsigned queue_mask)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!radv_dcc_enabled(image, level)) if (!radv_dcc_enabled(image, level))
return false; return false;
@ -1470,7 +1510,7 @@ radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_i
/* Don't compress exclusive images used on transfer queues when SDMA doesn't support DCC. /* Don't compress exclusive images used on transfer queues when SDMA doesn't support DCC.
* Note that DCC is already disabled on concurrent images when not supported. * Note that DCC is already disabled on concurrent images when not supported.
*/ */
if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !device->physical_device->info.sdma_supports_compression) if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !pdev->info.sdma_supports_compression)
return false; return false;
if (layout == VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT) { if (layout == VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT) {
@ -1480,7 +1520,7 @@ radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_i
return false; return false;
} }
return device->physical_device->info.gfx_level >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL; return pdev->info.gfx_level >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL;
} }
enum radv_fmask_compression enum radv_fmask_compression
@ -1533,11 +1573,13 @@ radv_image_queue_family_mask(const struct radv_image *image, enum radv_queue_fam
bool bool
radv_image_is_renderable(const struct radv_device *device, const struct radv_image *image) radv_image_is_renderable(const struct radv_device *device, const struct radv_image *image)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT || if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT ||
image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) image->vk.format == VK_FORMAT_R32G32B32_SFLOAT)
return false; return false;
if (device->physical_device->info.gfx_level >= GFX9 && image->vk.image_type == VK_IMAGE_TYPE_3D && if (pdev->info.gfx_level >= GFX9 && image->vk.image_type == VK_IMAGE_TYPE_3D &&
vk_format_get_blocksizebits(image->vk.format) == 128 && vk_format_is_compressed(image->vk.format)) vk_format_get_blocksizebits(image->vk.format) == 128 && vk_format_is_compressed(image->vk.format))
return false; return false;
@ -1572,11 +1614,11 @@ radv_CreateImage(VkDevice _device, const VkImageCreateInfo *pCreateInfo, const V
* we're guaranteed to access an Android object incorrectly. * we're guaranteed to access an Android object incorrectly.
*/ */
RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_device, device, _device);
const struct radv_physical_device *pdev = radv_device_physical(device);
const VkImageSwapchainCreateInfoKHR *swapchain_info = const VkImageSwapchainCreateInfoKHR *swapchain_info =
vk_find_struct_const(pCreateInfo->pNext, IMAGE_SWAPCHAIN_CREATE_INFO_KHR); vk_find_struct_const(pCreateInfo->pNext, IMAGE_SWAPCHAIN_CREATE_INFO_KHR);
if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) { if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) {
return wsi_common_create_swapchain_image(device->physical_device->vk.wsi_device, pCreateInfo, return wsi_common_create_swapchain_image(pdev->vk.wsi_device, pCreateInfo, swapchain_info->swapchain, pImage);
swapchain_info->swapchain, pImage);
} }
#endif #endif
@ -1686,6 +1728,7 @@ radv_GetImageSubresourceLayout2KHR(VkDevice _device, VkImage _image, const VkIma
{ {
RADV_FROM_HANDLE(radv_image, image, _image); RADV_FROM_HANDLE(radv_image, image, _image);
RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_device, device, _device);
const struct radv_physical_device *pdev = radv_device_physical(device);
int level = pSubresource->imageSubresource.mipLevel; int level = pSubresource->imageSubresource.mipLevel;
int layer = pSubresource->imageSubresource.arrayLayer; int layer = pSubresource->imageSubresource.arrayLayer;
@ -1703,18 +1746,17 @@ radv_GetImageSubresourceLayout2KHR(VkDevice _device, VkImage _image, const VkIma
assert(level == 0); assert(level == 0);
assert(layer == 0); assert(layer == 0);
pLayout->subresourceLayout.offset = pLayout->subresourceLayout.offset = ac_surface_get_plane_offset(pdev->info.gfx_level, surface, mem_plane_id, 0);
ac_surface_get_plane_offset(device->physical_device->info.gfx_level, surface, mem_plane_id, 0);
pLayout->subresourceLayout.rowPitch = pLayout->subresourceLayout.rowPitch =
ac_surface_get_plane_stride(device->physical_device->info.gfx_level, surface, mem_plane_id, level); ac_surface_get_plane_stride(pdev->info.gfx_level, surface, mem_plane_id, level);
pLayout->subresourceLayout.arrayPitch = 0; pLayout->subresourceLayout.arrayPitch = 0;
pLayout->subresourceLayout.depthPitch = 0; pLayout->subresourceLayout.depthPitch = 0;
pLayout->subresourceLayout.size = ac_surface_get_plane_size(surface, mem_plane_id); pLayout->subresourceLayout.size = ac_surface_get_plane_size(surface, mem_plane_id);
} else if (device->physical_device->info.gfx_level >= GFX9) { } else if (pdev->info.gfx_level >= GFX9) {
uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0; uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;
pLayout->subresourceLayout.offset = pLayout->subresourceLayout.offset =
ac_surface_get_plane_offset(device->physical_device->info.gfx_level, &plane->surface, 0, layer) + level_offset; ac_surface_get_plane_offset(pdev->info.gfx_level, &plane->surface, 0, layer) + level_offset;
if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT || if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT ||
image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) { image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) {
/* Adjust the number of bytes between each row because /* Adjust the number of bytes between each row because

View file

@ -106,7 +106,8 @@ radv_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *
uint64_t gpu_address = binding->bo ? radv_buffer_get_va(binding->bo) + binding->offset : 0; uint64_t gpu_address = binding->bo ? radv_buffer_get_va(binding->bo) + binding->offset : 0;
uint64_t va = gpu_address; uint64_t va = gpu_address;
uint8_t swizzle = plane->surface.tile_swizzle; uint8_t swizzle = plane->surface.tile_swizzle;
enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
enum amd_gfx_level gfx_level = pdev->info.gfx_level;
uint64_t meta_va = 0; uint64_t meta_va = 0;
if (gfx_level >= GFX9) { if (gfx_level >= GFX9) {
if (is_stencil) if (is_stencil)
@ -154,7 +155,7 @@ radv_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *
* If an imported image is used with VK_IMAGE_VIEW_TYPE_2D_ARRAY, it may hang due to VM faults * If an imported image is used with VK_IMAGE_VIEW_TYPE_2D_ARRAY, it may hang due to VM faults
* because DEPTH means pitch with 2D, but it means depth with 2D array. * because DEPTH means pitch with 2D, but it means depth with 2D array.
*/ */
if (device->physical_device->info.gfx_level >= GFX10_3 && plane->surface.u.gfx9.uses_custom_pitch) { if (pdev->info.gfx_level >= GFX10_3 && plane->surface.u.gfx9.uses_custom_pitch) {
assert((plane->surface.u.gfx9.surf_pitch * plane->surface.bpe) % 256 == 0); assert((plane->surface.u.gfx9.surf_pitch * plane->surface.bpe) % 256 == 0);
assert(image->vk.image_type == VK_IMAGE_TYPE_2D); assert(image->vk.image_type == VK_IMAGE_TYPE_2D);
assert(plane->surface.is_linear); assert(plane->surface.is_linear);
@ -245,6 +246,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima
uint32_t *fmask_state, VkImageCreateFlags img_create_flags, uint32_t *fmask_state, VkImageCreateFlags img_create_flags,
const struct ac_surf_nbc_view *nbc_view, const VkImageViewSlicedCreateInfoEXT *sliced_3d) const struct ac_surf_nbc_view *nbc_view, const VkImageViewSlicedCreateInfoEXT *sliced_3d)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct util_format_description *desc; const struct util_format_description *desc;
enum pipe_swizzle swizzle[4]; enum pipe_swizzle swizzle[4];
unsigned img_format; unsigned img_format;
@ -261,8 +263,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima
desc = util_format_description(PIPE_FORMAT_R8G8B8X8_SRGB); desc = util_format_description(PIPE_FORMAT_R8G8B8X8_SRGB);
} }
img_format = img_format = ac_get_gfx10_format_table(&pdev->info)[vk_format_to_pipe_format(vk_format)].img_format;
ac_get_gfx10_format_table(&device->physical_device->info)[vk_format_to_pipe_format(vk_format)].img_format;
radv_compose_swizzle(desc, mapping, swizzle); radv_compose_swizzle(desc, mapping, swizzle);
@ -271,7 +272,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima
type = V_008F1C_SQ_RSRC_IMG_3D; type = V_008F1C_SQ_RSRC_IMG_3D;
} else { } else {
type = radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, image->vk.samples, is_storage_image, type = radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, image->vk.samples, is_storage_image,
device->physical_device->info.gfx_level == GFX9); pdev->info.gfx_level == GFX9);
} }
if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
@ -286,7 +287,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima
state[0] = 0; state[0] = 0;
state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1); state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1);
state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) | state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
S_00A008_RESOURCE_LEVEL(device->physical_device->info.gfx_level < GFX11); S_00A008_RESOURCE_LEVEL(pdev->info.gfx_level < GFX11);
state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) | state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) | S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
S_00A00C_BASE_LEVEL(image->vk.samples > 1 ? 0 : first_level) | S_00A00C_BASE_LEVEL(image->vk.samples > 1 ? 0 : first_level) |
@ -332,7 +333,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima
max_mip = nbc_view->num_levels - 1; max_mip = nbc_view->num_levels - 1;
unsigned min_lod_clamped = radv_float_to_ufixed(CLAMP(min_lod, 0, 15), 8); unsigned min_lod_clamped = radv_float_to_ufixed(CLAMP(min_lod, 0, 15), 8);
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
state[1] |= S_00A004_MAX_MIP(max_mip); state[1] |= S_00A004_MAX_MIP(max_mip);
state[5] |= S_00A014_MIN_LOD_LO(min_lod_clamped); state[5] |= S_00A014_MIN_LOD_LO(min_lod_clamped);
state[6] |= S_00A018_MIN_LOD_HI(min_lod_clamped >> 5); state[6] |= S_00A018_MIN_LOD_HI(min_lod_clamped >> 5);
@ -413,6 +414,7 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
unsigned width, unsigned height, unsigned depth, float min_lod, uint32_t *state, unsigned width, unsigned height, unsigned depth, float min_lod, uint32_t *state,
uint32_t *fmask_state, VkImageCreateFlags img_create_flags) uint32_t *fmask_state, VkImageCreateFlags img_create_flags)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct util_format_description *desc; const struct util_format_description *desc;
enum pipe_swizzle swizzle[4]; enum pipe_swizzle swizzle[4];
int first_non_void; int first_non_void;
@ -444,21 +446,19 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
} }
/* S8 with either Z16 or Z32 HTILE need a special format. */ /* S8 with either Z16 or Z32 HTILE need a special format. */
if (device->physical_device->info.gfx_level == GFX9 && vk_format == VK_FORMAT_S8_UINT && if (pdev->info.gfx_level == GFX9 && vk_format == VK_FORMAT_S8_UINT && radv_image_is_tc_compat_htile(image)) {
radv_image_is_tc_compat_htile(image)) {
if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT)
data_format = V_008F14_IMG_DATA_FORMAT_S8_32; data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
else if (image->vk.format == VK_FORMAT_D16_UNORM_S8_UINT) else if (image->vk.format == VK_FORMAT_D16_UNORM_S8_UINT)
data_format = V_008F14_IMG_DATA_FORMAT_S8_16; data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
} }
if (device->physical_device->info.gfx_level == GFX9 && if (pdev->info.gfx_level == GFX9 && img_create_flags & VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT) {
img_create_flags & VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT) {
assert(image->vk.image_type == VK_IMAGE_TYPE_3D); assert(image->vk.image_type == VK_IMAGE_TYPE_3D);
type = V_008F1C_SQ_RSRC_IMG_3D; type = V_008F1C_SQ_RSRC_IMG_3D;
} else { } else {
type = radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, image->vk.samples, is_storage_image, type = radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, image->vk.samples, is_storage_image,
device->physical_device->info.gfx_level == GFX9); pdev->info.gfx_level == GFX9);
} }
if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
@ -484,7 +484,7 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
state[6] = 0; state[6] = 0;
state[7] = 0; state[7] = 0;
if (device->physical_device->info.gfx_level == GFX9) { if (pdev->info.gfx_level == GFX9) {
unsigned bc_swizzle = gfx9_border_color_swizzle(desc); unsigned bc_swizzle = gfx9_border_color_swizzle(desc);
/* Depth is the last accessible layer on Gfx9. /* Depth is the last accessible layer on Gfx9.
@ -509,7 +509,7 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
/* The last dword is unused by hw. The shader uses it to clear /* The last dword is unused by hw. The shader uses it to clear
* bits in the first dword of sampler state. * bits in the first dword of sampler state.
*/ */
if (device->physical_device->info.gfx_level <= GFX7 && image->vk.samples <= 1) { if (pdev->info.gfx_level <= GFX7 && image->vk.samples <= 1) {
if (first_level == last_level) if (first_level == last_level)
state[7] = C_008F30_MAX_ANISO_RATIO; state[7] = C_008F30_MAX_ANISO_RATIO;
else else
@ -529,7 +529,7 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
va = gpu_address + image->bindings[0].offset + image->planes[0].surface.fmask_offset; va = gpu_address + image->bindings[0].offset + image->planes[0].surface.fmask_offset;
if (device->physical_device->info.gfx_level == GFX9) { if (pdev->info.gfx_level == GFX9) {
fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK; fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
switch (image->vk.samples) { switch (image->vk.samples) {
case 2: case 2:
@ -576,7 +576,7 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
fmask_state[6] = 0; fmask_state[6] = 0;
fmask_state[7] = 0; fmask_state[7] = 0;
if (device->physical_device->info.gfx_level == GFX9) { if (pdev->info.gfx_level == GFX9) {
fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode); fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode);
fmask_state[4] |= fmask_state[4] |=
S_008F20_DEPTH(last_layer) | S_008F20_PITCH(image->planes[0].surface.u.gfx9.color.fmask_epitch); S_008F20_DEPTH(last_layer) | S_008F20_PITCH(image->planes[0].surface.u.gfx9.color.fmask_epitch);
@ -615,7 +615,9 @@ radv_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
uint32_t *fmask_state, VkImageCreateFlags img_create_flags, uint32_t *fmask_state, VkImageCreateFlags img_create_flags,
const struct ac_surf_nbc_view *nbc_view, const VkImageViewSlicedCreateInfoEXT *sliced_3d) const struct ac_surf_nbc_view *nbc_view, const VkImageViewSlicedCreateInfoEXT *sliced_3d)
{ {
if (device->physical_device->info.gfx_level >= GFX10) { const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level >= GFX10) {
gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping, first_level, gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping, first_level,
last_level, first_layer, last_layer, width, height, depth, min_lod, state, last_level, first_layer, last_layer, width, height, depth, min_lod, state,
fmask_state, img_create_flags, nbc_view, sliced_3d); fmask_state, img_create_flags, nbc_view, sliced_3d);
@ -630,12 +632,13 @@ static inline void
compute_non_block_compressed_view(struct radv_device *device, const struct radv_image_view *iview, compute_non_block_compressed_view(struct radv_device *device, const struct radv_image_view *iview,
struct ac_surf_nbc_view *nbc_view) struct ac_surf_nbc_view *nbc_view)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_image *image = iview->image; const struct radv_image *image = iview->image;
const struct radeon_surf *surf = &image->planes[0].surface; const struct radeon_surf *surf = &image->planes[0].surface;
struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws); struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws);
struct ac_surf_info surf_info = radv_get_ac_surf_info(device, image); struct ac_surf_info surf_info = radv_get_ac_surf_info(device, image);
ac_surface_compute_nbc_view(addrlib, &device->physical_device->info, surf, &surf_info, iview->vk.base_mip_level, ac_surface_compute_nbc_view(addrlib, &pdev->info, surf, &surf_info, iview->vk.base_mip_level,
iview->vk.base_array_layer, nbc_view); iview->vk.base_array_layer, nbc_view);
} }
@ -647,6 +650,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic
const struct ac_surf_nbc_view *nbc_view, const struct ac_surf_nbc_view *nbc_view,
const VkImageViewSlicedCreateInfoEXT *sliced_3d, bool force_zero_base_mip) const VkImageViewSlicedCreateInfoEXT *sliced_3d, bool force_zero_base_mip)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_image *image = iview->image; struct radv_image *image = iview->image;
struct radv_image_plane *plane = &image->planes[plane_id]; struct radv_image_plane *plane = &image->planes[plane_id];
bool is_stencil = iview->vk.aspects == VK_IMAGE_ASPECT_STENCIL_BIT; bool is_stencil = iview->vk.aspects == VK_IMAGE_ASPECT_STENCIL_BIT;
@ -665,7 +669,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic
assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0); assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format); blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);
if (device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
if (nbc_view->valid) { if (nbc_view->valid) {
hw_level = nbc_view->level; hw_level = nbc_view->level;
iview->extent.width = nbc_view->width; iview->extent.width = nbc_view->width;
@ -689,7 +693,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic
img_create_flags, nbc_view, sliced_3d); img_create_flags, nbc_view, sliced_3d);
const struct legacy_surf_level *base_level_info = NULL; const struct legacy_surf_level *base_level_info = NULL;
if (device->physical_device->info.gfx_level <= GFX8) { if (pdev->info.gfx_level <= GFX8) {
if (is_stencil) if (is_stencil)
base_level_info = &plane->surface.u.legacy.zs.stencil_level[iview->vk.base_mip_level]; base_level_info = &plane->surface.u.legacy.zs.stencil_level[iview->vk.base_mip_level];
else else
@ -738,6 +742,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
const struct radv_image_view_extra_create_info *extra_create_info) const struct radv_image_view_extra_create_info *extra_create_info)
{ {
RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image); RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
const struct radv_physical_device *pdev = radv_device_physical(device);
const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange; const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
uint32_t plane_count = 1; uint32_t plane_count = 1;
float min_lod = 0.0f; float min_lod = 0.0f;
@ -755,7 +760,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
vk_image_view_init(&device->vk, &iview->vk, !from_client, pCreateInfo); vk_image_view_init(&device->vk, &iview->vk, !from_client, pCreateInfo);
bool force_zero_base_mip = true; bool force_zero_base_mip = true;
if (device->physical_device->info.gfx_level <= GFX8 && min_lod) { if (pdev->info.gfx_level <= GFX8 && min_lod) {
/* Do not force the base level to zero to workaround a spurious bug with mipmaps and min LOD. */ /* Do not force the base level to zero to workaround a spurious bug with mipmaps and min LOD. */
force_zero_base_mip = false; force_zero_base_mip = false;
} }
@ -800,15 +805,15 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
} }
/* when the view format is emulated, redirect the view to the hidden plane 1 */ /* when the view format is emulated, redirect the view to the hidden plane 1 */
if (radv_is_format_emulated(device->physical_device, iview->vk.format)) { if (radv_is_format_emulated(pdev, iview->vk.format)) {
assert(radv_is_format_emulated(device->physical_device, image->vk.format)); assert(radv_is_format_emulated(pdev, image->vk.format));
iview->plane_id = 1; iview->plane_id = 1;
iview->vk.view_format = image->planes[iview->plane_id].format; iview->vk.view_format = image->planes[iview->plane_id].format;
iview->vk.format = image->planes[iview->plane_id].format; iview->vk.format = image->planes[iview->plane_id].format;
plane_count = 1; plane_count = 1;
} }
if (!force_zero_base_mip || device->physical_device->info.gfx_level >= GFX9) { if (!force_zero_base_mip || pdev->info.gfx_level >= GFX9) {
iview->extent = (VkExtent3D){ iview->extent = (VkExtent3D){
.width = image->vk.extent.width, .width = image->vk.extent.width,
.height = image->vk.extent.height, .height = image->vk.extent.height,
@ -854,7 +859,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
* block compatible format and the compressed format, so even if we take * block compatible format and the compressed format, so even if we take
* the plain converted dimensions the physical layout is correct. * the plain converted dimensions the physical layout is correct.
*/ */
if (device->physical_device->info.gfx_level >= GFX9 && vk_format_is_block_compressed(plane->format) && if (pdev->info.gfx_level >= GFX9 && vk_format_is_block_compressed(plane->format) &&
!vk_format_is_block_compressed(iview->vk.format)) { !vk_format_is_block_compressed(iview->vk.format)) {
/* If we have multiple levels in the view we should ideally take the last level, /* If we have multiple levels in the view we should ideally take the last level,
* but the mip calculation has a max(..., 1) so walking back to the base mip in an * but the mip calculation has a max(..., 1) so walking back to the base mip in an
@ -879,7 +884,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
* changes the descriptor's base level, and adjusts the address and * changes the descriptor's base level, and adjusts the address and
* extents accordingly. * extents accordingly.
*/ */
if (device->physical_device->info.gfx_level >= GFX10 && if (pdev->info.gfx_level >= GFX10 &&
(radv_minify(iview->extent.width, range->baseMipLevel) < lvl_width || (radv_minify(iview->extent.width, range->baseMipLevel) < lvl_width ||
radv_minify(iview->extent.height, range->baseMipLevel) < lvl_height) && radv_minify(iview->extent.height, range->baseMipLevel) < lvl_height) &&
iview->vk.layer_count == 1) { iview->vk.layer_count == 1) {

View file

@ -32,7 +32,9 @@
void void
radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf *cs, unsigned shaders) radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf *cs, unsigned shaders)
{ {
if (device->physical_device->info.gfx_level >= GFX11) { const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level >= GFX11) {
radeon_set_uconfig_reg(cs, R_036760_SQG_PERFCOUNTER_CTRL, shaders & 0x7f); radeon_set_uconfig_reg(cs, R_036760_SQG_PERFCOUNTER_CTRL, shaders & 0x7f);
} else { } else {
radeon_set_uconfig_reg_seq(cs, R_036780_SQ_PERFCOUNTER_CTRL, 2); radeon_set_uconfig_reg_seq(cs, R_036780_SQ_PERFCOUNTER_CTRL, 2);
@ -74,12 +76,14 @@ radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf
void void
radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family) radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
radv_emit_windowed_counters(device, cs, family, false); radv_emit_windowed_counters(device, cs, family, false);
/* Stop SPM counters. */ /* Stop SPM counters. */
radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL, radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) | S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
S_036020_SPM_PERFMON_STATE(device->physical_device->info.never_stop_sq_perf_counters S_036020_SPM_PERFMON_STATE(pdev->info.never_stop_sq_perf_counters
? V_036020_STRM_PERFMON_STATE_START_COUNTING ? V_036020_STRM_PERFMON_STATE_START_COUNTING
: V_036020_STRM_PERFMON_STATE_STOP_COUNTING)); : V_036020_STRM_PERFMON_STATE_STOP_COUNTING));
} }
@ -466,7 +470,8 @@ radv_emit_instance(struct radv_cmd_buffer *cmd_buffer, int se, int instance)
static void static void
radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, unsigned *selectors) radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, unsigned *selectors)
{ {
const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
const enum radv_queue_family qf = cmd_buffer->qf; const enum radv_queue_family qf = cmd_buffer->qf;
struct ac_pc_block_base *regs = block->b->b; struct ac_pc_block_base *regs = block->b->b;
struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radeon_cmdbuf *cs = cmd_buffer->cs;
@ -492,6 +497,7 @@ static void
radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count,
uint64_t va) uint64_t va)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct ac_pc_block_base *regs = block->b->b; struct ac_pc_block_base *regs = block->b->b;
struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radeon_cmdbuf *cs = cmd_buffer->cs;
unsigned reg = regs->counter0_lo; unsigned reg = regs->counter0_lo;
@ -510,7 +516,7 @@ radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_p
radeon_emit(cs, va); radeon_emit(cs, va);
radeon_emit(cs, va >> 32); radeon_emit(cs, va >> 32);
va += sizeof(uint64_t) * 2 * radv_pc_get_num_instances(cmd_buffer->device->physical_device, block); va += sizeof(uint64_t) * 2 * radv_pc_get_num_instances(pdev, block);
reg += reg_delta; reg += reg_delta;
} }
} }
@ -518,9 +524,10 @@ radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_p
static void static void
radv_pc_sample_block(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, uint64_t va) radv_pc_sample_block(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, uint64_t va)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
unsigned se_end = 1; unsigned se_end = 1;
if (block->b->b->flags & AC_PC_BLOCK_SE) if (block->b->b->flags & AC_PC_BLOCK_SE)
se_end = cmd_buffer->device->physical_device->info.max_se; se_end = pdev->info.max_se;
for (unsigned se = 0; se < se_end; ++se) { for (unsigned se = 0; se < se_end; ++se) {
for (unsigned instance = 0; instance < block->num_instances; ++instance) { for (unsigned instance = 0; instance < block->num_instances; ++instance) {
@ -555,8 +562,8 @@ radv_pc_wait_idle(struct radv_cmd_buffer *cmd_buffer)
static void static void
radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va, bool end) radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va, bool end)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0)); radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
@ -621,7 +628,7 @@ void
radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va) radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va)
{ {
struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_physical_device *pdev = cmd_buffer->device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
ASSERTED unsigned cdw_max; ASSERTED unsigned cdw_max;
cmd_buffer->state.uses_perf_counters = true; cmd_buffer->state.uses_perf_counters = true;
@ -698,6 +705,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
void void
radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va) radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radeon_cmdbuf *cs = cmd_buffer->cs;
ASSERTED unsigned cdw_max; ASSERTED unsigned cdw_max;
@ -710,9 +718,8 @@ radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->device->perf_counter_bo); radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->device->perf_counter_bo);
uint64_t perf_ctr_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET; uint64_t perf_ctr_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET;
radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->info.gfx_level, cmd_buffer->qf, radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0,
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, perf_ctr_va, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, perf_ctr_va, 1, cmd_buffer->gfx9_fence_va);
1, cmd_buffer->gfx9_fence_va);
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, perf_ctr_va, 1, 0xffffffff); radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, perf_ctr_va, 1, 0xffffffff);
radv_pc_wait_idle(cmd_buffer); radv_pc_wait_idle(cmd_buffer);

View file

@ -365,7 +365,8 @@ static unsigned
lower_bit_size_callback(const nir_instr *instr, void *_) lower_bit_size_callback(const nir_instr *instr, void *_)
{ {
struct radv_device *device = _; struct radv_device *device = _;
enum amd_gfx_level chip = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
enum amd_gfx_level chip = pdev->info.gfx_level;
if (instr->type != nir_instr_type_alu) if (instr->type != nir_instr_type_alu)
return 0; return 0;
@ -437,7 +438,8 @@ opt_vectorize_callback(const nir_instr *instr, const void *_)
return 0; return 0;
const struct radv_device *device = _; const struct radv_device *device = _;
enum amd_gfx_level chip = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
enum amd_gfx_level chip = pdev->info.gfx_level;
if (chip < GFX9) if (chip < GFX9)
return 1; return 1;
@ -461,7 +463,8 @@ void
radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_state_key *gfx_state, radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
struct radv_shader_stage *stage) struct radv_shader_stage *stage)
{ {
enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
enum amd_gfx_level gfx_level = pdev->info.gfx_level;
bool progress; bool progress;
/* Wave and workgroup size should already be filled. */ /* Wave and workgroup size should already be filled. */
@ -548,8 +551,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
NIR_PASS(_, stage->nir, ac_nir_lower_tex, NIR_PASS(_, stage->nir, ac_nir_lower_tex,
&(ac_nir_lower_tex_options){ &(ac_nir_lower_tex_options){
.gfx_level = gfx_level, .gfx_level = gfx_level,
.lower_array_layer_round_even = .lower_array_layer_round_even = !pdev->info.conformant_trunc_coord || device->disable_trunc_coord,
!device->physical_device->info.conformant_trunc_coord || device->disable_trunc_coord,
.fix_derivs_in_divergent_cf = fix_derivs_in_divergent_cf, .fix_derivs_in_divergent_cf = fix_derivs_in_divergent_cf,
.max_wqm_vgprs = 64, // TODO: improve spiller and RA support for linear VGPRs .max_wqm_vgprs = 64, // TODO: improve spiller and RA support for linear VGPRs
}); });
@ -570,7 +572,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
nir_move_options sink_opts = nir_move_const_undef | nir_move_copies; nir_move_options sink_opts = nir_move_const_undef | nir_move_copies;
if (!stage->key.optimisations_disabled) { if (!stage->key.optimisations_disabled) {
if (stage->stage != MESA_SHADER_FRAGMENT || !device->physical_device->cache_key.disable_sinking_load_input_fs) if (stage->stage != MESA_SHADER_FRAGMENT || !pdev->cache_key.disable_sinking_load_input_fs)
sink_opts |= nir_move_load_input; sink_opts |= nir_move_load_input;
NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts); NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);
@ -581,7 +583,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
* load_input can be reordered, but buffer loads can't. * load_input can be reordered, but buffer loads can't.
*/ */
if (stage->stage == MESA_SHADER_VERTEX) { if (stage->stage == MESA_SHADER_VERTEX) {
NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, stage, gfx_state, &device->physical_device->info); NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, stage, gfx_state, &pdev->info);
} }
/* Lower I/O intrinsics to memory instructions. */ /* Lower I/O intrinsics to memory instructions. */
@ -598,7 +600,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
stage->info.outinfo.export_prim_id, false, false, false, stage->info.force_vrs_per_vertex); stage->info.outinfo.export_prim_id, false, false, false, stage->info.force_vrs_per_vertex);
} else { } else {
bool emulate_ngg_gs_query_pipeline_stat = device->physical_device->emulate_ngg_gs_query_pipeline_stat; bool emulate_ngg_gs_query_pipeline_stat = pdev->emulate_ngg_gs_query_pipeline_stat;
ac_nir_gs_output_info gs_out_info = { ac_nir_gs_output_info gs_out_info = {
.streams = stage->info.gs.output_streams, .streams = stage->info.gs.output_streams,
@ -609,7 +611,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
} else if (stage->stage == MESA_SHADER_FRAGMENT) { } else if (stage->stage == MESA_SHADER_FRAGMENT) {
ac_nir_lower_ps_options options = { ac_nir_lower_ps_options options = {
.gfx_level = gfx_level, .gfx_level = gfx_level,
.family = device->physical_device->info.family, .family = pdev->info.family,
.use_aco = !radv_use_llvm_for_stage(device, stage->stage), .use_aco = !radv_use_llvm_for_stage(device, stage->stage),
.uses_discard = true, .uses_discard = true,
.alpha_func = COMPARE_FUNC_ALWAYS, .alpha_func = COMPARE_FUNC_ALWAYS,
@ -666,7 +668,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
NIR_PASS(_, stage->nir, ac_nir_lower_global_access); NIR_PASS(_, stage->nir, ac_nir_lower_global_access);
NIR_PASS_V(stage->nir, ac_nir_lower_intrinsics_to_args, gfx_level, radv_select_hw_stage(&stage->info, gfx_level), NIR_PASS_V(stage->nir, ac_nir_lower_intrinsics_to_args, gfx_level, radv_select_hw_stage(&stage->info, gfx_level),
&stage->args.ac); &stage->args.ac);
NIR_PASS_V(stage->nir, radv_nir_lower_abi, gfx_level, stage, gfx_state, device->physical_device->info.address32_hi); NIR_PASS_V(stage->nir, radv_nir_lower_abi, gfx_level, stage, gfx_state, pdev->info.address32_hi);
radv_optimize_nir_algebraic( radv_optimize_nir_algebraic(
stage->nir, io_to_mem || lowered_ngg || stage->stage == MESA_SHADER_COMPUTE || stage->stage == MESA_SHADER_TASK); stage->nir, io_to_mem || lowered_ngg || stage->stage == MESA_SHADER_COMPUTE || stage->stage == MESA_SHADER_TASK);
@ -926,7 +928,7 @@ radv_GetPipelineExecutableStatisticsKHR(VkDevice _device, const VkPipelineExecut
struct radv_shader *shader = struct radv_shader *shader =
radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage); radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);
const struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned lds_increment = unsigned lds_increment =
pdev->info.gfx_level >= GFX11 && stage == MESA_SHADER_FRAGMENT ? 1024 : pdev->info.lds_encode_granularity; pdev->info.gfx_level >= GFX11 && stage == MESA_SHADER_FRAGMENT ? 1024 : pdev->info.lds_encode_granularity;

View file

@ -38,6 +38,8 @@
static bool static bool
radv_is_cache_disabled(struct radv_device *device) radv_is_cache_disabled(struct radv_device *device)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
/* The buffer address used for debug printf is hardcoded. */ /* The buffer address used for debug printf is hardcoded. */
if (device->printf.buffer_addr) if (device->printf.buffer_addr)
return true; return true;
@ -45,8 +47,7 @@ radv_is_cache_disabled(struct radv_device *device)
/* Pipeline caches can be disabled with RADV_DEBUG=nocache, with MESA_GLSL_CACHE_DISABLE=1 and /* Pipeline caches can be disabled with RADV_DEBUG=nocache, with MESA_GLSL_CACHE_DISABLE=1 and
* when ACO_DEBUG is used. MESA_GLSL_CACHE_DISABLE is done elsewhere. * when ACO_DEBUG is used. MESA_GLSL_CACHE_DISABLE is done elsewhere.
*/ */
return (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) || return (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) || (pdev->use_llvm ? 0 : aco_get_codegen_flags());
(device->physical_device->use_llvm ? 0 : aco_get_codegen_flags());
} }
void void
@ -532,14 +533,15 @@ nir_shader *
radv_pipeline_cache_lookup_nir(struct radv_device *device, struct vk_pipeline_cache *cache, gl_shader_stage stage, radv_pipeline_cache_lookup_nir(struct radv_device *device, struct vk_pipeline_cache *cache, gl_shader_stage stage,
const blake3_hash key) const blake3_hash key)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (radv_is_cache_disabled(device)) if (radv_is_cache_disabled(device))
return NULL; return NULL;
if (!cache) if (!cache)
cache = device->mem_cache; cache = device->mem_cache;
return vk_pipeline_cache_lookup_nir(cache, key, sizeof(blake3_hash), &device->physical_device->nir_options[stage], return vk_pipeline_cache_lookup_nir(cache, key, sizeof(blake3_hash), &pdev->nir_options[stage], NULL, NULL);
NULL, NULL);
} }
void void
@ -570,6 +572,7 @@ radv_pipeline_cache_lookup_nir_handle(struct radv_device *device, struct vk_pipe
struct nir_shader * struct nir_shader *
radv_pipeline_cache_handle_to_nir(struct radv_device *device, struct vk_pipeline_cache_object *object) radv_pipeline_cache_handle_to_nir(struct radv_device *device, struct vk_pipeline_cache_object *object)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct blob_reader blob; struct blob_reader blob;
struct vk_raw_data_cache_object *nir_object = container_of(object, struct vk_raw_data_cache_object, base); struct vk_raw_data_cache_object *nir_object = container_of(object, struct vk_raw_data_cache_object, base);
blob_reader_init(&blob, nir_object->data, nir_object->data_size); blob_reader_init(&blob, nir_object->data, nir_object->data_size);
@ -579,7 +582,7 @@ radv_pipeline_cache_handle_to_nir(struct radv_device *device, struct vk_pipeline
ralloc_free(nir); ralloc_free(nir);
return NULL; return NULL;
} }
nir->options = &device->physical_device->nir_options[nir->info.stage]; nir->options = &pdev->nir_options[nir->info.stage];
return nir; return nir;
} }

View file

@ -75,6 +75,7 @@ void
radv_get_compute_pipeline_metadata(const struct radv_device *device, const struct radv_compute_pipeline *pipeline, radv_get_compute_pipeline_metadata(const struct radv_device *device, const struct radv_compute_pipeline *pipeline,
struct radv_compute_pipeline_metadata *metadata) struct radv_compute_pipeline_metadata *metadata)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader *cs = pipeline->base.shaders[MESA_SHADER_COMPUTE]; const struct radv_shader *cs = pipeline->base.shaders[MESA_SHADER_COMPUTE];
uint32_t upload_sgpr = 0, inline_sgpr = 0; uint32_t upload_sgpr = 0, inline_sgpr = 0;
@ -84,7 +85,7 @@ radv_get_compute_pipeline_metadata(const struct radv_device *device, const struc
metadata->rsrc1 = cs->config.rsrc1; metadata->rsrc1 = cs->config.rsrc1;
metadata->rsrc2 = cs->config.rsrc2; metadata->rsrc2 = cs->config.rsrc2;
metadata->rsrc3 = cs->config.rsrc3; metadata->rsrc3 = cs->config.rsrc3;
metadata->compute_resource_limits = radv_get_compute_resource_limits(device->physical_device, cs); metadata->compute_resource_limits = radv_get_compute_resource_limits(pdev, cs);
metadata->block_size_x = cs->info.cs.block_size[0]; metadata->block_size_x = cs->info.cs.block_size[0];
metadata->block_size_y = cs->info.cs.block_size[1]; metadata->block_size_y = cs->info.cs.block_size[1];
metadata->block_size_z = cs->info.cs.block_size[2]; metadata->block_size_z = cs->info.cs.block_size[2];
@ -136,7 +137,7 @@ static void
radv_compute_generate_pm4(const struct radv_device *device, struct radv_compute_pipeline *pipeline, radv_compute_generate_pm4(const struct radv_device *device, struct radv_compute_pipeline *pipeline,
struct radv_shader *shader) struct radv_shader *shader)
{ {
struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = &pipeline->base.cs; struct radeon_cmdbuf *cs = &pipeline->base.cs;
cs->reserved_dw = cs->max_dw = pdev->info.gfx_level >= GFX10 ? 19 : 16; cs->reserved_dw = cs->max_dw = pdev->info.gfx_level >= GFX10 ? 19 : 16;

View file

@ -132,12 +132,13 @@ static unsigned
radv_choose_spi_color_format(const struct radv_device *device, VkFormat vk_format, bool blend_enable, radv_choose_spi_color_format(const struct radv_device *device, VkFormat vk_format, bool blend_enable,
bool blend_need_alpha) bool blend_need_alpha)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct util_format_description *desc = vk_format_description(vk_format); const struct util_format_description *desc = vk_format_description(vk_format);
bool use_rbplus = device->physical_device->info.rbplus_allowed; bool use_rbplus = pdev->info.rbplus_allowed;
struct ac_spi_color_formats formats = {0}; struct ac_spi_color_formats formats = {0};
unsigned format, ntype, swap; unsigned format, ntype, swap;
format = ac_get_cb_format(device->physical_device->info.gfx_level, desc->format); format = ac_get_cb_format(pdev->info.gfx_level, desc->format);
ntype = ac_get_cb_number_type(desc->format); ntype = ac_get_cb_number_type(desc->format);
swap = radv_translate_colorswap(vk_format, false); swap = radv_translate_colorswap(vk_format, false);
@ -508,12 +509,13 @@ static uint64_t
radv_pipeline_needed_dynamic_state(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, radv_pipeline_needed_dynamic_state(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline,
const struct vk_graphics_pipeline_state *state) const struct vk_graphics_pipeline_state *state)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
bool has_color_att = radv_pipeline_has_color_attachments(state->rp); bool has_color_att = radv_pipeline_has_color_attachments(state->rp);
bool raster_enabled = bool raster_enabled =
!state->rs->rasterizer_discard_enable || (pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE); !state->rs->rasterizer_discard_enable || (pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
uint64_t states = RADV_DYNAMIC_ALL; uint64_t states = RADV_DYNAMIC_ALL;
if (device->physical_device->info.gfx_level < GFX10_3) if (pdev->info.gfx_level < GFX10_3)
states &= ~RADV_DYNAMIC_FRAGMENT_SHADING_RATE; states &= ~RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
/* Disable dynamic states that are useless to mesh shading. */ /* Disable dynamic states that are useless to mesh shading. */
@ -568,7 +570,7 @@ radv_pipeline_needed_dynamic_state(const struct radv_device *device, const struc
struct radv_ia_multi_vgt_param_helpers struct radv_ia_multi_vgt_param_helpers
radv_compute_ia_multi_vgt_param(const struct radv_device *device, struct radv_shader *const *shaders) radv_compute_ia_multi_vgt_param(const struct radv_device *device, struct radv_shader *const *shaders)
{ {
const struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0}; struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0};
ia_multi_vgt_param.ia_switch_on_eoi = false; ia_multi_vgt_param.ia_switch_on_eoi = false;
@ -1295,7 +1297,8 @@ static void
radv_link_shaders(const struct radv_device *device, struct radv_shader_stage *producer_stage, radv_link_shaders(const struct radv_device *device, struct radv_shader_stage *producer_stage,
struct radv_shader_stage *consumer_stage, const struct radv_graphics_state_key *gfx_state) struct radv_shader_stage *consumer_stage, const struct radv_graphics_state_key *gfx_state)
{ {
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
nir_shader *producer = producer_stage->nir; nir_shader *producer = producer_stage->nir;
nir_shader *consumer = consumer_stage->nir; nir_shader *consumer = consumer_stage->nir;
bool progress; bool progress;
@ -1686,6 +1689,7 @@ radv_graphics_shaders_link(const struct radv_device *device, const struct radv_g
struct radv_ps_epilog_key struct radv_ps_epilog_key
radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ps_epilog_state *state) radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ps_epilog_state *state)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0, z_format = 0; unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0, z_format = 0;
struct radv_ps_epilog_key key; struct radv_ps_epilog_key key;
@ -1731,8 +1735,8 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
state->alpha_to_coverage_via_mrtz); state->alpha_to_coverage_via_mrtz);
key.spi_shader_col_format = col_format; key.spi_shader_col_format = col_format;
key.color_is_int8 = device->physical_device->info.gfx_level < GFX8 ? is_int8 : 0; key.color_is_int8 = pdev->info.gfx_level < GFX8 ? is_int8 : 0;
key.color_is_int10 = device->physical_device->info.gfx_level < GFX8 ? is_int10 : 0; key.color_is_int10 = pdev->info.gfx_level < GFX8 ? is_int10 : 0;
key.enable_mrt_output_nan_fixup = device->instance->drirc.enable_mrt_output_nan_fixup ? is_float32 : 0; key.enable_mrt_output_nan_fixup = device->instance->drirc.enable_mrt_output_nan_fixup ? is_float32 : 0;
key.colors_written = state->colors_written; key.colors_written = state->colors_written;
key.mrt0_is_dual_src = state->mrt0_is_dual_src; key.mrt0_is_dual_src = state->mrt0_is_dual_src;
@ -1811,7 +1815,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
const struct vk_graphics_pipeline_state *state, const struct vk_graphics_pipeline_state *state,
VkGraphicsPipelineLibraryFlagBitsEXT lib_flags) VkGraphicsPipelineLibraryFlagBitsEXT lib_flags)
{ {
const struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_graphics_state_key key; struct radv_graphics_state_key key;
memset(&key, 0, sizeof(key)); memset(&key, 0, sizeof(key));
@ -1884,7 +1888,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
} }
} }
if (device->physical_device->info.gfx_level >= GFX11 && state->ms) { if (pdev->info.gfx_level >= GFX11 && state->ms) {
key.ms.alpha_to_coverage_via_mrtz = state->ms->alpha_to_coverage_enable; key.ms.alpha_to_coverage_via_mrtz = state->ms->alpha_to_coverage_enable;
} }
@ -1898,15 +1902,14 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
key.unknown_rast_prim = true; key.unknown_rast_prim = true;
} }
if (device->physical_device->info.gfx_level >= GFX10 && state->rs) { if (pdev->info.gfx_level >= GFX10 && state->rs) {
key.rs.provoking_vtx_last = state->rs->provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT; key.rs.provoking_vtx_last = state->rs->provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;
} }
key.ps.force_vrs_enabled = device->force_vrs_enabled && !radv_is_static_vrs_enabled(pipeline, state); key.ps.force_vrs_enabled = device->force_vrs_enabled && !radv_is_static_vrs_enabled(pipeline, state);
if ((radv_is_vrs_enabled(pipeline, state) || key.ps.force_vrs_enabled) && if ((radv_is_vrs_enabled(pipeline, state) || key.ps.force_vrs_enabled) &&
(device->physical_device->info.family == CHIP_NAVI21 || device->physical_device->info.family == CHIP_NAVI22 || (pdev->info.family == CHIP_NAVI21 || pdev->info.family == CHIP_NAVI22 || pdev->info.family == CHIP_VANGOGH))
device->physical_device->info.family == CHIP_VANGOGH))
key.adjust_frag_coord_z = true; key.adjust_frag_coord_z = true;
if (radv_pipeline_needs_ps_epilog(pipeline, lib_flags)) if (radv_pipeline_needs_ps_epilog(pipeline, lib_flags))
@ -1914,7 +1917,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
key.ps.epilog = radv_pipeline_generate_ps_epilog_key(device, state); key.ps.epilog = radv_pipeline_generate_ps_epilog_key(device, state);
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
/* On GFX11, alpha to coverage is exported via MRTZ when depth/stencil/samplemask are also /* On GFX11, alpha to coverage is exported via MRTZ when depth/stencil/samplemask are also
* exported. Though, when a PS epilog is needed and the MS state is NULL (with dynamic * exported. Though, when a PS epilog is needed and the MS state is NULL (with dynamic
* rendering), it's not possible to know the info at compile time and MRTZ needs to be * rendering), it's not possible to know the info at compile time and MRTZ needs to be
@ -1927,7 +1930,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
key.dynamic_rasterization_samples = !!(pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZATION_SAMPLES) || key.dynamic_rasterization_samples = !!(pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZATION_SAMPLES) ||
(!!(pipeline->active_stages & VK_SHADER_STAGE_FRAGMENT_BIT) && !state->ms); (!!(pipeline->active_stages & VK_SHADER_STAGE_FRAGMENT_BIT) && !state->ms);
if (device->physical_device->use_ngg) { if (pdev->use_ngg) {
VkShaderStageFlags ngg_stage; VkShaderStageFlags ngg_stage;
if (pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT) { if (pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT) {
@ -1995,7 +1998,9 @@ static void
radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage *stages, radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage *stages,
VkShaderStageFlagBits active_nir_stages) VkShaderStageFlagBits active_nir_stages)
{ {
if (!device->physical_device->cache_key.use_ngg) const struct radv_physical_device *pdev = radv_device_physical(device);
if (!pdev->cache_key.use_ngg)
return; return;
if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_VERTEX].info.next_stage != MESA_SHADER_TESS_CTRL) { if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_VERTEX].info.next_stage != MESA_SHADER_TESS_CTRL) {
@ -2006,7 +2011,7 @@ radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage *
stages[MESA_SHADER_MESH].info.is_ngg = true; stages[MESA_SHADER_MESH].info.is_ngg = true;
} }
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
if (stages[MESA_SHADER_GEOMETRY].nir) if (stages[MESA_SHADER_GEOMETRY].nir)
stages[MESA_SHADER_GEOMETRY].info.is_ngg = true; stages[MESA_SHADER_GEOMETRY].info.is_ngg = true;
} else { } else {
@ -2164,7 +2169,8 @@ static void
radv_declare_pipeline_args(struct radv_device *device, struct radv_shader_stage *stages, radv_declare_pipeline_args(struct radv_device *device, struct radv_shader_stage *stages,
const struct radv_graphics_state_key *gfx_state, VkShaderStageFlagBits active_nir_stages) const struct radv_graphics_state_key *gfx_state, VkShaderStageFlagBits active_nir_stages)
{ {
enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
enum amd_gfx_level gfx_level = pdev->info.gfx_level;
if (gfx_level >= GFX9 && stages[MESA_SHADER_TESS_CTRL].nir) { if (gfx_level >= GFX9 && stages[MESA_SHADER_TESS_CTRL].nir) {
radv_declare_shader_args(device, gfx_state, &stages[MESA_SHADER_TESS_CTRL].info, MESA_SHADER_TESS_CTRL, radv_declare_shader_args(device, gfx_state, &stages[MESA_SHADER_TESS_CTRL].info, MESA_SHADER_TESS_CTRL,
@ -2210,15 +2216,16 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache
bool keep_executable_info, bool keep_statistic_info, bool keep_executable_info, bool keep_statistic_info,
struct radv_shader_binary **gs_copy_binary) struct radv_shader_binary **gs_copy_binary)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader_info *gs_info = &gs_stage->info; const struct radv_shader_info *gs_info = &gs_stage->info;
ac_nir_gs_output_info output_info = { ac_nir_gs_output_info output_info = {
.streams = gs_info->gs.output_streams, .streams = gs_info->gs.output_streams,
.usage_mask = gs_info->gs.output_usage_mask, .usage_mask = gs_info->gs.output_usage_mask,
}; };
nir_shader *nir = ac_nir_create_gs_copy_shader( nir_shader *nir = ac_nir_create_gs_copy_shader(
gs_stage->nir, device->physical_device->info.gfx_level, gs_stage->nir, pdev->info.gfx_level, gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask,
gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask, gs_info->outinfo.vs_output_param_offset, gs_info->outinfo.vs_output_param_offset, gs_info->outinfo.param_exports, false, false, false,
gs_info->outinfo.param_exports, false, false, false, gs_info->force_vrs_per_vertex, &output_info); gs_info->force_vrs_per_vertex, &output_info);
nir_validate_shader(nir, "after ac_nir_create_gs_copy_shader"); nir_validate_shader(nir, "after ac_nir_create_gs_copy_shader");
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
@ -2246,10 +2253,8 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache
gs_copy_stage.info.user_sgprs_locs = gs_copy_stage.args.user_sgprs_locs; gs_copy_stage.info.user_sgprs_locs = gs_copy_stage.args.user_sgprs_locs;
gs_copy_stage.info.inline_push_constant_mask = gs_copy_stage.args.ac.inline_push_const_mask; gs_copy_stage.info.inline_push_constant_mask = gs_copy_stage.args.ac.inline_push_const_mask;
NIR_PASS_V(nir, ac_nir_lower_intrinsics_to_args, device->physical_device->info.gfx_level, AC_HW_VERTEX_SHADER, NIR_PASS_V(nir, ac_nir_lower_intrinsics_to_args, pdev->info.gfx_level, AC_HW_VERTEX_SHADER, &gs_copy_stage.args.ac);
&gs_copy_stage.args.ac); NIR_PASS_V(nir, radv_nir_lower_abi, pdev->info.gfx_level, &gs_copy_stage, gfx_state, pdev->info.address32_hi);
NIR_PASS_V(nir, radv_nir_lower_abi, device->physical_device->info.gfx_level, &gs_copy_stage, gfx_state,
device->physical_device->info.address32_hi);
struct radv_graphics_pipeline_key key = {0}; struct radv_graphics_pipeline_key key = {0};
bool dump_shader = radv_can_dump_shader(device, nir, true); bool dump_shader = radv_can_dump_shader(device, nir, true);
@ -2272,6 +2277,8 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_
struct radv_shader_binary **binaries, struct radv_shader **gs_copy_shader, struct radv_shader_binary **binaries, struct radv_shader **gs_copy_shader,
struct radv_shader_binary **gs_copy_binary) struct radv_shader_binary **gs_copy_binary)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
for (int s = MESA_VULKAN_SHADER_STAGES - 1; s >= 0; s--) { for (int s = MESA_VULKAN_SHADER_STAGES - 1; s >= 0; s--) {
if (!(active_nir_stages & (1 << s))) if (!(active_nir_stages & (1 << s)))
continue; continue;
@ -2280,7 +2287,7 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_
unsigned shader_count = 1; unsigned shader_count = 1;
/* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. */ /* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. */
if (device->physical_device->info.gfx_level >= GFX9 && if (pdev->info.gfx_level >= GFX9 &&
((s == MESA_SHADER_GEOMETRY && ((s == MESA_SHADER_GEOMETRY &&
(active_nir_stages & (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))) || (active_nir_stages & (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))) ||
(s == MESA_SHADER_TESS_CTRL && (active_nir_stages & VK_SHADER_STAGE_VERTEX_BIT)))) { (s == MESA_SHADER_TESS_CTRL && (active_nir_stages & VK_SHADER_STAGE_VERTEX_BIT)))) {
@ -2348,6 +2355,7 @@ static void
radv_pipeline_import_retained_shaders(const struct radv_device *device, struct radv_graphics_pipeline *pipeline, radv_pipeline_import_retained_shaders(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
struct radv_graphics_lib_pipeline *lib, struct radv_shader_stage *stages) struct radv_graphics_lib_pipeline *lib, struct radv_shader_stage *stages)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_retained_shaders *retained_shaders = &lib->retained_shaders; struct radv_retained_shaders *retained_shaders = &lib->retained_shaders;
/* Import the stages (SPIR-V only in case of cache hits). */ /* Import the stages (SPIR-V only in case of cache hits). */
@ -2370,7 +2378,7 @@ radv_pipeline_import_retained_shaders(const struct radv_device *device, struct r
int64_t stage_start = os_time_get_nano(); int64_t stage_start = os_time_get_nano();
/* Deserialize the NIR shader. */ /* Deserialize the NIR shader. */
const struct nir_shader_compiler_options *options = &device->physical_device->nir_options[s]; const struct nir_shader_compiler_options *options = &pdev->nir_options[s];
struct blob_reader blob_reader; struct blob_reader blob_reader;
blob_reader_init(&blob_reader, retained_shaders->stages[s].serialized_nir, blob_reader_init(&blob_reader, retained_shaders->stages[s].serialized_nir,
retained_shaders->stages[s].serialized_nir_size); retained_shaders->stages[s].serialized_nir_size);
@ -2442,6 +2450,7 @@ static bool
radv_skip_graphics_pipeline_compile(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline, radv_skip_graphics_pipeline_compile(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline,
VkGraphicsPipelineLibraryFlagBitsEXT lib_flags, bool fast_linking_enabled) VkGraphicsPipelineLibraryFlagBitsEXT lib_flags, bool fast_linking_enabled)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
VkShaderStageFlagBits binary_stages = 0; VkShaderStageFlagBits binary_stages = 0;
/* Do not skip when fast-linking isn't enabled. */ /* Do not skip when fast-linking isn't enabled. */
@ -2462,7 +2471,7 @@ radv_skip_graphics_pipeline_compile(const struct radv_device *device, const stru
binary_stages |= mesa_to_vk_shader_stage(i); binary_stages |= mesa_to_vk_shader_stage(i);
} }
if (device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
/* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. */ /* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. */
if (binary_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) { if (binary_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) {
binary_stages |= VK_SHADER_STAGE_VERTEX_BIT; binary_stages |= VK_SHADER_STAGE_VERTEX_BIT;
@ -2490,6 +2499,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
struct radv_shader **shaders, struct radv_shader_binary **binaries, struct radv_shader **shaders, struct radv_shader_binary **binaries,
struct radv_shader **gs_copy_shader, struct radv_shader_binary **gs_copy_binary) struct radv_shader **gs_copy_shader, struct radv_shader_binary **gs_copy_binary)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
const bool nir_cache = device->instance->perftest_flags & RADV_PERFTEST_NIR_CACHE; const bool nir_cache = device->instance->perftest_flags & RADV_PERFTEST_NIR_CACHE;
for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) { for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
if (!stages[s].entrypoint) if (!stages[s].entrypoint)
@ -2530,7 +2540,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
active_nir_stages |= mesa_to_vk_shader_stage(i); active_nir_stages |= mesa_to_vk_shader_stage(i);
} }
if (!device->physical_device->mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir && if (!pdev->mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir &&
BITSET_TEST(stages[MESA_SHADER_MESH].nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID)) { BITSET_TEST(stages[MESA_SHADER_MESH].nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID)) {
nir_shader *mesh = stages[MESA_SHADER_MESH].nir; nir_shader *mesh = stages[MESA_SHADER_MESH].nir;
nir_shader *task = stages[MESA_SHADER_TASK].nir; nir_shader *task = stages[MESA_SHADER_TASK].nir;
@ -2848,7 +2858,7 @@ void
radv_emit_vgt_gs_mode(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, radv_emit_vgt_gs_mode(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
const struct radv_shader *last_vgt_api_shader) const struct radv_shader *last_vgt_api_shader)
{ {
const struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader_info *info = &last_vgt_api_shader->info; const struct radv_shader_info *info = &last_vgt_api_shader->info;
unsigned vgt_primitiveid_en = 0; unsigned vgt_primitiveid_en = 0;
uint32_t vgt_gs_mode = 0; uint32_t vgt_gs_mode = 0;
@ -2871,7 +2881,7 @@ static void
radv_emit_hw_vs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, radv_emit_hw_vs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
const struct radv_shader *shader) const struct radv_shader *shader)
{ {
const struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t va = radv_shader_get_va(shader); uint64_t va = radv_shader_get_va(shader);
radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4); radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4);
@ -2971,7 +2981,7 @@ static void
radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
const struct radv_shader *es, const struct radv_shader *shader) const struct radv_shader *es, const struct radv_shader *shader)
{ {
const struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t va = radv_shader_get_va(shader); uint64_t va = radv_shader_get_va(shader);
gl_shader_stage es_type; gl_shader_stage es_type;
const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info; const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info;
@ -3128,7 +3138,7 @@ radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
static void static void
radv_emit_hw_hs(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_shader *shader) radv_emit_hw_hs(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_shader *shader)
{ {
const struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t va = radv_shader_get_va(shader); uint64_t va = radv_shader_get_va(shader);
if (pdev->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
@ -3152,6 +3162,8 @@ void
radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
const struct radv_shader *vs, const struct radv_shader *next_stage) const struct radv_shader *vs, const struct radv_shader *next_stage)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (vs->info.merged_shader_compiled_separately) { if (vs->info.merged_shader_compiled_separately) {
const struct radv_userdata_info *loc = &vs->info.user_sgprs_locs.shader_data[AC_UD_NEXT_STAGE_PC]; const struct radv_userdata_info *loc = &vs->info.user_sgprs_locs.shader_data[AC_UD_NEXT_STAGE_PC];
const uint32_t base_reg = vs->info.user_data_0; const uint32_t base_reg = vs->info.user_data_0;
@ -3164,7 +3176,7 @@ radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *
if (vs->info.next_stage == MESA_SHADER_TESS_CTRL) { if (vs->info.next_stage == MESA_SHADER_TESS_CTRL) {
radv_shader_combine_cfg_vs_tcs(vs, next_stage, &rsrc1, NULL); radv_shader_combine_cfg_vs_tcs(vs, next_stage, &rsrc1, NULL);
if (device->physical_device->info.gfx_level >= GFX10) { if (pdev->info.gfx_level >= GFX10) {
radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, vs->va >> 8); radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, vs->va >> 8);
} else { } else {
radeon_set_sh_reg(cs, R_00B410_SPI_SHADER_PGM_LO_LS, vs->va >> 8); radeon_set_sh_reg(cs, R_00B410_SPI_SHADER_PGM_LO_LS, vs->va >> 8);
@ -3174,7 +3186,7 @@ radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *
} else { } else {
radv_shader_combine_cfg_vs_gs(vs, next_stage, &rsrc1, &rsrc2); radv_shader_combine_cfg_vs_gs(vs, next_stage, &rsrc1, &rsrc2);
if (device->physical_device->info.gfx_level >= GFX10) { if (pdev->info.gfx_level >= GFX10) {
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, vs->va >> 8); radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, vs->va >> 8);
} else { } else {
radeon_set_sh_reg(cs, R_00B210_SPI_SHADER_PGM_LO_ES, vs->va >> 8); radeon_set_sh_reg(cs, R_00B210_SPI_SHADER_PGM_LO_ES, vs->va >> 8);
@ -3182,8 +3194,7 @@ radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *
unsigned lds_size; unsigned lds_size;
if (next_stage->info.is_ngg) { if (next_stage->info.is_ngg) {
lds_size = DIV_ROUND_UP(next_stage->info.ngg_info.lds_size, lds_size = DIV_ROUND_UP(next_stage->info.ngg_info.lds_size, pdev->info.lds_encode_granularity);
device->physical_device->info.lds_encode_granularity);
} else { } else {
lds_size = next_stage->info.gs_ring_info.lds_size; lds_size = next_stage->info.gs_ring_info.lds_size;
} }
@ -3225,6 +3236,8 @@ void
radv_emit_tess_eval_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, radv_emit_tess_eval_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
const struct radv_shader *tes, const struct radv_shader *gs) const struct radv_shader *tes, const struct radv_shader *gs)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (tes->info.merged_shader_compiled_separately) { if (tes->info.merged_shader_compiled_separately) {
const struct radv_userdata_info *loc = &tes->info.user_sgprs_locs.shader_data[AC_UD_NEXT_STAGE_PC]; const struct radv_userdata_info *loc = &tes->info.user_sgprs_locs.shader_data[AC_UD_NEXT_STAGE_PC];
const uint32_t base_reg = tes->info.user_data_0; const uint32_t base_reg = tes->info.user_data_0;
@ -3238,7 +3251,7 @@ radv_emit_tess_eval_shader(const struct radv_device *device, struct radeon_cmdbu
unsigned lds_size; unsigned lds_size;
if (gs->info.is_ngg) { if (gs->info.is_ngg) {
lds_size = DIV_ROUND_UP(gs->info.ngg_info.lds_size, device->physical_device->info.lds_encode_granularity); lds_size = DIV_ROUND_UP(gs->info.ngg_info.lds_size, pdev->info.lds_encode_granularity);
} else { } else {
lds_size = gs->info.gs_ring_info.lds_size; lds_size = gs->info.gs_ring_info.lds_size;
} }
@ -3264,7 +3277,7 @@ static void
radv_emit_hw_gs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, radv_emit_hw_gs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
const struct radv_shader *gs) const struct radv_shader *gs)
{ {
const struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_legacy_gs_info *gs_state = &gs->info.gs_ring_info; const struct radv_legacy_gs_info *gs_state = &gs->info.gs_ring_info;
unsigned gs_max_out_vertices; unsigned gs_max_out_vertices;
const uint8_t *num_components; const uint8_t *num_components;
@ -3382,16 +3395,15 @@ void
radv_emit_mesh_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, radv_emit_mesh_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
const struct radv_shader *ms) const struct radv_shader *ms)
{ {
const struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
const uint32_t gs_out = radv_conv_gl_prim_to_gs_out(ms->info.ms.output_prim); const uint32_t gs_out = radv_conv_gl_prim_to_gs_out(ms->info.ms.output_prim);
radv_emit_hw_ngg(device, ctx_cs, cs, NULL, ms); radv_emit_hw_ngg(device, ctx_cs, cs, NULL, ms);
radeon_set_context_reg( radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT,
ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, pdev->mesh_fast_launch_2 ? ms->info.ngg_info.max_out_verts : ms->info.workgroup_size);
device->physical_device->mesh_fast_launch_2 ? ms->info.ngg_info.max_out_verts : ms->info.workgroup_size);
radeon_set_uconfig_reg_idx(pdev, ctx_cs, R_030908_VGT_PRIMITIVE_TYPE, 1, V_008958_DI_PT_POINTLIST); radeon_set_uconfig_reg_idx(pdev, ctx_cs, R_030908_VGT_PRIMITIVE_TYPE, 1, V_008958_DI_PT_POINTLIST);
if (device->physical_device->mesh_fast_launch_2) { if (pdev->mesh_fast_launch_2) {
radeon_set_sh_reg_seq(cs, R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, 2); radeon_set_sh_reg_seq(cs, R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, 2);
radeon_emit(cs, S_00B2B0_MESHLET_NUM_THREAD_X(ms->info.cs.block_size[0] - 1) | radeon_emit(cs, S_00B2B0_MESHLET_NUM_THREAD_X(ms->info.cs.block_size[0] - 1) |
S_00B2B0_MESHLET_NUM_THREAD_Y(ms->info.cs.block_size[1] - 1) | S_00B2B0_MESHLET_NUM_THREAD_Y(ms->info.cs.block_size[1] - 1) |
@ -3476,9 +3488,10 @@ void
radv_emit_ps_inputs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, radv_emit_ps_inputs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
const struct radv_shader *last_vgt_shader, const struct radv_shader *ps) const struct radv_shader *last_vgt_shader, const struct radv_shader *ps)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_vs_output_info *outinfo = &last_vgt_shader->info.outinfo; const struct radv_vs_output_info *outinfo = &last_vgt_shader->info.outinfo;
bool mesh = last_vgt_shader->info.stage == MESA_SHADER_MESH; bool mesh = last_vgt_shader->info.stage == MESA_SHADER_MESH;
bool gfx11plus = device->physical_device->info.gfx_level >= GFX11; bool gfx11plus = pdev->info.gfx_level >= GFX11;
uint32_t ps_input_cntl[32]; uint32_t ps_input_cntl[32];
unsigned ps_offset = 0; unsigned ps_offset = 0;
@ -3530,7 +3543,7 @@ void
radv_emit_fragment_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, radv_emit_fragment_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
const struct radv_shader *ps) const struct radv_shader *ps)
{ {
const struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
bool param_gen; bool param_gen;
uint64_t va; uint64_t va;
@ -3566,7 +3579,7 @@ void
radv_emit_vgt_reuse(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, const struct radv_shader *tes, radv_emit_vgt_reuse(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, const struct radv_shader *tes,
const struct radv_vgt_shader_key *key) const struct radv_vgt_shader_key *key)
{ {
const struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level == GFX10_3) { if (pdev->info.gfx_level == GFX10_3) {
/* Legacy Tess+GS should disable reuse to prevent hangs on GFX10.3. */ /* Legacy Tess+GS should disable reuse to prevent hangs on GFX10.3. */
@ -3635,7 +3648,7 @@ void
radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
const struct radv_vgt_shader_key *key) const struct radv_vgt_shader_key *key)
{ {
const struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t stages = 0; uint32_t stages = 0;
if (key->tess) { if (key->tess) {
@ -3651,7 +3664,7 @@ radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdb
stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1); stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1);
} else if (key->mesh) { } else if (key->mesh) {
assert(!key->ngg_passthrough); assert(!key->ngg_passthrough);
unsigned gs_fast_launch = device->physical_device->mesh_fast_launch_2 ? 2 : 1; unsigned gs_fast_launch = pdev->mesh_fast_launch_2 ? 2 : 1;
stages |= stages |=
S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(gs_fast_launch) | S_028B54_NGG_WAVE_ID_EN(key->mesh_scratch_ring); S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(gs_fast_launch) | S_028B54_NGG_WAVE_ID_EN(key->mesh_scratch_ring);
} else if (key->ngg) { } else if (key->ngg) {
@ -3682,7 +3695,7 @@ radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdb
void void
radv_emit_vgt_gs_out(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, uint32_t vgt_gs_out_prim_type) radv_emit_vgt_gs_out(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, uint32_t vgt_gs_out_prim_type)
{ {
const struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
radeon_set_uconfig_reg(ctx_cs, R_030998_VGT_GS_OUT_PRIM_TYPE, vgt_gs_out_prim_type); radeon_set_uconfig_reg(ctx_cs, R_030998_VGT_GS_OUT_PRIM_TYPE, vgt_gs_out_prim_type);
@ -3713,9 +3726,10 @@ gfx103_emit_vgt_draw_payload_cntl(struct radeon_cmdbuf *ctx_cs, const struct rad
static bool static bool
gfx103_pipeline_vrs_coarse_shading(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline) gfx103_pipeline_vrs_coarse_shading(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
if (device->physical_device->info.gfx_level != GFX10_3) if (pdev->info.gfx_level != GFX10_3)
return false; return false;
if (device->instance->debug_flags & RADV_DEBUG_NO_VRS_FLAT_SHADING) if (device->instance->debug_flags & RADV_DEBUG_NO_VRS_FLAT_SHADING)
@ -3731,7 +3745,7 @@ void
gfx103_emit_vrs_state(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, const struct radv_shader *ps, gfx103_emit_vrs_state(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, const struct radv_shader *ps,
bool enable_vrs, bool enable_vrs_coarse_shading, bool force_vrs_per_vertex) bool enable_vrs, bool enable_vrs_coarse_shading, bool force_vrs_per_vertex)
{ {
const struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t mode = V_028064_SC_VRS_COMB_MODE_PASSTHRU; uint32_t mode = V_028064_SC_VRS_COMB_MODE_PASSTHRU;
uint8_t rate_x = 0, rate_y = 0; uint8_t rate_x = 0, rate_y = 0;
@ -3769,7 +3783,7 @@ radv_pipeline_emit_pm4(const struct radv_device *device, struct radv_graphics_pi
const struct vk_graphics_pipeline_state *state) const struct vk_graphics_pipeline_state *state)
{ {
const struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader *last_vgt_shader = radv_get_last_vgt_shader(pipeline); const struct radv_shader *last_vgt_shader = radv_get_last_vgt_shader(pipeline);
const struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; const struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
struct radeon_cmdbuf *ctx_cs = &pipeline->base.ctx_cs; struct radeon_cmdbuf *ctx_cs = &pipeline->base.ctx_cs;
@ -3835,7 +3849,7 @@ static void
radv_pipeline_init_vertex_input_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline, radv_pipeline_init_vertex_input_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
const struct vk_graphics_pipeline_state *state) const struct vk_graphics_pipeline_state *state)
{ {
const struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader_info *vs_info = &radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX)->info; const struct radv_shader_info *vs_info = &radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX)->info;
if (state->vi) { if (state->vi) {
@ -4022,7 +4036,8 @@ bool
radv_needs_null_export_workaround(const struct radv_device *device, const struct radv_shader *ps, radv_needs_null_export_workaround(const struct radv_device *device, const struct radv_shader *ps,
unsigned custom_blend_mode) unsigned custom_blend_mode)
{ {
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
if (!ps) if (!ps)
return false; return false;

View file

@ -370,6 +370,7 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
const struct radv_ray_tracing_stage_info *traversal_stage_info, const struct radv_ray_tracing_stage_info *traversal_stage_info,
struct radv_serialized_shader_arena_block *replay_block, struct radv_shader **out_shader) struct radv_serialized_shader_arena_block *replay_block, struct radv_shader **out_shader)
{ {
struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_shader_binary *binary; struct radv_shader_binary *binary;
bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.base.create_flags); bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.base.create_flags);
bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.base.create_flags); bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.base.create_flags);
@ -405,7 +406,7 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
.stack_alignment = 16, .stack_alignment = 16,
.localized_loads = true, .localized_loads = true,
.vectorizer_callback = radv_mem_vectorize_callback, .vectorizer_callback = radv_mem_vectorize_callback,
.vectorizer_data = &device->physical_device->info.gfx_level, .vectorizer_data = &pdev->info.gfx_level,
}; };
nir_lower_shader_calls(stage->nir, &opts, &resume_shaders, &num_resume_shaders, stage->nir); nir_lower_shader_calls(stage->nir, &opts, &resume_shaders, &num_resume_shaders, stage->nir);
} }
@ -828,6 +829,8 @@ postprocess_rt_config(struct ac_shader_config *config, enum amd_gfx_level gfx_le
static void static void
compile_rt_prolog(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline) compile_rt_prolog(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
pipeline->prolog = radv_create_rt_prolog(device); pipeline->prolog = radv_create_rt_prolog(device);
/* create combined config */ /* create combined config */
@ -839,7 +842,7 @@ compile_rt_prolog(struct radv_device *device, struct radv_ray_tracing_pipeline *
if (pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]) if (pipeline->base.base.shaders[MESA_SHADER_INTERSECTION])
combine_config(config, &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]->config); combine_config(config, &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]->config);
postprocess_rt_config(config, device->physical_device->info.gfx_level, device->physical_device->rt_wave_size); postprocess_rt_config(config, pdev->info.gfx_level, pdev->rt_wave_size);
pipeline->prolog->max_waves = radv_get_max_waves(device, config, &pipeline->prolog->info); pipeline->prolog->max_waves = radv_get_max_waves(device, config, &pipeline->prolog->info);
} }

View file

@ -18,6 +18,8 @@ static struct hash_table *device_ht = NULL;
VkResult VkResult
radv_printf_data_init(struct radv_device *device) radv_printf_data_init(struct radv_device *device)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
util_dynarray_init(&device->printf.formats, NULL); util_dynarray_init(&device->printf.formats, NULL);
device->printf.buffer_size = debug_get_num_option("RADV_PRINTF_BUFFER_SIZE", 0); device->printf.buffer_size = debug_get_num_option("RADV_PRINTF_BUFFER_SIZE", 0);
@ -45,9 +47,9 @@ radv_printf_data_init(struct radv_device *device)
VkMemoryAllocateInfo alloc_info = { VkMemoryAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = requirements.size, .allocationSize = requirements.size,
.memoryTypeIndex = radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | .memoryTypeIndex =
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT), VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
}; };
result = device->vk.dispatch_table.AllocateMemory(_device, &alloc_info, NULL, &device->printf.memory); result = device->vk.dispatch_table.AllocateMemory(_device, &alloc_info, NULL, &device->printf.memory);

View file

@ -1134,8 +1134,6 @@ struct radv_device {
/* Whether to keep shader debug info, for debugging. */ /* Whether to keep shader debug info, for debugging. */
bool keep_shader_info; bool keep_shader_info;
struct radv_physical_device *physical_device;
/* Backup in-memory cache to be used if the app doesn't provide one */ /* Backup in-memory cache to be used if the app doesn't provide one */
struct vk_pipeline_cache *mem_cache; struct vk_pipeline_cache *mem_cache;
@ -1275,6 +1273,12 @@ struct radv_device {
uint32_t compute_scratch_waves; uint32_t compute_scratch_waves;
}; };
static inline struct radv_physical_device *
radv_device_physical(const struct radv_device *dev)
{
return (struct radv_physical_device *)dev->vk.physical;
}
bool radv_device_set_pstate(struct radv_device *device, bool enable); bool radv_device_set_pstate(struct radv_device *device, bool enable);
bool radv_device_acquire_performance_counters(struct radv_device *device); bool radv_device_acquire_performance_counters(struct radv_device *device);
void radv_device_release_performance_counters(struct radv_device *device); void radv_device_release_performance_counters(struct radv_device *device);
@ -2155,10 +2159,12 @@ static inline void
radv_emit_shader_pointer_body(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, radv_emit_shader_pointer_body(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va,
bool use_32bit_pointers) bool use_32bit_pointers)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
radeon_emit(cs, va); radeon_emit(cs, va);
if (use_32bit_pointers) { if (use_32bit_pointers) {
assert(va == 0 || (va >> 32) == device->physical_device->info.address32_hi); assert(va == 0 || (va >> 32) == pdev->info.address32_hi);
} else { } else {
radeon_emit(cs, va >> 32); radeon_emit(cs, va >> 32);
} }
@ -2798,7 +2804,8 @@ radv_image_has_htile(const struct radv_image *image)
static inline bool static inline bool
radv_image_has_vrs_htile(const struct radv_device *device, const struct radv_image *image) radv_image_has_vrs_htile(const struct radv_device *device, const struct radv_image *image)
{ {
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
/* Any depth buffer can potentially use VRS on GFX10.3. */ /* Any depth buffer can potentially use VRS on GFX10.3. */
return gfx_level == GFX10_3 && device->vk.enabled_features.attachmentFragmentShadingRate && return gfx_level == GFX10_3 && device->vk.enabled_features.attachmentFragmentShadingRate &&
@ -2830,7 +2837,9 @@ radv_image_is_tc_compat_htile(const struct radv_image *image)
static inline bool static inline bool
radv_image_tile_stencil_disabled(const struct radv_device *device, const struct radv_image *image) radv_image_tile_stencil_disabled(const struct radv_device *device, const struct radv_image *image)
{ {
if (device->physical_device->info.gfx_level >= GFX9) { const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level >= GFX9) {
return !vk_format_has_stencil(image->vk.format) && !radv_image_has_vrs_htile(device, image); return !vk_format_has_stencil(image->vk.format) && !radv_image_has_vrs_htile(device, image);
} else { } else {
/* Due to a hw bug, TILE_STENCIL_DISABLE must be set to 0 for /* Due to a hw bug, TILE_STENCIL_DISABLE must be set to 0 for
@ -2938,8 +2947,10 @@ radv_get_htile_initial_value(const struct radv_device *device, const struct radv
static inline bool static inline bool
radv_image_get_iterate256(const struct radv_device *device, struct radv_image *image) radv_image_get_iterate256(const struct radv_device *device, struct radv_image *image)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
/* ITERATE_256 is required for depth or stencil MSAA images that are TC-compatible HTILE. */ /* ITERATE_256 is required for depth or stencil MSAA images that are TC-compatible HTILE. */
return device->physical_device->info.gfx_level >= GFX10 && return pdev->info.gfx_level >= GFX10 &&
(image->vk.usage & (VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) && (image->vk.usage & (VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&
radv_image_is_tc_compat_htile(image) && image->vk.samples > 1; radv_image_is_tc_compat_htile(image) && image->vk.samples > 1;
} }
@ -3744,7 +3755,8 @@ radv_is_streamout_enabled(struct radv_cmd_buffer *cmd_buffer)
static inline enum amd_ip_type static inline enum amd_ip_type
radv_queue_ring(const struct radv_queue *queue) radv_queue_ring(const struct radv_queue *queue)
{ {
return radv_queue_family_to_ring(queue->device->physical_device, queue->state.qf); const struct radv_physical_device *pdev = radv_device_physical(queue->device);
return radv_queue_family_to_ring(pdev, queue->state.qf);
} }
/* radv_video */ /* radv_video */
@ -3758,7 +3770,8 @@ void radv_video_get_profile_alignments(struct radv_physical_device *pdev, const
static inline bool static inline bool
radv_use_llvm_for_stage(const struct radv_device *device, UNUSED gl_shader_stage stage) radv_use_llvm_for_stage(const struct radv_device *device, UNUSED gl_shader_stage stage)
{ {
return device->physical_device->use_llvm; const struct radv_physical_device *pdev = radv_device_physical(device);
return pdev->use_llvm;
} }
static inline bool static inline bool

View file

@ -56,7 +56,8 @@ radv_get_pipelinestat_query_size(struct radv_device *device)
/* GFX10_3 only has 11 valid pipeline statistics queries but in order to emulate mesh/task shader /* GFX10_3 only has 11 valid pipeline statistics queries but in order to emulate mesh/task shader
* invocations, it's easier to use the same size as GFX11. * invocations, it's easier to use the same size as GFX11.
*/ */
unsigned num_results = device->physical_device->info.gfx_level >= GFX10_3 ? 14 : 11; const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned num_results = pdev->info.gfx_level >= GFX10_3 ? 14 : 11;
return num_results * 8; return num_results * 8;
} }
@ -120,6 +121,7 @@ build_occlusion_query_shader(struct radv_device *device)
* } * }
* } * }
*/ */
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "occlusion_query"); nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "occlusion_query");
b.shader->info.workgroup_size[0] = 64; b.shader->info.workgroup_size[0] = 64;
@ -128,8 +130,8 @@ build_occlusion_query_shader(struct radv_device *device)
nir_variable *start = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "start"); nir_variable *start = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "start");
nir_variable *end = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "end"); nir_variable *end = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "end");
nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available"); nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available");
uint64_t enabled_rb_mask = device->physical_device->info.enabled_rb_mask; uint64_t enabled_rb_mask = pdev->info.enabled_rb_mask;
unsigned db_count = device->physical_device->info.max_render_backends; unsigned db_count = pdev->info.max_render_backends;
nir_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4); nir_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
@ -275,6 +277,7 @@ build_pipeline_statistics_query_shader(struct radv_device *device)
* } * }
* } * }
*/ */
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "pipeline_statistics_query"); nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "pipeline_statistics_query");
b.shader->info.workgroup_size[0] = 64; b.shader->info.workgroup_size[0] = 64;
@ -301,7 +304,7 @@ build_pipeline_statistics_query_shader(struct radv_device *device)
nir_def *available32 = nir_load_ssbo(&b, 1, 32, src_buf, avail_offset); nir_def *available32 = nir_load_ssbo(&b, 1, 32, src_buf, avail_offset);
nir_store_var(&b, available, nir_i2b(&b, available32), 0x1); nir_store_var(&b, available, nir_i2b(&b, available32), 0x1);
if (device->physical_device->emulate_mesh_shader_queries) { if (pdev->emulate_mesh_shader_queries) {
nir_push_if(&b, nir_test_mask(&b, stats_mask, VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT)); nir_push_if(&b, nir_test_mask(&b, stats_mask, VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT));
{ {
const uint32_t idx = ffs(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT) - 1; const uint32_t idx = ffs(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT) - 1;
@ -867,6 +870,7 @@ build_ms_prim_gen_query_shader(struct radv_device *device)
static VkResult static VkResult
radv_device_init_meta_query_state_internal(struct radv_device *device) radv_device_init_meta_query_state_internal(struct radv_device *device)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
VkResult result; VkResult result;
nir_shader *occlusion_cs = NULL; nir_shader *occlusion_cs = NULL;
nir_shader *pipeline_statistics_cs = NULL; nir_shader *pipeline_statistics_cs = NULL;
@ -886,7 +890,7 @@ radv_device_init_meta_query_state_internal(struct radv_device *device)
timestamp_cs = build_timestamp_query_shader(device); timestamp_cs = build_timestamp_query_shader(device);
pg_cs = build_pg_query_shader(device); pg_cs = build_pg_query_shader(device);
if (device->physical_device->emulate_mesh_shader_queries) if (pdev->emulate_mesh_shader_queries)
ms_prim_gen_cs = build_ms_prim_gen_query_shader(device); ms_prim_gen_cs = build_ms_prim_gen_query_shader(device);
VkDescriptorSetLayoutCreateInfo occlusion_ds_create_info = { VkDescriptorSetLayoutCreateInfo occlusion_ds_create_info = {
@ -1025,7 +1029,7 @@ radv_device_init_meta_query_state_internal(struct radv_device *device)
result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &pg_pipeline_info, result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &pg_pipeline_info,
NULL, &device->meta_state.query.pg_query_pipeline); NULL, &device->meta_state.query.pg_query_pipeline);
if (device->physical_device->emulate_mesh_shader_queries) { if (pdev->emulate_mesh_shader_queries) {
VkPipelineShaderStageCreateInfo ms_prim_gen_pipeline_shader_stage = { VkPipelineShaderStageCreateInfo ms_prim_gen_pipeline_shader_stage = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT, .stage = VK_SHADER_STAGE_COMPUTE_BIT,
@ -1211,6 +1215,7 @@ static VkResult
radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *pCreateInfo, radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator, VkQueryPool *pQueryPool) const VkAllocationCallbacks *pAllocator, VkQueryPool *pQueryPool)
{ {
struct radv_physical_device *pdev = radv_device_physical(device);
VkResult result; VkResult result;
size_t pool_struct_size = pCreateInfo->queryType == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR size_t pool_struct_size = pCreateInfo->queryType == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR
? sizeof(struct radv_pc_query_pool) ? sizeof(struct radv_pc_query_pool)
@ -1232,21 +1237,20 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *
* and the legacy GS path but it increments for NGG VS/TES because they are merged with GS. To * and the legacy GS path but it increments for NGG VS/TES because they are merged with GS. To
* avoid this counter to increment, it's also emulated. * avoid this counter to increment, it's also emulated.
*/ */
pool->uses_gds = pool->uses_gds = (pdev->emulate_ngg_gs_query_pipeline_stat &&
(device->physical_device->emulate_ngg_gs_query_pipeline_stat && (pool->vk.pipeline_statistics & (VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
(pool->vk.pipeline_statistics & (VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT | VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT))) ||
VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT))) || (pdev->use_ngg && pCreateInfo->queryType == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) ||
(device->physical_device->use_ngg && pCreateInfo->queryType == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) || (pdev->emulate_mesh_shader_queries &&
(device->physical_device->emulate_mesh_shader_queries && (pCreateInfo->queryType == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT ||
(pCreateInfo->queryType == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT || pool->vk.pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_MESH_SHADER_INVOCATIONS_BIT_EXT));
pool->vk.pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_MESH_SHADER_INVOCATIONS_BIT_EXT));
/* The number of task shader invocations needs to be queried on ACE. */ /* The number of task shader invocations needs to be queried on ACE. */
pool->uses_ace = (pool->vk.pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT); pool->uses_ace = (pool->vk.pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);
switch (pCreateInfo->queryType) { switch (pCreateInfo->queryType) {
case VK_QUERY_TYPE_OCCLUSION: case VK_QUERY_TYPE_OCCLUSION:
pool->stride = 16 * device->physical_device->info.max_render_backends; pool->stride = 16 * pdev->info.max_render_backends;
break; break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS: case VK_QUERY_TYPE_PIPELINE_STATISTICS:
pool->stride = radv_get_pipelinestat_query_size(device) * 2; pool->stride = radv_get_pipelinestat_query_size(device) * 2;
@ -1262,7 +1266,7 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *
pool->stride = 32; pool->stride = 32;
break; break;
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
if (pool->uses_gds && device->physical_device->info.gfx_level < GFX11) { if (pool->uses_gds && pdev->info.gfx_level < GFX11) {
/* When the hardware can use both the legacy and the NGG paths in the same begin/end pair, /* When the hardware can use both the legacy and the NGG paths in the same begin/end pair,
* allocate 2x64-bit values for the GDS counters. * allocate 2x64-bit values for the GDS counters.
*/ */
@ -1272,7 +1276,7 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *
} }
break; break;
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: { case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
result = radv_pc_init_query_pool(device->physical_device, pCreateInfo, (struct radv_pc_query_pool *)pool); result = radv_pc_init_query_pool(pdev, pCreateInfo, (struct radv_pc_query_pool *)pool);
if (result != VK_SUCCESS) { if (result != VK_SUCCESS) {
radv_destroy_query_pool(device, pAllocator, pool); radv_destroy_query_pool(device, pAllocator, pool);
@ -1281,11 +1285,11 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *
break; break;
} }
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
/* GFX11 natively supports mesh generated primitives with pipeline statistics. */ /* GFX11 natively supports mesh generated primitives with pipeline statistics. */
pool->stride = radv_get_pipelinestat_query_size(device) * 2; pool->stride = radv_get_pipelinestat_query_size(device) * 2;
} else { } else {
assert(device->physical_device->emulate_mesh_shader_queries); assert(pdev->emulate_mesh_shader_queries);
pool->stride = 16; pool->stride = 16;
} }
break; break;
@ -1296,8 +1300,7 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *
pool->availability_offset = pool->stride * pCreateInfo->queryCount; pool->availability_offset = pool->stride * pCreateInfo->queryCount;
pool->size = pool->availability_offset; pool->size = pool->availability_offset;
if (pCreateInfo->queryType == VK_QUERY_TYPE_PIPELINE_STATISTICS || if (pCreateInfo->queryType == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
(pCreateInfo->queryType == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT && (pCreateInfo->queryType == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT && pdev->info.gfx_level >= GFX11))
device->physical_device->info.gfx_level >= GFX11))
pool->size += 4 * pCreateInfo->queryCount; pool->size += 4 * pCreateInfo->queryCount;
result = radv_bo_create(device, pool->size, 64, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING, result = radv_bo_create(device, pool->size, 64, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING,
@ -1365,6 +1368,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
{ {
RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
const struct radv_physical_device *pdev = radv_device_physical(device);
char *data = pData; char *data = pData;
VkResult result = VK_SUCCESS; VkResult result = VK_SUCCESS;
@ -1413,8 +1417,8 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
} }
case VK_QUERY_TYPE_OCCLUSION: { case VK_QUERY_TYPE_OCCLUSION: {
p_atomic_uint64_t const *src64 = (p_atomic_uint64_t const *)src; p_atomic_uint64_t const *src64 = (p_atomic_uint64_t const *)src;
uint32_t db_count = device->physical_device->info.max_render_backends; uint32_t db_count = pdev->info.max_render_backends;
uint64_t enabled_rb_mask = device->physical_device->info.enabled_rb_mask; uint64_t enabled_rb_mask = pdev->info.enabled_rb_mask;
uint64_t sample_count = 0; uint64_t sample_count = 0;
available = 1; available = 1;
@ -1460,7 +1464,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
do { do {
available = p_atomic_read(avail_ptr); available = p_atomic_read(avail_ptr);
if (pool->uses_ace && device->physical_device->emulate_mesh_shader_queries) { if (pool->uses_ace && pdev->emulate_mesh_shader_queries) {
const uint32_t task_invoc_offset = const uint32_t task_invoc_offset =
radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT); radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);
const uint32_t *avail_ptr_start = (const uint32_t *)(src + task_invoc_offset + 4); const uint32_t *avail_ptr_start = (const uint32_t *)(src + task_invoc_offset + 4);
@ -1550,7 +1554,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
break; break;
} }
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: { case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
const bool uses_gds_query = pool->uses_gds && device->physical_device->info.gfx_level < GFX11; const bool uses_gds_query = pool->uses_gds && pdev->info.gfx_level < GFX11;
p_atomic_uint64_t const *src64 = (p_atomic_uint64_t const *)src; p_atomic_uint64_t const *src64 = (p_atomic_uint64_t const *)src;
uint64_t primitive_storage_needed; uint64_t primitive_storage_needed;
@ -1615,7 +1619,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: { case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: {
uint64_t ms_prim_gen; uint64_t ms_prim_gen;
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(device); unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(device);
const uint32_t *avail_ptr = (const uint32_t *)(pool->ptr + pool->availability_offset + 4 * query); const uint32_t *avail_ptr = (const uint32_t *)(pool->ptr + pool->availability_offset + 4 * query);
@ -1730,6 +1734,7 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer); RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radeon_cmdbuf *cs = cmd_buffer->cs;
uint64_t va = radv_buffer_get_va(pool->bo); uint64_t va = radv_buffer_get_va(pool->bo);
uint64_t dest_va = radv_buffer_get_va(dst_buffer->bo); uint64_t dest_va = radv_buffer_get_va(dst_buffer->bo);
@ -1780,7 +1785,7 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
/* This waits on the ME. All copies below are done on the ME */ /* This waits on the ME. All copies below are done on the ME */
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, avail_va, 1, 0xffffffff); radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, avail_va, 1, 0xffffffff);
if (pool->uses_ace && cmd_buffer->device->physical_device->emulate_mesh_shader_queries) { if (pool->uses_ace && pdev->emulate_mesh_shader_queries) {
const uint64_t src_va = va + query * pool->stride; const uint64_t src_va = va + query * pool->stride;
const uint64_t start_va = src_va + task_invoc_offset + 4; const uint64_t start_va = src_va + task_invoc_offset + 4;
const uint64_t stop_va = start_va + pipelinestat_block_size; const uint64_t stop_va = start_va + pipelinestat_block_size;
@ -1842,7 +1847,7 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
break; break;
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
if (flags & VK_QUERY_RESULT_WAIT_BIT) { if (flags & VK_QUERY_RESULT_WAIT_BIT) {
const bool uses_gds_query = pool->uses_gds && cmd_buffer->device->physical_device->info.gfx_level < GFX11; const bool uses_gds_query = pool->uses_gds && pdev->info.gfx_level < GFX11;
for (unsigned i = 0; i < queryCount; i++) { for (unsigned i = 0; i < queryCount; i++) {
unsigned query = firstQuery + i; unsigned query = firstQuery + i;
@ -1863,11 +1868,10 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pg_query_pipeline, pool->bo, dst_buffer->bo, radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pg_query_pipeline, pool->bo, dst_buffer->bo,
firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride, dst_size, firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride, dst_size,
queryCount, flags, 0, 0, queryCount, flags, 0, 0, pool->uses_gds && pdev->info.gfx_level < GFX11);
pool->uses_gds && cmd_buffer->device->physical_device->info.gfx_level < GFX11);
break; break;
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
if (flags & VK_QUERY_RESULT_WAIT_BIT) { if (flags & VK_QUERY_RESULT_WAIT_BIT) {
for (unsigned i = 0; i < queryCount; ++i, dest_va += stride) { for (unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
unsigned query = firstQuery + i; unsigned query = firstQuery + i;
@ -1928,6 +1932,7 @@ radv_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uin
{ {
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
uint32_t value = query_clear_value(pool->vk.query_type); uint32_t value = query_clear_value(pool->vk.query_type);
uint32_t flush_bits = 0; uint32_t flush_bits = 0;
@ -1941,8 +1946,7 @@ radv_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uin
queryCount * pool->stride, value); queryCount * pool->stride, value);
if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS || if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
(pool->vk.query_type == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT && (pool->vk.query_type == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT && pdev->info.gfx_level >= GFX11)) {
cmd_buffer->device->physical_device->info.gfx_level >= GFX11)) {
flush_bits |= flush_bits |=
radv_fill_buffer(cmd_buffer, NULL, pool->bo, radv_fill_buffer(cmd_buffer, NULL, pool->bo,
radv_buffer_get_va(pool->bo) + pool->availability_offset + firstQuery * 4, queryCount * 4, 0); radv_buffer_get_va(pool->bo) + pool->availability_offset + firstQuery * 4, queryCount * 4, 0);
@ -1960,6 +1964,7 @@ radv_ResetQueryPool(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery
{ {
RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t value = query_clear_value(pool->vk.query_type); uint32_t value = query_clear_value(pool->vk.query_type);
uint32_t *data = (uint32_t *)(pool->ptr + firstQuery * pool->stride); uint32_t *data = (uint32_t *)(pool->ptr + firstQuery * pool->stride);
@ -1969,8 +1974,7 @@ radv_ResetQueryPool(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery
*p = value; *p = value;
if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS || if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
(pool->vk.query_type == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT && (pool->vk.query_type == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT && pdev->info.gfx_level >= GFX11)) {
device->physical_device->info.gfx_level >= GFX11)) {
memset(pool->ptr + pool->availability_offset + firstQuery * 4, 0, queryCount * 4); memset(pool->ptr + pool->availability_offset + firstQuery * 4, 0, queryCount * 4);
} }
} }
@ -2055,6 +2059,7 @@ static void
emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va, VkQueryType query_type, emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va, VkQueryType query_type,
VkQueryControlFlags flags, uint32_t index) VkQueryControlFlags flags, uint32_t index)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radeon_cmdbuf *cs = cmd_buffer->cs;
switch (query_type) { switch (query_type) {
case VK_QUERY_TYPE_OCCLUSION: case VK_QUERY_TYPE_OCCLUSION:
@ -2082,12 +2087,11 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
} }
} }
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11 && if (pdev->info.gfx_level >= GFX11 && pdev->info.pfp_fw_version >= EVENT_WRITE_ZPASS_PFP_VERSION) {
cmd_buffer->device->physical_device->info.pfp_fw_version >= EVENT_WRITE_ZPASS_PFP_VERSION) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_ZPASS, 1, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_ZPASS, 1, 0));
} else { } else {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
radeon_emit(cs, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_DUMP) | EVENT_INDEX(1)); radeon_emit(cs, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_DUMP) | EVENT_INDEX(1));
} else { } else {
radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1)); radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
@ -2149,7 +2153,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
uint32_t task_invoc_offset = uint32_t task_invoc_offset =
radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT); radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
va += task_invoc_offset; va += task_invoc_offset;
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->gang.cs, 4); radeon_check_space(cmd_buffer->device->ws, cmd_buffer->gang.cs, 4);
@ -2176,7 +2180,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
break; break;
} }
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
if (cmd_buffer->device->physical_device->use_ngg_streamout) { if (pdev->use_ngg_streamout) {
/* generated prim counter */ /* generated prim counter */
gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va); gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
radv_cs_write_data_imm(cs, V_370_ME, va + 4, 0x80000000); radv_cs_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);
@ -2201,7 +2205,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
} }
break; break;
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: { case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
/* On GFX11+, primitives generated query always use GDS. */ /* On GFX11+, primitives generated query always use GDS. */
gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va); gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
radv_cs_write_data_imm(cs, V_370_ME, va + 4, 0x80000000); radv_cs_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);
@ -2251,7 +2255,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
break; break;
} }
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: { case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: {
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
radeon_check_space(cmd_buffer->device->ws, cs, 4); radeon_check_space(cmd_buffer->device->ws, cs, 4);
++cmd_buffer->state.active_pipeline_queries; ++cmd_buffer->state.active_pipeline_queries;
@ -2285,6 +2289,7 @@ static void
emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va, uint64_t avail_va, emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va, uint64_t avail_va,
VkQueryType query_type, uint32_t index) VkQueryType query_type, uint32_t index)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radeon_cmdbuf *cs = cmd_buffer->cs;
switch (query_type) { switch (query_type) {
case VK_QUERY_TYPE_OCCLUSION: case VK_QUERY_TYPE_OCCLUSION:
@ -2300,12 +2305,11 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_OCCLUSION_QUERY; cmd_buffer->state.dirty |= RADV_CMD_DIRTY_OCCLUSION_QUERY;
} }
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11 && if (pdev->info.gfx_level >= GFX11 && pdev->info.pfp_fw_version >= EVENT_WRITE_ZPASS_PFP_VERSION) {
cmd_buffer->device->physical_device->info.pfp_fw_version >= EVENT_WRITE_ZPASS_PFP_VERSION) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_ZPASS, 1, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_ZPASS, 1, 0));
} else { } else {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
radeon_emit(cs, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_DUMP) | EVENT_INDEX(1)); radeon_emit(cs, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_DUMP) | EVENT_INDEX(1));
} else { } else {
radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1)); radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
@ -2369,7 +2373,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
uint32_t task_invoc_offset = uint32_t task_invoc_offset =
radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT); radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
va += task_invoc_offset; va += task_invoc_offset;
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->gang.cs, 4); radeon_check_space(cmd_buffer->device->ws, cmd_buffer->gang.cs, 4);
@ -2391,13 +2395,12 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
} }
} }
radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->info.gfx_level, cmd_buffer->qf, radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0,
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, avail_va, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, avail_va, 1, cmd_buffer->gfx9_eop_bug_va);
1, cmd_buffer->gfx9_eop_bug_va);
break; break;
} }
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
if (cmd_buffer->device->physical_device->use_ngg_streamout) { if (pdev->use_ngg_streamout) {
/* generated prim counter */ /* generated prim counter */
gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16); gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
radv_cs_write_data_imm(cs, V_370_ME, va + 20, 0x80000000); radv_cs_write_data_imm(cs, V_370_ME, va + 20, 0x80000000);
@ -2419,7 +2422,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
} }
break; break;
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: { case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
/* On GFX11+, primitives generated query always use GDS. */ /* On GFX11+, primitives generated query always use GDS. */
gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16); gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
radv_cs_write_data_imm(cs, V_370_ME, va + 20, 0x80000000); radv_cs_write_data_imm(cs, V_370_ME, va + 20, 0x80000000);
@ -2463,7 +2466,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
break; break;
} }
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: { case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: {
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(cmd_buffer->device); unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(cmd_buffer->device);
radeon_check_space(cmd_buffer->device->ws, cs, 16); radeon_check_space(cmd_buffer->device->ws, cs, 16);
@ -2479,9 +2482,9 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
radeon_emit(cs, va); radeon_emit(cs, va);
radeon_emit(cs, va >> 32); radeon_emit(cs, va >> 32);
radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->info.gfx_level, cmd_buffer->qf, radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0,
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, avail_va, 1,
avail_va, 1, cmd_buffer->gfx9_eop_bug_va); cmd_buffer->gfx9_eop_bug_va);
} else { } else {
gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET, va + 8); gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET, va + 8);
radv_cs_write_data_imm(cs, V_370_ME, va + 12, 0x80000000); radv_cs_write_data_imm(cs, V_370_ME, va + 12, 0x80000000);
@ -2499,7 +2502,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
cmd_buffer->active_query_flush_bits |= cmd_buffer->active_query_flush_bits |=
RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE; RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE;
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB; cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
} }
} }
@ -2564,6 +2567,7 @@ radv_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
void void
radv_write_timestamp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkPipelineStageFlags2 stage) radv_write_timestamp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkPipelineStageFlags2 stage)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radeon_cmdbuf *cs = cmd_buffer->cs;
if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT) { if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT) {
@ -2575,9 +2579,8 @@ radv_write_timestamp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkPipeline
radeon_emit(cs, va); radeon_emit(cs, va);
radeon_emit(cs, va >> 32); radeon_emit(cs, va >> 32);
} else { } else {
radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->info.gfx_level, cmd_buffer->qf, radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0,
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_TIMESTAMP, va, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_TIMESTAMP, va, 0, cmd_buffer->gfx9_eop_bug_va);
cmd_buffer->gfx9_eop_bug_va);
} }
} }
@ -2587,6 +2590,7 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta
{ {
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool); RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
const unsigned num_queries = MAX2(util_bitcount(cmd_buffer->state.render.view_mask), 1); const unsigned num_queries = MAX2(util_bitcount(cmd_buffer->state.render.view_mask), 1);
struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radeon_cmdbuf *cs = cmd_buffer->cs;
const uint64_t va = radv_buffer_get_va(pool->bo); const uint64_t va = radv_buffer_get_va(pool->bo);
@ -2625,7 +2629,7 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta
cmd_buffer->active_query_flush_bits |= cmd_buffer->active_query_flush_bits |=
RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE; RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE;
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB; cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
} }

View file

@ -126,6 +126,7 @@ static VkResult
radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMemoryBindInfo *bind) radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMemoryBindInfo *bind)
{ {
RADV_FROM_HANDLE(radv_image, image, bind->image); RADV_FROM_HANDLE(radv_image, image, bind->image);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_surf *surface = &image->planes[0].surface; struct radeon_surf *surface = &image->planes[0].surface;
uint32_t bs = vk_format_get_blocksize(image->vk.format); uint32_t bs = vk_format_get_blocksize(image->vk.format);
VkResult result; VkResult result;
@ -149,7 +150,7 @@ radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMem
if (bind->pBinds[i].memory != VK_NULL_HANDLE) if (bind->pBinds[i].memory != VK_NULL_HANDLE)
mem = radv_device_memory_from_handle(bind->pBinds[i].memory); mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
if (device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
offset = surface->u.gfx9.surf_slice_size * layer + surface->u.gfx9.prt_level_offset[level]; offset = surface->u.gfx9.surf_slice_size * layer + surface->u.gfx9.prt_level_offset[level];
pitch = surface->u.gfx9.prt_level_pitch[level]; pitch = surface->u.gfx9.prt_level_pitch[level];
depth_pitch = surface->u.gfx9.surf_slice_size; depth_pitch = surface->u.gfx9.surf_slice_size;
@ -243,11 +244,13 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
struct radeon_winsys_bo *task_rings_bo, struct radeon_winsys_bo *mesh_scratch_ring_bo, struct radeon_winsys_bo *task_rings_bo, struct radeon_winsys_bo *mesh_scratch_ring_bo,
uint32_t attr_ring_size, struct radeon_winsys_bo *attr_ring_bo) uint32_t attr_ring_size, struct radeon_winsys_bo *attr_ring_bo)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (scratch_bo) { if (scratch_bo) {
uint64_t scratch_va = radv_buffer_get_va(scratch_bo); uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32); uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
if (device->physical_device->info.gfx_level >= GFX11) if (pdev->info.gfx_level >= GFX11)
rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX11(1); rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
else else
rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX6(1); rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX6(1);
@ -270,17 +273,17 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
S_008F0C_INDEX_STRIDE(3) | S_008F0C_ADD_TID_ENABLE(1); S_008F0C_INDEX_STRIDE(3) | S_008F0C_ADD_TID_ENABLE(1);
if (device->physical_device->info.gfx_level >= GFX11) if (pdev->info.gfx_level >= GFX11)
desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX11(1); desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
else else
desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX6(1); desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX6(1);
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else if (device->physical_device->info.gfx_level >= GFX10) { } else if (pdev->info.gfx_level >= GFX10) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
} else if (device->physical_device->info.gfx_level >= GFX8) { } else if (pdev->info.gfx_level >= GFX8) {
/* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */ /* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */
desc[3] |= desc[3] |=
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1); S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1);
@ -298,9 +301,9 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else if (device->physical_device->info.gfx_level >= GFX10) { } else if (pdev->info.gfx_level >= GFX10) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
} else { } else {
@ -323,9 +326,9 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else if (device->physical_device->info.gfx_level >= GFX10) { } else if (pdev->info.gfx_level >= GFX10) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
} else { } else {
@ -343,17 +346,17 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
S_008F0C_INDEX_STRIDE(1) | S_008F0C_ADD_TID_ENABLE(true); S_008F0C_INDEX_STRIDE(1) | S_008F0C_ADD_TID_ENABLE(true);
if (device->physical_device->info.gfx_level >= GFX11) if (pdev->info.gfx_level >= GFX11)
desc[5] |= S_008F04_SWIZZLE_ENABLE_GFX11(1); desc[5] |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
else else
desc[5] |= S_008F04_SWIZZLE_ENABLE_GFX6(1); desc[5] |= S_008F04_SWIZZLE_ENABLE_GFX6(1);
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else if (device->physical_device->info.gfx_level >= GFX10) { } else if (pdev->info.gfx_level >= GFX10) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1); S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
} else if (device->physical_device->info.gfx_level >= GFX8) { } else if (pdev->info.gfx_level >= GFX8) {
/* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */ /* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */
desc[7] |= desc[7] |=
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1); S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1);
@ -367,17 +370,17 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
if (tess_rings_bo) { if (tess_rings_bo) {
uint64_t tess_va = radv_buffer_get_va(tess_rings_bo); uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
uint64_t tess_offchip_va = tess_va + device->physical_device->hs.tess_offchip_ring_offset; uint64_t tess_offchip_va = tess_va + pdev->hs.tess_offchip_ring_offset;
desc[0] = tess_va; desc[0] = tess_va;
desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32); desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
desc[2] = device->physical_device->hs.tess_factor_ring_size; desc[2] = pdev->hs.tess_factor_ring_size;
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (device->physical_device->info.gfx_level >= GFX10) { } else if (pdev->info.gfx_level >= GFX10) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
S_008F0C_RESOURCE_LEVEL(1); S_008F0C_RESOURCE_LEVEL(1);
} else { } else {
@ -387,13 +390,13 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
desc[4] = tess_offchip_va; desc[4] = tess_offchip_va;
desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32); desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
desc[6] = device->physical_device->hs.tess_offchip_ring_size; desc[6] = pdev->hs.tess_offchip_ring_size;
desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (device->physical_device->info.gfx_level >= GFX10) { } else if (pdev->info.gfx_level >= GFX10) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
S_008F0C_RESOURCE_LEVEL(1); S_008F0C_RESOURCE_LEVEL(1);
} else { } else {
@ -406,33 +409,33 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
if (task_rings_bo) { if (task_rings_bo) {
uint64_t task_va = radv_buffer_get_va(task_rings_bo); uint64_t task_va = radv_buffer_get_va(task_rings_bo);
uint64_t task_draw_ring_va = task_va + device->physical_device->task_info.draw_ring_offset; uint64_t task_draw_ring_va = task_va + pdev->task_info.draw_ring_offset;
uint64_t task_payload_ring_va = task_va + device->physical_device->task_info.payload_ring_offset; uint64_t task_payload_ring_va = task_va + pdev->task_info.payload_ring_offset;
desc[0] = task_draw_ring_va; desc[0] = task_draw_ring_va;
desc[1] = S_008F04_BASE_ADDRESS_HI(task_draw_ring_va >> 32); desc[1] = S_008F04_BASE_ADDRESS_HI(task_draw_ring_va >> 32);
desc[2] = device->physical_device->task_info.num_entries * AC_TASK_DRAW_ENTRY_BYTES; desc[2] = pdev->task_info.num_entries * AC_TASK_DRAW_ENTRY_BYTES;
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else { } else {
assert(device->physical_device->info.gfx_level >= GFX10_3); assert(pdev->info.gfx_level >= GFX10_3);
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
S_008F0C_RESOURCE_LEVEL(1); S_008F0C_RESOURCE_LEVEL(1);
} }
desc[4] = task_payload_ring_va; desc[4] = task_payload_ring_va;
desc[5] = S_008F04_BASE_ADDRESS_HI(task_payload_ring_va >> 32); desc[5] = S_008F04_BASE_ADDRESS_HI(task_payload_ring_va >> 32);
desc[6] = device->physical_device->task_info.num_entries * AC_TASK_PAYLOAD_ENTRY_BYTES; desc[6] = pdev->task_info.num_entries * AC_TASK_PAYLOAD_ENTRY_BYTES;
desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else { } else {
assert(device->physical_device->info.gfx_level >= GFX10_3); assert(pdev->info.gfx_level >= GFX10_3);
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
S_008F0C_RESOURCE_LEVEL(1); S_008F0C_RESOURCE_LEVEL(1);
} }
@ -449,10 +452,10 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED); desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else { } else {
assert(device->physical_device->info.gfx_level >= GFX10_3); assert(pdev->info.gfx_level >= GFX10_3);
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
S_008F0C_RESOURCE_LEVEL(1); S_008F0C_RESOURCE_LEVEL(1);
} }
@ -461,7 +464,7 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
desc += 4; desc += 4;
if (attr_ring_bo) { if (attr_ring_bo) {
assert(device->physical_device->info.gfx_level >= GFX11); assert(pdev->info.gfx_level >= GFX11);
uint64_t va = radv_buffer_get_va(attr_ring_bo); uint64_t va = radv_buffer_get_va(attr_ring_bo);
@ -489,6 +492,8 @@ static void
radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *esgs_ring_bo, radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *esgs_ring_bo,
uint32_t esgs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size) uint32_t esgs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!esgs_ring_bo && !gsvs_ring_bo) if (!esgs_ring_bo && !gsvs_ring_bo)
return; return;
@ -498,7 +503,7 @@ radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, st
if (gsvs_ring_bo) if (gsvs_ring_bo)
radv_cs_add_buffer(device->ws, cs, gsvs_ring_bo); radv_cs_add_buffer(device->ws, cs, gsvs_ring_bo);
if (device->physical_device->info.gfx_level >= GFX7) { if (pdev->info.gfx_level >= GFX7) {
radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2); radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
radeon_emit(cs, esgs_ring_size >> 8); radeon_emit(cs, esgs_ring_size >> 8);
radeon_emit(cs, gsvs_ring_size >> 8); radeon_emit(cs, gsvs_ring_size >> 8);
@ -512,49 +517,51 @@ radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, st
static void static void
radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *tess_rings_bo) radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *tess_rings_bo)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t tf_va; uint64_t tf_va;
uint32_t tf_ring_size; uint32_t tf_ring_size;
if (!tess_rings_bo) if (!tess_rings_bo)
return; return;
tf_ring_size = device->physical_device->hs.tess_factor_ring_size / 4; tf_ring_size = pdev->hs.tess_factor_ring_size / 4;
tf_va = radv_buffer_get_va(tess_rings_bo); tf_va = radv_buffer_get_va(tess_rings_bo);
radv_cs_add_buffer(device->ws, cs, tess_rings_bo); radv_cs_add_buffer(device->ws, cs, tess_rings_bo);
if (device->physical_device->info.gfx_level >= GFX7) { if (pdev->info.gfx_level >= GFX7) {
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
/* TF_RING_SIZE is per SE on GFX11. */ /* TF_RING_SIZE is per SE on GFX11. */
tf_ring_size /= device->physical_device->info.max_se; tf_ring_size /= pdev->info.max_se;
} }
radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size)); radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size));
radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, tf_va >> 8); radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, tf_va >> 8);
if (device->physical_device->info.gfx_level >= GFX10) { if (pdev->info.gfx_level >= GFX10) {
radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI, S_030984_BASE_HI(tf_va >> 40)); radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI, S_030984_BASE_HI(tf_va >> 40));
} else if (device->physical_device->info.gfx_level == GFX9) { } else if (pdev->info.gfx_level == GFX9) {
radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(tf_va >> 40)); radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(tf_va >> 40));
} }
radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, device->physical_device->hs.hs_offchip_param); radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, pdev->hs.hs_offchip_param);
} else { } else {
radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size)); radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size));
radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8); radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8);
radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, device->physical_device->hs.hs_offchip_param); radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, pdev->hs.hs_offchip_param);
} }
} }
static VkResult static VkResult
radv_initialise_task_control_buffer(struct radv_device *device, struct radeon_winsys_bo *task_rings_bo) radv_initialise_task_control_buffer(struct radv_device *device, struct radeon_winsys_bo *task_rings_bo)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t *ptr = (uint32_t *)radv_buffer_map(device->ws, task_rings_bo); uint32_t *ptr = (uint32_t *)radv_buffer_map(device->ws, task_rings_bo);
if (!ptr) if (!ptr)
return VK_ERROR_OUT_OF_DEVICE_MEMORY; return VK_ERROR_OUT_OF_DEVICE_MEMORY;
const uint32_t num_entries = device->physical_device->task_info.num_entries; const uint32_t num_entries = pdev->task_info.num_entries;
const uint64_t task_va = radv_buffer_get_va(task_rings_bo); const uint64_t task_va = radv_buffer_get_va(task_rings_bo);
const uint64_t task_draw_ring_va = task_va + device->physical_device->task_info.draw_ring_offset; const uint64_t task_draw_ring_va = task_va + pdev->task_info.draw_ring_offset;
assert((task_draw_ring_va & 0xFFFFFF00) == (task_draw_ring_va & 0xFFFFFFFF)); assert((task_draw_ring_va & 0xFFFFFF00) == (task_draw_ring_va & 0xFFFFFFFF));
/* 64-bit write_ptr */ /* 64-bit write_ptr */
@ -599,7 +606,8 @@ static void
radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves, radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves,
struct radeon_winsys_bo *scratch_bo) struct radeon_winsys_bo *scratch_bo)
{ {
const struct radeon_info *gpu_info = &device->physical_device->info; const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
if (!scratch_bo) if (!scratch_bo)
return; return;
@ -626,7 +634,8 @@ static void
radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves, radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves,
struct radeon_winsys_bo *compute_scratch_bo) struct radeon_winsys_bo *compute_scratch_bo)
{ {
const struct radeon_info *gpu_info = &device->physical_device->info; const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
uint64_t scratch_va; uint64_t scratch_va;
uint32_t rsrc1; uint32_t rsrc1;
@ -680,6 +689,7 @@ static void
radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmdbuf *cs, radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmdbuf *cs,
struct radeon_winsys_bo *descriptor_bo) struct radeon_winsys_bo *descriptor_bo)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t va; uint64_t va;
if (!descriptor_bo) if (!descriptor_bo)
@ -689,21 +699,21 @@ radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmd
radv_cs_add_buffer(device->ws, cs, descriptor_bo); radv_cs_add_buffer(device->ws, cs, descriptor_bo);
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B420_SPI_SHADER_PGM_LO_HS, uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B420_SPI_SHADER_PGM_LO_HS,
R_00B220_SPI_SHADER_PGM_LO_GS}; R_00B220_SPI_SHADER_PGM_LO_GS};
for (int i = 0; i < ARRAY_SIZE(regs); ++i) { for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
radv_emit_shader_pointer(device, cs, regs[i], va, true); radv_emit_shader_pointer(device, cs, regs[i], va, true);
} }
} else if (device->physical_device->info.gfx_level >= GFX10) { } else if (pdev->info.gfx_level >= GFX10) {
uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0, uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS}; R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
for (int i = 0; i < ARRAY_SIZE(regs); ++i) { for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
radv_emit_shader_pointer(device, cs, regs[i], va, true); radv_emit_shader_pointer(device, cs, regs[i], va, true);
} }
} else if (device->physical_device->info.gfx_level == GFX9) { } else if (pdev->info.gfx_level == GFX9) {
uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0, uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS}; R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
@ -725,7 +735,7 @@ static void
radv_emit_attribute_ring(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *attr_ring_bo, radv_emit_attribute_ring(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *attr_ring_bo,
uint32_t attr_ring_size) uint32_t attr_ring_size)
{ {
const struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t va; uint64_t va;
if (!attr_ring_bo) if (!attr_ring_bo)
@ -792,6 +802,7 @@ static VkResult
radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *device, radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *device,
const struct radv_queue_ring_info *needs) const struct radv_queue_ring_info *needs)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_winsys *ws = device->ws; struct radeon_winsys *ws = device->ws;
struct radeon_winsys_bo *scratch_bo = queue->scratch_bo; struct radeon_winsys_bo *scratch_bo = queue->scratch_bo;
struct radeon_winsys_bo *descriptor_bo = queue->descriptor_bo; struct radeon_winsys_bo *descriptor_bo = queue->descriptor_bo;
@ -848,8 +859,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
} }
if (!queue->ring_info.tess_rings && needs->tess_rings) { if (!queue->ring_info.tess_rings && needs->tess_rings) {
uint64_t tess_rings_size = uint64_t tess_rings_size = pdev->hs.tess_offchip_ring_offset + pdev->hs.tess_offchip_ring_size;
device->physical_device->hs.tess_offchip_ring_offset + device->physical_device->hs.tess_offchip_ring_size;
result = radv_bo_create(device, tess_rings_size, 256, RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, result = radv_bo_create(device, tess_rings_size, 256, RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH,
0, true, &tess_rings_bo); 0, true, &tess_rings_bo);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
@ -858,7 +868,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
} }
if (!queue->ring_info.task_rings && needs->task_rings) { if (!queue->ring_info.task_rings && needs->task_rings) {
assert(device->physical_device->info.gfx_level >= GFX10_3); assert(pdev->info.gfx_level >= GFX10_3);
/* We write the control buffer from the CPU, so need to grant CPU access to the BO. /* We write the control buffer from the CPU, so need to grant CPU access to the BO.
* The draw ring needs to be zero-initialized otherwise the ready bits will be incorrect. * The draw ring needs to be zero-initialized otherwise the ready bits will be incorrect.
@ -866,12 +876,11 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
uint32_t task_rings_bo_flags = uint32_t task_rings_bo_flags =
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM; RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM;
result = radv_bo_create(device, device->physical_device->task_info.bo_size_bytes, 256, RADEON_DOMAIN_VRAM, result = radv_bo_create(device, pdev->task_info.bo_size_bytes, 256, RADEON_DOMAIN_VRAM, task_rings_bo_flags,
task_rings_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, true, &task_rings_bo); RADV_BO_PRIORITY_SCRATCH, 0, true, &task_rings_bo);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
goto fail; goto fail;
radv_rmv_log_command_buffer_bo_create(device, task_rings_bo, 0, 0, radv_rmv_log_command_buffer_bo_create(device, task_rings_bo, 0, 0, pdev->task_info.bo_size_bytes);
device->physical_device->task_info.bo_size_bytes);
result = radv_initialise_task_control_buffer(device, task_rings_bo); result = radv_initialise_task_control_buffer(device, task_rings_bo);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
@ -879,7 +888,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
} }
if (!queue->ring_info.mesh_scratch_ring && needs->mesh_scratch_ring) { if (!queue->ring_info.mesh_scratch_ring && needs->mesh_scratch_ring) {
assert(device->physical_device->info.gfx_level >= GFX10_3); assert(pdev->info.gfx_level >= GFX10_3);
result = radv_bo_create(device, RADV_MESH_SCRATCH_NUM_ENTRIES * RADV_MESH_SCRATCH_ENTRY_BYTES, 256, result = radv_bo_create(device, RADV_MESH_SCRATCH_NUM_ENTRIES * RADV_MESH_SCRATCH_ENTRY_BYTES, 256,
RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, true, RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, true,
&mesh_scratch_ring_bo); &mesh_scratch_ring_bo);
@ -891,7 +900,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
} }
if (needs->attr_ring_size > queue->ring_info.attr_ring_size) { if (needs->attr_ring_size > queue->ring_info.attr_ring_size) {
assert(device->physical_device->info.gfx_level >= GFX11); assert(pdev->info.gfx_level >= GFX11);
result = radv_bo_create(device, needs->attr_ring_size, 2 * 1024 * 1024 /* 2MiB */, RADEON_DOMAIN_VRAM, result = radv_bo_create(device, needs->attr_ring_size, 2 * 1024 * 1024 /* 2MiB */, RADEON_DOMAIN_VRAM,
RADEON_FLAG_32BIT | RADEON_FLAG_DISCARDABLE | ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, RADEON_FLAG_32BIT | RADEON_FLAG_DISCARDABLE | ring_bo_flags, RADV_BO_PRIORITY_SCRATCH,
0, true, &attr_ring_bo); 0, true, &attr_ring_bo);
@ -901,7 +910,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
} }
if (!queue->ring_info.gds && needs->gds) { if (!queue->ring_info.gds && needs->gds) {
assert(device->physical_device->info.gfx_level >= GFX10); assert(pdev->info.gfx_level >= GFX10);
/* 4 streamout GDS counters. /* 4 streamout GDS counters.
* We need 256B (64 dw) of GDS, otherwise streamout hangs. * We need 256B (64 dw) of GDS, otherwise streamout hangs.
@ -920,7 +929,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
} }
if (!queue->ring_info.gds_oa && needs->gds_oa) { if (!queue->ring_info.gds_oa && needs->gds_oa) {
assert(device->physical_device->info.gfx_level >= GFX10); assert(pdev->info.gfx_level >= GFX10);
result = result =
radv_bo_create(device, 1, 1, RADEON_DOMAIN_OA, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, true, &gds_oa_bo); radv_bo_create(device, 1, 1, RADEON_DOMAIN_OA, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, true, &gds_oa_bo);
@ -972,7 +981,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
enum rgp_flush_bits sqtt_flush_bits = 0; enum rgp_flush_bits sqtt_flush_bits = 0;
struct radeon_cmdbuf *cs = NULL; struct radeon_cmdbuf *cs = NULL;
cs = ws->cs_create(ws, radv_queue_family_to_ring(device->physical_device, queue->qf), false); cs = ws->cs_create(ws, radv_queue_family_to_ring(pdev, queue->qf), false);
if (!cs) { if (!cs) {
result = VK_ERROR_OUT_OF_DEVICE_MEMORY; result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
goto fail; goto fail;
@ -1027,7 +1036,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
if (i < 2) { if (i < 2) {
/* The two initial preambles have a cache flush at the beginning. */ /* The two initial preambles have a cache flush at the beginning. */
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
enum radv_cmd_flush_bits flush_bits = RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | enum radv_cmd_flush_bits flush_bits = RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE |
RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2 |
RADV_CMD_FLAG_START_PIPELINE_STATS; RADV_CMD_FLAG_START_PIPELINE_STATS;
@ -1143,6 +1152,7 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device
struct vk_command_buffer *const *cmd_buffers, uint32_t cmd_buffer_count, bool *use_perf_counters, struct vk_command_buffer *const *cmd_buffers, uint32_t cmd_buffer_count, bool *use_perf_counters,
bool *has_follower) bool *has_follower)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
bool has_indirect_pipeline_binds = false; bool has_indirect_pipeline_binds = false;
if (queue->qf != RADV_QUEUE_GENERAL && queue->qf != RADV_QUEUE_COMPUTE) { if (queue->qf != RADV_QUEUE_GENERAL && queue->qf != RADV_QUEUE_COMPUTE) {
@ -1203,9 +1213,8 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device
? MIN2(needs.compute_scratch_waves, UINT32_MAX / needs.compute_scratch_size_per_wave) ? MIN2(needs.compute_scratch_waves, UINT32_MAX / needs.compute_scratch_size_per_wave)
: 0; : 0;
if (device->physical_device->info.gfx_level >= GFX11 && queue->qf == RADV_QUEUE_GENERAL) { if (pdev->info.gfx_level >= GFX11 && queue->qf == RADV_QUEUE_GENERAL) {
needs.attr_ring_size = needs.attr_ring_size = pdev->info.attribute_ring_size_per_se * pdev->info.max_se;
device->physical_device->info.attribute_ring_size_per_se * device->physical_device->info.max_se;
} }
/* Return early if we already match these needs. /* Return early if we already match these needs.
@ -1230,13 +1239,15 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device
static VkResult static VkResult
radv_create_gang_wait_preambles_postambles(struct radv_queue *queue) radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
{ {
const struct radv_physical_device *pdev = radv_device_physical(queue->device);
if (queue->gang_sem_bo) if (queue->gang_sem_bo)
return VK_SUCCESS; return VK_SUCCESS;
VkResult r = VK_SUCCESS; VkResult r = VK_SUCCESS;
struct radv_device *device = queue->device; struct radv_device *device = queue->device;
struct radeon_winsys *ws = device->ws; struct radeon_winsys *ws = device->ws;
const enum amd_ip_type leader_ip = radv_queue_family_to_ring(device->physical_device, queue->state.qf); const enum amd_ip_type leader_ip = radv_queue_family_to_ring(pdev, queue->state.qf);
struct radeon_winsys_bo *gang_sem_bo = NULL; struct radeon_winsys_bo *gang_sem_bo = NULL;
/* Gang semaphores BO. /* Gang semaphores BO.
@ -1291,9 +1302,8 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
*/ */
radv_cp_wait_mem(leader_post_cs, queue->state.qf, WAIT_REG_MEM_GREATER_OR_EQUAL, leader_wait_va, 1, 0xffffffff); radv_cp_wait_mem(leader_post_cs, queue->state.qf, WAIT_REG_MEM_GREATER_OR_EQUAL, leader_wait_va, 1, 0xffffffff);
radv_cs_write_data(device, leader_post_cs, queue->state.qf, V_370_ME, leader_wait_va, 1, &zero, false); radv_cs_write_data(device, leader_post_cs, queue->state.qf, V_370_ME, leader_wait_va, 1, &zero, false);
radv_cs_emit_write_event_eop(ace_post_cs, device->physical_device->info.gfx_level, RADV_QUEUE_COMPUTE, radv_cs_emit_write_event_eop(ace_post_cs, pdev->info.gfx_level, RADV_QUEUE_COMPUTE, V_028A90_BOTTOM_OF_PIPE_TS, 0,
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, leader_wait_va, 1, 0);
leader_wait_va, 1, 0);
r = ws->cs_finalize(leader_pre_cs); r = ws->cs_finalize(leader_pre_cs);
if (r != VK_SUCCESS) if (r != VK_SUCCESS)
@ -1681,13 +1691,14 @@ fail:
static void static void
radv_report_gpuvm_fault(struct radv_device *device) radv_report_gpuvm_fault(struct radv_device *device)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_winsys_gpuvm_fault_info fault_info = {0}; struct radv_winsys_gpuvm_fault_info fault_info = {0};
if (!radv_vm_fault_occurred(device, &fault_info)) if (!radv_vm_fault_occurred(device, &fault_info))
return; return;
fprintf(stderr, "radv: GPUVM fault detected at address 0x%08" PRIx64 ".\n", fault_info.addr); fprintf(stderr, "radv: GPUVM fault detected at address 0x%08" PRIx64 ".\n", fault_info.addr);
ac_print_gpuvm_fault_status(stderr, device->physical_device->info.gfx_level, fault_info.status); ac_print_gpuvm_fault_status(stderr, pdev->info.gfx_level, fault_info.status);
} }
static VkResult static VkResult
@ -1735,9 +1746,10 @@ static VkResult
radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission) radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission)
{ {
struct radv_queue *queue = (struct radv_queue *)vqueue; struct radv_queue *queue = (struct radv_queue *)vqueue;
const struct radv_physical_device *pdev = radv_device_physical(queue->device);
VkResult result; VkResult result;
if (!radv_sparse_queue_enabled(queue->device->physical_device)) { if (!radv_sparse_queue_enabled(pdev)) {
result = radv_queue_submit_bind_sparse_memory(queue->device, submission); result = radv_queue_submit_bind_sparse_memory(queue->device, submission);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
goto fail; goto fail;
@ -1792,10 +1804,12 @@ radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
const VkDeviceQueueCreateInfo *create_info, const VkDeviceQueueCreateInfo *create_info,
const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority) const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
queue->device = device; queue->device = device;
queue->priority = radv_get_queue_global_priority(global_priority); queue->priority = radv_get_queue_global_priority(global_priority);
queue->hw_ctx = device->hw_ctx[queue->priority]; queue->hw_ctx = device->hw_ctx[queue->priority];
queue->state.qf = vk_queue_to_radv(device->physical_device, create_info->queueFamilyIndex); queue->state.qf = vk_queue_to_radv(pdev, create_info->queueFamilyIndex);
queue->gang_sem_bo = NULL; queue->gang_sem_bo = NULL;
VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx); VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);

View file

@ -173,6 +173,8 @@ static void
evaluate_trace_event(struct radv_device *device, uint64_t timestamp, struct util_dynarray *tokens, evaluate_trace_event(struct radv_device *device, uint64_t timestamp, struct util_dynarray *tokens,
struct trace_event_amdgpu_vm_update_ptes *event) struct trace_event_amdgpu_vm_update_ptes *event)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (event->common.pid != getpid() && event->pid != getpid()) { if (event->common.pid != getpid() && event->pid != getpid()) {
return; return;
} }
@ -180,8 +182,8 @@ evaluate_trace_event(struct radv_device *device, uint64_t timestamp, struct util
struct trace_event_address_array *array = (struct trace_event_address_array *)(event + 1); struct trace_event_address_array *array = (struct trace_event_address_array *)(event + 1);
for (uint32_t i = 0; i < event->num_ptes; ++i) for (uint32_t i = 0; i < event->num_ptes; ++i)
emit_page_table_update_event(&device->vk.memory_trace_data, !device->physical_device->info.has_dedicated_vram, emit_page_table_update_event(&device->vk.memory_trace_data, !pdev->info.has_dedicated_vram, timestamp, event,
timestamp, event, (uint64_t *)array->data, i); (uint64_t *)array->data, i);
} }
static void static void
@ -480,6 +482,8 @@ void
radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool is_internal, radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool is_internal,
VkMemoryAllocateFlags alloc_flags) VkMemoryAllocateFlags alloc_flags)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!device->vk.memory_trace_data.is_enabled) if (!device->vk.memory_trace_data.is_enabled)
return; return;
@ -495,7 +499,7 @@ radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool i
token.is_driver_internal = is_internal; token.is_driver_internal = is_internal;
token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)heap); token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)heap);
token.type = VK_RMV_RESOURCE_TYPE_HEAP; token.type = VK_RMV_RESOURCE_TYPE_HEAP;
token.heap.alignment = device->physical_device->info.max_alignment; token.heap.alignment = pdev->info.max_alignment;
token.heap.size = memory->alloc_size; token.heap.size = memory->alloc_size;
token.heap.heap_index = memory->heap_index; token.heap.heap_index = memory->heap_index;
token.heap.alloc_flags = alloc_flags; token.heap.alloc_flags = alloc_flags;
@ -508,6 +512,8 @@ radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool i
void void
radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo, bool is_internal) radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo, bool is_internal)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!device->vk.memory_trace_data.is_enabled) if (!device->vk.memory_trace_data.is_enabled)
return; return;
@ -518,7 +524,7 @@ radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo
struct vk_rmv_virtual_allocate_token token = {0}; struct vk_rmv_virtual_allocate_token token = {0};
token.address = bo->va; token.address = bo->va;
/* If all VRAM is visible, no bo will be in invisible memory. */ /* If all VRAM is visible, no bo will be in invisible memory. */
token.is_in_invisible_vram = bo->vram_no_cpu_access && !device->physical_device->info.all_vram_visible; token.is_in_invisible_vram = bo->vram_no_cpu_access && !pdev->info.all_vram_visible;
token.preferred_domains = (enum vk_rmv_kernel_memory_domain)bo->initial_domain; token.preferred_domains = (enum vk_rmv_kernel_memory_domain)bo->initial_domain;
token.is_driver_internal = is_internal; token.is_driver_internal = is_internal;
token.page_count = DIV_ROUND_UP(bo->size, 4096); token.page_count = DIV_ROUND_UP(bo->size, 4096);

View file

@ -898,15 +898,17 @@ exit:
VkResult VkResult
radv_rra_trace_init(struct radv_device *device) radv_rra_trace_init(struct radv_device *device)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
device->rra_trace.validate_as = debug_get_bool_option("RADV_RRA_TRACE_VALIDATE", false); device->rra_trace.validate_as = debug_get_bool_option("RADV_RRA_TRACE_VALIDATE", false);
device->rra_trace.copy_after_build = debug_get_bool_option("RADV_RRA_TRACE_COPY_AFTER_BUILD", false); device->rra_trace.copy_after_build = debug_get_bool_option("RADV_RRA_TRACE_COPY_AFTER_BUILD", false);
device->rra_trace.accel_structs = _mesa_pointer_hash_table_create(NULL); device->rra_trace.accel_structs = _mesa_pointer_hash_table_create(NULL);
device->rra_trace.accel_struct_vas = _mesa_hash_table_u64_create(NULL); device->rra_trace.accel_struct_vas = _mesa_hash_table_u64_create(NULL);
simple_mtx_init(&device->rra_trace.data_mtx, mtx_plain); simple_mtx_init(&device->rra_trace.data_mtx, mtx_plain);
device->rra_trace.copy_memory_index = radv_find_memory_index( device->rra_trace.copy_memory_index =
device->physical_device, radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT); VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
util_dynarray_init(&device->rra_trace.ray_history, NULL); util_dynarray_init(&device->rra_trace.ray_history, NULL);
@ -939,9 +941,9 @@ radv_rra_trace_init(struct radv_device *device)
VkMemoryAllocateInfo alloc_info = { VkMemoryAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = requirements.size, .allocationSize = requirements.size,
.memoryTypeIndex = radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | .memoryTypeIndex =
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT), VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
}; };
result = radv_AllocateMemory(_device, &alloc_info, NULL, &device->rra_trace.ray_history_memory); result = radv_AllocateMemory(_device, &alloc_info, NULL, &device->rra_trace.ray_history_memory);
@ -1316,6 +1318,7 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename)
{ {
RADV_FROM_HANDLE(radv_queue, queue, vk_queue); RADV_FROM_HANDLE(radv_queue, queue, vk_queue);
struct radv_device *device = queue->device; struct radv_device *device = queue->device;
const struct radv_physical_device *pdev = radv_device_physical(device);
VkDevice vk_device = radv_device_to_handle(device); VkDevice vk_device = radv_device_to_handle(device);
VkResult result = vk_common_DeviceWaitIdle(vk_device); VkResult result = vk_common_DeviceWaitIdle(vk_device);
@ -1365,7 +1368,7 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename)
fwrite(&api, sizeof(uint64_t), 1, file); fwrite(&api, sizeof(uint64_t), 1, file);
uint64_t asic_info_offset = (uint64_t)ftell(file); uint64_t asic_info_offset = (uint64_t)ftell(file);
rra_dump_asic_info(&device->physical_device->info, file); rra_dump_asic_info(&pdev->info, file);
uint64_t written_accel_struct_count = 0; uint64_t written_accel_struct_count = 0;

View file

@ -196,14 +196,14 @@ radv_unregister_border_color(struct radv_device *device, uint32_t slot)
static void static void
radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler, const VkSamplerCreateInfo *pCreateInfo) radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler, const VkSamplerCreateInfo *pCreateInfo)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo); uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso); uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
bool compat_mode = bool compat_mode = pdev->info.gfx_level == GFX8 || pdev->info.gfx_level == GFX9;
device->physical_device->info.gfx_level == GFX8 || device->physical_device->info.gfx_level == GFX9;
unsigned filter_mode = radv_tex_filter_mode(sampler->vk.reduction_mode); unsigned filter_mode = radv_tex_filter_mode(sampler->vk.reduction_mode);
unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
bool trunc_coord = ((pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST) || bool trunc_coord = ((pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST) ||
device->physical_device->info.conformant_trunc_coord) && pdev->info.conformant_trunc_coord) &&
!device->disable_trunc_coord; !device->disable_trunc_coord;
bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER || bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER || pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
@ -246,18 +246,17 @@ radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler, cons
S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode))); S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)));
sampler->state[3] = S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color)); sampler->state[3] = S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color));
if (device->physical_device->info.gfx_level >= GFX10) { if (pdev->info.gfx_level >= GFX10) {
sampler->state[2] |= S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -32, 31), 8)) | sampler->state[2] |= S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -32, 31), 8)) |
S_008F38_ANISO_OVERRIDE_GFX10(device->instance->drirc.disable_aniso_single_level); S_008F38_ANISO_OVERRIDE_GFX10(device->instance->drirc.disable_aniso_single_level);
} else { } else {
sampler->state[2] |= S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) | sampler->state[2] |= S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
S_008F38_DISABLE_LSB_CEIL(device->physical_device->info.gfx_level <= GFX8) | S_008F38_DISABLE_LSB_CEIL(pdev->info.gfx_level <= GFX8) | S_008F38_FILTER_PREC_FIX(1) |
S_008F38_FILTER_PREC_FIX(1) |
S_008F38_ANISO_OVERRIDE_GFX8(device->instance->drirc.disable_aniso_single_level && S_008F38_ANISO_OVERRIDE_GFX8(device->instance->drirc.disable_aniso_single_level &&
device->physical_device->info.gfx_level >= GFX8); pdev->info.gfx_level >= GFX8);
} }
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
sampler->state[3] |= S_008F3C_BORDER_COLOR_PTR_GFX11(border_color_ptr); sampler->state[3] |= S_008F3C_BORDER_COLOR_PTR_GFX11(border_color_ptr);
} else { } else {
sampler->state[3] |= S_008F3C_BORDER_COLOR_PTR_GFX6(border_color_ptr); sampler->state[3] |= S_008F3C_BORDER_COLOR_PTR_GFX6(border_color_ptr);

View file

@ -57,7 +57,9 @@ static const VkExtent3D radv_sdma_t2t_alignment_3d[] = {
ALWAYS_INLINE static unsigned ALWAYS_INLINE static unsigned
radv_sdma_pitch_alignment(const struct radv_device *device, const unsigned bpp) radv_sdma_pitch_alignment(const struct radv_device *device, const unsigned bpp)
{ {
if (device->physical_device->info.sdma_ip_version >= SDMA_5_0) const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.sdma_ip_version >= SDMA_5_0)
return MAX2(1, 4 / bpp); return MAX2(1, 4 / bpp);
return 4; return 4;
@ -82,7 +84,9 @@ radv_sdma_check_pitches(const unsigned pitch, const unsigned slice_pitch, const
ALWAYS_INLINE static enum gfx9_resource_type ALWAYS_INLINE static enum gfx9_resource_type
radv_sdma_surface_resource_type(const struct radv_device *const device, const struct radeon_surf *const surf) radv_sdma_surface_resource_type(const struct radv_device *const device, const struct radeon_surf *const surf)
{ {
if (device->physical_device->info.sdma_ip_version >= SDMA_5_0) { const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.sdma_ip_version >= SDMA_5_0) {
/* Use the 2D resource type for rotated or Z swizzles. */ /* Use the 2D resource type for rotated or Z swizzles. */
if ((surf->u.gfx9.resource_type == RADEON_RESOURCE_1D || surf->u.gfx9.resource_type == RADEON_RESOURCE_3D) && if ((surf->u.gfx9.resource_type == RADEON_RESOURCE_1D || surf->u.gfx9.resource_type == RADEON_RESOURCE_3D) &&
(surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER || surf->micro_tile_mode == RADEON_MICRO_MODE_DEPTH)) (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER || surf->micro_tile_mode == RADEON_MICRO_MODE_DEPTH))
@ -195,7 +199,9 @@ radv_sdma_get_metadata_config(const struct radv_device *const device, const stru
const struct radeon_surf *const surf, const VkImageSubresourceLayers subresource, const struct radeon_surf *const surf, const VkImageSubresourceLayers subresource,
const VkImageAspectFlags aspect_mask) const VkImageAspectFlags aspect_mask)
{ {
if (!device->physical_device->info.sdma_supports_compression || const struct radv_physical_device *pdev = radv_device_physical(device);
if (!pdev->info.sdma_supports_compression ||
!(radv_dcc_enabled(image, subresource.mipLevel) || radv_image_has_htile(image))) { !(radv_dcc_enabled(image, subresource.mipLevel) || radv_image_has_htile(image))) {
return 0; return 0;
} }
@ -203,8 +209,7 @@ radv_sdma_get_metadata_config(const struct radv_device *const device, const stru
const VkFormat format = vk_format_get_aspect_format(image->vk.format, aspect_mask); const VkFormat format = vk_format_get_aspect_format(image->vk.format, aspect_mask);
const struct util_format_description *desc = vk_format_description(format); const struct util_format_description *desc = vk_format_description(format);
const uint32_t data_format = const uint32_t data_format = ac_get_cb_format(pdev->info.gfx_level, vk_format_to_pipe_format(format));
ac_get_cb_format(device->physical_device->info.gfx_level, vk_format_to_pipe_format(format));
const uint32_t alpha_is_on_msb = vi_alpha_is_on_msb(device, format); const uint32_t alpha_is_on_msb = vi_alpha_is_on_msb(device, format);
const uint32_t number_type = radv_translate_buffer_numformat(desc, vk_format_get_first_non_void_channel(format)); const uint32_t number_type = radv_translate_buffer_numformat(desc, vk_format_get_first_non_void_channel(format));
const uint32_t surface_type = radv_sdma_surface_type_from_aspect_mask(aspect_mask); const uint32_t surface_type = radv_sdma_surface_type_from_aspect_mask(aspect_mask);
@ -220,11 +225,12 @@ static uint32_t
radv_sdma_get_tiled_info_dword(const struct radv_device *const device, const struct radv_image *const image, radv_sdma_get_tiled_info_dword(const struct radv_device *const device, const struct radv_image *const image,
const struct radeon_surf *const surf, const VkImageSubresourceLayers subresource) const struct radeon_surf *const surf, const VkImageSubresourceLayers subresource)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
const uint32_t element_size = util_logbase2(surf->bpe); const uint32_t element_size = util_logbase2(surf->bpe);
const uint32_t swizzle_mode = surf->has_stencil ? surf->u.gfx9.zs.stencil_swizzle_mode : surf->u.gfx9.swizzle_mode; const uint32_t swizzle_mode = surf->has_stencil ? surf->u.gfx9.zs.stencil_swizzle_mode : surf->u.gfx9.swizzle_mode;
const enum gfx9_resource_type dimension = radv_sdma_surface_resource_type(device, surf); const enum gfx9_resource_type dimension = radv_sdma_surface_resource_type(device, surf);
const uint32_t info = element_size | swizzle_mode << 3 | dimension << 9; const uint32_t info = element_size | swizzle_mode << 3 | dimension << 9;
const enum sdma_version ver = device->physical_device->info.sdma_ip_version; const enum sdma_version ver = pdev->info.sdma_ip_version;
if (ver >= SDMA_5_0) { if (ver >= SDMA_5_0) {
const uint32_t mip_max = MAX2(image->vk.mip_levels, 1); const uint32_t mip_max = MAX2(image->vk.mip_levels, 1);
@ -242,7 +248,8 @@ static uint32_t
radv_sdma_get_tiled_header_dword(const struct radv_device *const device, const struct radv_image *const image, radv_sdma_get_tiled_header_dword(const struct radv_device *const device, const struct radv_image *const image,
const VkImageSubresourceLayers subresource) const VkImageSubresourceLayers subresource)
{ {
const enum sdma_version ver = device->physical_device->info.sdma_ip_version; const struct radv_physical_device *pdev = radv_device_physical(device);
const enum sdma_version ver = pdev->info.sdma_ip_version;
if (ver >= SDMA_5_0) { if (ver >= SDMA_5_0) {
return 0; return 0;
@ -262,6 +269,7 @@ radv_sdma_get_surf(const struct radv_device *const device, const struct radv_ima
{ {
assert(util_bitcount(aspect_mask) == 1); assert(util_bitcount(aspect_mask) == 1);
const struct radv_physical_device *pdev = radv_device_physical(device);
const unsigned plane_idx = radv_plane_from_aspect(aspect_mask); const unsigned plane_idx = radv_plane_from_aspect(aspect_mask);
const unsigned binding_idx = image->disjoint ? plane_idx : 0; const unsigned binding_idx = image->disjoint ? plane_idx : 0;
const struct radv_image_binding *binding = &image->bindings[binding_idx]; const struct radv_image_binding *binding = &image->bindings[binding_idx];
@ -301,7 +309,7 @@ radv_sdma_get_surf(const struct radv_device *const device, const struct radv_ima
info.info_dword = radv_sdma_get_tiled_info_dword(device, image, surf, subresource); info.info_dword = radv_sdma_get_tiled_info_dword(device, image, surf, subresource);
info.header_dword = radv_sdma_get_tiled_header_dword(device, image, subresource); info.header_dword = radv_sdma_get_tiled_header_dword(device, image, subresource);
if (device->physical_device->info.sdma_supports_compression && if (pdev->info.sdma_supports_compression &&
(radv_dcc_enabled(image, subresource.mipLevel) || radv_image_has_htile(image))) { (radv_dcc_enabled(image, subresource.mipLevel) || radv_image_has_htile(image))) {
info.meta_va = binding->bo->va + binding->offset + surf->meta_offset; info.meta_va = binding->bo->va + binding->offset + surf->meta_offset;
info.meta_config = radv_sdma_get_metadata_config(device, image, surf, subresource, aspect_mask); info.meta_config = radv_sdma_get_metadata_config(device, image, surf, subresource, aspect_mask);
@ -326,7 +334,8 @@ radv_sdma_copy_buffer(const struct radv_device *device, struct radeon_cmdbuf *cs
if (size == 0) if (size == 0)
return; return;
const enum sdma_version ver = device->physical_device->info.sdma_ip_version; const struct radv_physical_device *pdev = radv_device_physical(device);
const enum sdma_version ver = pdev->info.sdma_ip_version;
const unsigned max_size_per_packet = ver >= SDMA_5_2 ? SDMA_V5_2_COPY_MAX_BYTES : SDMA_V2_0_COPY_MAX_BYTES; const unsigned max_size_per_packet = ver >= SDMA_5_2 ? SDMA_V5_2_COPY_MAX_BYTES : SDMA_V2_0_COPY_MAX_BYTES;
unsigned align = ~0u; unsigned align = ~0u;
@ -367,11 +376,13 @@ void
radv_sdma_fill_buffer(const struct radv_device *device, struct radeon_cmdbuf *cs, const uint64_t va, radv_sdma_fill_buffer(const struct radv_device *device, struct radeon_cmdbuf *cs, const uint64_t va,
const uint64_t size, const uint32_t value) const uint64_t size, const uint32_t value)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
const uint32_t fill_size = 2; /* This means that the count is in dwords. */ const uint32_t fill_size = 2; /* This means that the count is in dwords. */
const uint32_t constant_fill_header = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0) | (fill_size & 0x3) << 30; const uint32_t constant_fill_header = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0) | (fill_size & 0x3) << 30;
/* This packet is the same since SDMA v2.4, haven't bothered to check older versions. */ /* This packet is the same since SDMA v2.4, haven't bothered to check older versions. */
const enum sdma_version ver = device->physical_device->info.sdma_ip_version; const enum sdma_version ver = pdev->info.sdma_ip_version;
assert(ver >= SDMA_2_4); assert(ver >= SDMA_2_4);
/* Maximum allowed fill size depends on the GPU. /* Maximum allowed fill size depends on the GPU.
@ -450,7 +461,9 @@ radv_sdma_emit_copy_tiled_sub_window(const struct radv_device *device, struct ra
const struct radv_sdma_surf *const linear, const VkExtent3D pix_extent, const struct radv_sdma_surf *const linear, const VkExtent3D pix_extent,
const bool detile) const bool detile)
{ {
if (!device->physical_device->info.sdma_supports_compression) { const struct radv_physical_device *pdev = radv_device_physical(device);
if (!pdev->info.sdma_supports_compression) {
assert(!tiled->meta_va); assert(!tiled->meta_va);
} }
@ -499,14 +512,15 @@ radv_sdma_emit_copy_t2t_sub_window(const struct radv_device *device, struct rade
const struct radv_sdma_surf *const src, const struct radv_sdma_surf *const dst, const struct radv_sdma_surf *const src, const struct radv_sdma_surf *const dst,
const VkExtent3D px_extent) const VkExtent3D px_extent)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
/* We currently only support the SDMA v4+ versions of this packet. */ /* We currently only support the SDMA v4+ versions of this packet. */
assert(device->physical_device->info.sdma_ip_version >= SDMA_4_0); assert(pdev->info.sdma_ip_version >= SDMA_4_0);
/* On GFX10+ this supports DCC, but cannot copy a compressed surface to another compressed surface. */ /* On GFX10+ this supports DCC, but cannot copy a compressed surface to another compressed surface. */
assert(!src->meta_va || !dst->meta_va); assert(!src->meta_va || !dst->meta_va);
if (device->physical_device->info.sdma_ip_version >= SDMA_4_0 && if (pdev->info.sdma_ip_version >= SDMA_4_0 && pdev->info.sdma_ip_version < SDMA_5_0) {
device->physical_device->info.sdma_ip_version < SDMA_5_0) {
/* SDMA v4 doesn't support mip_id selection in the T2T copy packet. */ /* SDMA v4 doesn't support mip_id selection in the T2T copy packet. */
assert(src->header_dword >> 24 == 0); assert(src->header_dword >> 24 == 0);
assert(dst->header_dword >> 24 == 0); assert(dst->header_dword >> 24 == 0);
@ -696,7 +710,8 @@ radv_sdma_use_t2t_scanline_copy(const struct radv_device *device, const struct r
/* SDMA can't do format conversion. */ /* SDMA can't do format conversion. */
assert(src->bpp == dst->bpp); assert(src->bpp == dst->bpp);
const enum sdma_version ver = device->physical_device->info.sdma_ip_version; const struct radv_physical_device *pdev = radv_device_physical(device);
const enum sdma_version ver = pdev->info.sdma_ip_version;
if (ver < SDMA_5_0) { if (ver < SDMA_5_0) {
/* SDMA v4.x and older doesn't support proper mip level selection. */ /* SDMA v4.x and older doesn't support proper mip level selection. */
if (src->mip_levels > 1 || dst->mip_levels > 1) if (src->mip_levels > 1 || dst->mip_levels > 1)

View file

@ -304,6 +304,7 @@ nir_shader *
radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_stage *stage, radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_stage *stage,
const struct radv_spirv_to_nir_options *options, bool is_internal) const struct radv_spirv_to_nir_options *options, bool is_internal)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned subgroup_size = 64, ballot_bit_size = 64; unsigned subgroup_size = 64, ballot_bit_size = 64;
const unsigned required_subgroup_size = stage->key.subgroup_required_size * 32; const unsigned required_subgroup_size = stage->key.subgroup_required_size * 32;
if (required_subgroup_size) { if (required_subgroup_size) {
@ -340,7 +341,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
.device = device, .device = device,
.object = stage->spirv.object, .object = stage->spirv.object,
}; };
const bool has_fragment_shader_interlock = radv_has_pops(device->physical_device); const bool has_fragment_shader_interlock = radv_has_pops(pdev);
const struct spirv_to_nir_options spirv_options = { const struct spirv_to_nir_options spirv_options = {
.caps = .caps =
{ {
@ -359,7 +360,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
.device_group = true, .device_group = true,
.draw_parameters = true, .draw_parameters = true,
.float_controls = true, .float_controls = true,
.float16 = device->physical_device->info.has_packed_math_16bit, .float16 = pdev->info.has_packed_math_16bit,
.float32_atomic_add = true, .float32_atomic_add = true,
.float32_atomic_min_max = true, .float32_atomic_min_max = true,
.float64 = true, .float64 = true,
@ -411,7 +412,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
.variable_pointers = true, .variable_pointers = true,
.vk_memory_model = true, .vk_memory_model = true,
.vk_memory_model_device_scope = true, .vk_memory_model_device_scope = true,
.fragment_shading_rate = device->physical_device->info.gfx_level >= GFX10_3, .fragment_shading_rate = pdev->info.gfx_level >= GFX10_3,
.workgroup_memory_explicit_layout = true, .workgroup_memory_explicit_layout = true,
.cooperative_matrix = true, .cooperative_matrix = true,
}, },
@ -426,11 +427,11 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
.func = radv_spirv_nir_debug, .func = radv_spirv_nir_debug,
.private_data = &spirv_debug_data, .private_data = &spirv_debug_data,
}, },
.force_tex_non_uniform = device->physical_device->cache_key.tex_non_uniform, .force_tex_non_uniform = pdev->cache_key.tex_non_uniform,
.force_ssbo_non_uniform = device->physical_device->cache_key.ssbo_non_uniform, .force_ssbo_non_uniform = pdev->cache_key.ssbo_non_uniform,
}; };
nir = spirv_to_nir(spirv, stage->spirv.size / 4, spec_entries, num_spec_entries, stage->stage, stage->entrypoint, nir = spirv_to_nir(spirv, stage->spirv.size / 4, spec_entries, num_spec_entries, stage->stage, stage->entrypoint,
&spirv_options, &device->physical_device->nir_options[stage->stage]); &spirv_options, &pdev->nir_options[stage->stage]);
nir->info.internal |= is_internal; nir->info.internal |= is_internal;
assert(nir->info.stage == stage->stage); assert(nir->info.stage == stage->stage);
nir_validate_shader(nir, "after spirv_to_nir"); nir_validate_shader(nir, "after spirv_to_nir");
@ -507,7 +508,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
NIR_PASS(_, nir, nir_lower_vars_to_ssa); NIR_PASS(_, nir, nir_lower_vars_to_ssa);
NIR_PASS(_, nir, nir_propagate_invariant, device->physical_device->cache_key.invariant_geom); NIR_PASS(_, nir, nir_propagate_invariant, pdev->cache_key.invariant_geom);
NIR_PASS(_, nir, nir_lower_clip_cull_distance_arrays); NIR_PASS(_, nir, nir_lower_clip_cull_distance_arrays);
@ -515,11 +516,11 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
nir->info.stage == MESA_SHADER_GEOMETRY) nir->info.stage == MESA_SHADER_GEOMETRY)
NIR_PASS_V(nir, nir_shader_gather_xfb_info); NIR_PASS_V(nir, nir_shader_gather_xfb_info);
NIR_PASS(_, nir, nir_lower_discard_or_demote, device->physical_device->cache_key.lower_discard_to_demote); NIR_PASS(_, nir, nir_lower_discard_or_demote, pdev->cache_key.lower_discard_to_demote);
nir_lower_doubles_options lower_doubles = nir->options->lower_doubles_options; nir_lower_doubles_options lower_doubles = nir->options->lower_doubles_options;
if (device->physical_device->info.gfx_level == GFX6) { if (pdev->info.gfx_level == GFX6) {
/* GFX6 doesn't support v_floor_f64 and the precision /* GFX6 doesn't support v_floor_f64 and the precision
* of v_fract_f64 which is used to implement 64-bit * of v_fract_f64 which is used to implement 64-bit
* floor is less than what Vulkan requires. * floor is less than what Vulkan requires.
@ -537,7 +538,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
/* Mesh shaders run as NGG which can implement local_invocation_index from /* Mesh shaders run as NGG which can implement local_invocation_index from
* the wave ID in merged_wave_info, but they don't have local_invocation_ids on GFX10.3. * the wave ID in merged_wave_info, but they don't have local_invocation_ids on GFX10.3.
*/ */
.lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH && !device->physical_device->mesh_fast_launch_2, .lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH && !pdev->mesh_fast_launch_2,
.lower_local_invocation_index = nir->info.stage == MESA_SHADER_COMPUTE && .lower_local_invocation_index = nir->info.stage == MESA_SHADER_COMPUTE &&
((nir->info.workgroup_size[0] == 1) + (nir->info.workgroup_size[1] == 1) + ((nir->info.workgroup_size[0] == 1) + (nir->info.workgroup_size[1] == 1) +
(nir->info.workgroup_size[2] == 1)) == 2, (nir->info.workgroup_size[2] == 1)) == 2,
@ -569,10 +570,10 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
.lower_txf_offset = true, .lower_txf_offset = true,
.lower_tg4_offsets = true, .lower_tg4_offsets = true,
.lower_txs_cube_array = true, .lower_txs_cube_array = true,
.lower_to_fragment_fetch_amd = device->physical_device->use_fmask, .lower_to_fragment_fetch_amd = pdev->use_fmask,
.lower_lod_zero_width = true, .lower_lod_zero_width = true,
.lower_invalid_implicit_lod = true, .lower_invalid_implicit_lod = true,
.lower_1d = device->physical_device->info.gfx_level == GFX9, .lower_1d = pdev->info.gfx_level == GFX9,
}; };
NIR_PASS(_, nir, nir_lower_tex, &tex_options); NIR_PASS(_, nir, nir_lower_tex, &tex_options);
@ -597,7 +598,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
NIR_PASS(_, nir, nir_lower_global_vars_to_local); NIR_PASS(_, nir, nir_lower_global_vars_to_local);
NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL); NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
bool gfx7minus = device->physical_device->info.gfx_level <= GFX7; bool gfx7minus = pdev->info.gfx_level <= GFX7;
bool has_inverse_ballot = true; bool has_inverse_ballot = true;
#if LLVM_AVAILABLE #if LLVM_AVAILABLE
has_inverse_ballot = !radv_use_llvm_for_stage(device, nir->info.stage) || LLVM_VERSION_MAJOR >= 17; has_inverse_ballot = !radv_use_llvm_for_stage(device, nir->info.stage) || LLVM_VERSION_MAJOR >= 17;
@ -690,7 +691,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
nir->info.stage == MESA_SHADER_MESH) && nir->info.stage == MESA_SHADER_MESH) &&
nir->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE)) { nir->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE)) {
/* Lower primitive shading rate to match HW requirements. */ /* Lower primitive shading rate to match HW requirements. */
NIR_PASS(_, nir, radv_nir_lower_primitive_shading_rate, device->physical_device->info.gfx_level); NIR_PASS(_, nir, radv_nir_lower_primitive_shading_rate, pdev->info.gfx_level);
} }
/* Indirect lowering must be called after the radv_optimize_nir() loop /* Indirect lowering must be called after the radv_optimize_nir() loop
@ -698,8 +699,8 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
* bloat the instruction count of the loop and cause it to be * bloat the instruction count of the loop and cause it to be
* considered too large for unrolling. * considered too large for unrolling.
*/ */
if (ac_nir_lower_indirect_derefs(nir, device->physical_device->info.gfx_level) && if (ac_nir_lower_indirect_derefs(nir, pdev->info.gfx_level) && !stage->key.optimisations_disabled &&
!stage->key.optimisations_disabled && nir->info.stage != MESA_SHADER_COMPUTE) { nir->info.stage != MESA_SHADER_COMPUTE) {
/* Optimize the lowered code before the linking optimizations. */ /* Optimize the lowered code before the linking optimizations. */
radv_optimize_nir(nir, false); radv_optimize_nir(nir, false);
} }
@ -775,6 +776,7 @@ void
radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage, radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
const struct radv_graphics_state_key *gfx_state) const struct radv_graphics_state_key *gfx_state)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader_info *info = &ngg_stage->info; const struct radv_shader_info *info = &ngg_stage->info;
nir_shader *nir = ngg_stage->nir; nir_shader *nir = ngg_stage->nir;
@ -818,19 +820,19 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
nir->info.shared_size = info->ngg_info.lds_size; nir->info.shared_size = info->ngg_info.lds_size;
ac_nir_lower_ngg_options options = {0}; ac_nir_lower_ngg_options options = {0};
options.family = device->physical_device->info.family; options.family = pdev->info.family;
options.gfx_level = device->physical_device->info.gfx_level; options.gfx_level = pdev->info.gfx_level;
options.max_workgroup_size = info->workgroup_size; options.max_workgroup_size = info->workgroup_size;
options.wave_size = info->wave_size; options.wave_size = info->wave_size;
options.clip_cull_dist_mask = info->outinfo.clip_dist_mask | info->outinfo.cull_dist_mask; options.clip_cull_dist_mask = info->outinfo.clip_dist_mask | info->outinfo.cull_dist_mask;
options.vs_output_param_offset = info->outinfo.vs_output_param_offset; options.vs_output_param_offset = info->outinfo.vs_output_param_offset;
options.has_param_exports = info->outinfo.param_exports || info->outinfo.prim_param_exports; options.has_param_exports = info->outinfo.param_exports || info->outinfo.prim_param_exports;
options.can_cull = nir->info.stage != MESA_SHADER_GEOMETRY && info->has_ngg_culling; options.can_cull = nir->info.stage != MESA_SHADER_GEOMETRY && info->has_ngg_culling;
options.disable_streamout = !device->physical_device->use_ngg_streamout; options.disable_streamout = !pdev->use_ngg_streamout;
options.has_gen_prim_query = info->has_prim_query; options.has_gen_prim_query = info->has_prim_query;
options.has_xfb_prim_query = info->has_xfb_query; options.has_xfb_prim_query = info->has_xfb_query;
options.has_gs_invocations_query = device->physical_device->info.gfx_level < GFX11; options.has_gs_invocations_query = pdev->info.gfx_level < GFX11;
options.has_gs_primitives_query = device->physical_device->info.gfx_level < GFX11; options.has_gs_primitives_query = pdev->info.gfx_level < GFX11;
options.force_vrs = info->force_vrs_per_vertex; options.force_vrs = info->force_vrs_per_vertex;
if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) { if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) {
@ -862,8 +864,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
bool scratch_ring = false; bool scratch_ring = false;
NIR_PASS_V(nir, ac_nir_lower_ngg_ms, options.gfx_level, options.clip_cull_dist_mask, NIR_PASS_V(nir, ac_nir_lower_ngg_ms, options.gfx_level, options.clip_cull_dist_mask,
options.vs_output_param_offset, options.has_param_exports, &scratch_ring, info->wave_size, options.vs_output_param_offset, options.has_param_exports, &scratch_ring, info->wave_size,
hw_workgroup_size, gfx_state->has_multiview_view_index, info->ms.has_query, hw_workgroup_size, gfx_state->has_multiview_view_index, info->ms.has_query, pdev->mesh_fast_launch_2);
device->physical_device->mesh_fast_launch_2);
ngg_stage->info.ms.needs_ms_scratch_ring = scratch_ring; ngg_stage->info.ms.needs_ms_scratch_ring = scratch_ring;
} else { } else {
unreachable("invalid SW stage passed to radv_lower_ngg"); unreachable("invalid SW stage passed to radv_lower_ngg");
@ -933,6 +934,7 @@ static struct radv_shader_arena *
radv_create_shader_arena(struct radv_device *device, struct radv_shader_free_list *free_list, unsigned min_size, radv_create_shader_arena(struct radv_device *device, struct radv_shader_free_list *free_list, unsigned min_size,
unsigned arena_size, bool replayable, uint64_t replay_va) unsigned arena_size, bool replayable, uint64_t replay_va)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
union radv_shader_arena_block *alloc = NULL; union radv_shader_arena_block *alloc = NULL;
struct radv_shader_arena *arena = calloc(1, sizeof(struct radv_shader_arena)); struct radv_shader_arena *arena = calloc(1, sizeof(struct radv_shader_arena));
if (!arena) if (!arena)
@ -948,7 +950,7 @@ radv_create_shader_arena(struct radv_device *device, struct radv_shader_free_lis
if (device->shader_use_invisible_vram) if (device->shader_use_invisible_vram)
flags |= RADEON_FLAG_NO_CPU_ACCESS; flags |= RADEON_FLAG_NO_CPU_ACCESS;
else else
flags |= (device->physical_device->info.cpdma_prefetch_writes_memory ? 0 : RADEON_FLAG_READ_ONLY); flags |= (pdev->info.cpdma_prefetch_writes_memory ? 0 : RADEON_FLAG_READ_ONLY);
if (replayable) if (replayable)
flags |= RADEON_FLAG_REPLAYABLE; flags |= RADEON_FLAG_REPLAYABLE;
@ -1079,7 +1081,9 @@ insert_block(struct radv_device *device, union radv_shader_arena_block *hole, ui
union radv_shader_arena_block * union radv_shader_arena_block *
radv_alloc_shader_memory(struct radv_device *device, uint32_t size, bool replayable, void *ptr) radv_alloc_shader_memory(struct radv_device *device, uint32_t size, bool replayable, void *ptr)
{ {
size = ac_align_shader_binary_for_prefetch(&device->physical_device->info, size); const struct radv_physical_device *pdev = radv_device_physical(device);
size = ac_align_shader_binary_for_prefetch(&pdev->info, size);
size = align(size, RADV_SHADER_ALLOC_ALIGNMENT); size = align(size, RADV_SHADER_ALLOC_ALIGNMENT);
mtx_lock(&device->shader_arena_mutex); mtx_lock(&device->shader_arena_mutex);
@ -1402,7 +1406,8 @@ radv_destroy_shader_upload_queue(struct radv_device *device)
static bool static bool
radv_should_use_wgp_mode(const struct radv_device *device, gl_shader_stage stage, const struct radv_shader_info *info) radv_should_use_wgp_mode(const struct radv_device *device, gl_shader_stage stage, const struct radv_shader_info *info)
{ {
enum amd_gfx_level chip = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
enum amd_gfx_level chip = pdev->info.gfx_level;
switch (stage) { switch (stage) {
case MESA_SHADER_COMPUTE: case MESA_SHADER_COMPUTE:
case MESA_SHADER_TESS_CTRL: case MESA_SHADER_TESS_CTRL:
@ -1422,13 +1427,13 @@ static bool
radv_open_rtld_binary(struct radv_device *device, const struct radv_shader_binary *binary, radv_open_rtld_binary(struct radv_device *device, const struct radv_shader_binary *binary,
struct ac_rtld_binary *rtld_binary) struct ac_rtld_binary *rtld_binary)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
const char *elf_data = (const char *)((struct radv_shader_binary_rtld *)binary)->data; const char *elf_data = (const char *)((struct radv_shader_binary_rtld *)binary)->data;
size_t elf_size = ((struct radv_shader_binary_rtld *)binary)->elf_size; size_t elf_size = ((struct radv_shader_binary_rtld *)binary)->elf_size;
struct ac_rtld_symbol lds_symbols[3]; struct ac_rtld_symbol lds_symbols[3];
unsigned num_lds_symbols = 0; unsigned num_lds_symbols = 0;
if (device->physical_device->info.gfx_level >= GFX9 && if (pdev->info.gfx_level >= GFX9 && (binary->info.stage == MESA_SHADER_GEOMETRY || binary->info.is_ngg)) {
(binary->info.stage == MESA_SHADER_GEOMETRY || binary->info.is_ngg)) {
struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++]; struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
sym->name = "esgs_ring"; sym->name = "esgs_ring";
sym->size = binary->info.ngg_info.esgs_ring_size; sym->size = binary->info.ngg_info.esgs_ring_size;
@ -1448,7 +1453,7 @@ radv_open_rtld_binary(struct radv_device *device, const struct radv_shader_binar
} }
struct ac_rtld_open_info open_info = { struct ac_rtld_open_info open_info = {
.info = &device->physical_device->info, .info = &pdev->info,
.shader_type = binary->info.stage, .shader_type = binary->info.stage,
.wave_size = binary->info.wave_size, .wave_size = binary->info.wave_size,
.num_parts = 1, .num_parts = 1,
@ -1466,6 +1471,7 @@ static bool
radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_binary *binary, radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_binary *binary,
const struct radv_shader_args *args) const struct radv_shader_args *args)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct ac_shader_config *config = &binary->config; struct ac_shader_config *config = &binary->config;
if (binary->type == RADV_BINARY_TYPE_RTLD) { if (binary->type == RADV_BINARY_TYPE_RTLD) {
@ -1478,13 +1484,13 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi
return false; return false;
} }
if (!ac_rtld_read_config(&device->physical_device->info, &rtld_binary, config)) { if (!ac_rtld_read_config(&pdev->info, &rtld_binary, config)) {
ac_rtld_close(&rtld_binary); ac_rtld_close(&rtld_binary);
return false; return false;
} }
if (rtld_binary.lds_size > 0) { if (rtld_binary.lds_size > 0) {
unsigned encode_granularity = device->physical_device->info.lds_encode_granularity; unsigned encode_granularity = pdev->info.lds_encode_granularity;
config->lds_size = DIV_ROUND_UP(rtld_binary.lds_size, encode_granularity); config->lds_size = DIV_ROUND_UP(rtld_binary.lds_size, encode_granularity);
} }
if (!config->lds_size && binary->info.stage == MESA_SHADER_TESS_CTRL) { if (!config->lds_size && binary->info.stage == MESA_SHADER_TESS_CTRL) {
@ -1499,7 +1505,6 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi
const struct radv_shader_info *info = &binary->info; const struct radv_shader_info *info = &binary->info;
gl_shader_stage stage = binary->info.stage; gl_shader_stage stage = binary->info.stage;
const struct radv_physical_device *pdev = device->physical_device;
bool scratch_enabled = config->scratch_bytes_per_wave > 0; bool scratch_enabled = config->scratch_bytes_per_wave > 0;
bool trap_enabled = !!device->trap_handler_shader; bool trap_enabled = !!device->trap_handler_shader;
unsigned vgpr_comp_cnt = 0; unsigned vgpr_comp_cnt = 0;
@ -2064,7 +2069,8 @@ unsigned
radv_get_max_waves(const struct radv_device *device, const struct ac_shader_config *conf, radv_get_max_waves(const struct radv_device *device, const struct ac_shader_config *conf,
const struct radv_shader_info *info) const struct radv_shader_info *info)
{ {
const struct radeon_info *gpu_info = &device->physical_device->info; const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
const enum amd_gfx_level gfx_level = gpu_info->gfx_level; const enum amd_gfx_level gfx_level = gpu_info->gfx_level;
const uint8_t wave_size = info->wave_size; const uint8_t wave_size = info->wave_size;
gl_shader_stage stage = info->stage; gl_shader_stage stage = info->stage;
@ -2109,7 +2115,8 @@ radv_get_max_waves(const struct radv_device *device, const struct ac_shader_conf
unsigned unsigned
radv_get_max_scratch_waves(const struct radv_device *device, struct radv_shader *shader) radv_get_max_scratch_waves(const struct radv_device *device, struct radv_shader *shader)
{ {
const unsigned num_cu = device->physical_device->info.num_cu; const struct radv_physical_device *pdev = radv_device_physical(device);
const unsigned num_cu = pdev->info.num_cu;
return MIN2(device->scratch_waves, 4 * num_cu * shader->max_waves); return MIN2(device->scratch_waves, 4 * num_cu * shader->max_waves);
} }
@ -2423,10 +2430,12 @@ radv_fill_nir_compiler_options(struct radv_nir_compiler_options *options, struct
bool can_dump_shader, bool is_meta_shader, bool keep_shader_info, bool can_dump_shader, bool is_meta_shader, bool keep_shader_info,
bool keep_statistic_info) bool keep_statistic_info)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
/* robust_buffer_access_llvm here used by LLVM only, pipeline robustness is not exposed there. */ /* robust_buffer_access_llvm here used by LLVM only, pipeline robustness is not exposed there. */
options->robust_buffer_access_llvm = device->buffer_robustness >= RADV_BUFFER_ROBUSTNESS_1; options->robust_buffer_access_llvm = device->buffer_robustness >= RADV_BUFFER_ROBUSTNESS_1;
options->wgp_mode = should_use_wgp; options->wgp_mode = should_use_wgp;
options->info = &device->physical_device->info; options->info = &pdev->info;
options->dump_shader = can_dump_shader; options->dump_shader = can_dump_shader;
options->dump_preoptir = options->dump_shader && device->instance->debug_flags & RADV_DEBUG_PREOPTIR; options->dump_preoptir = options->dump_shader && device->instance->debug_flags & RADV_DEBUG_PREOPTIR;
options->record_ir = keep_shader_info; options->record_ir = keep_shader_info;
@ -2607,6 +2616,7 @@ radv_aco_build_shader_part(void **bin, uint32_t num_sgprs, uint32_t num_vgprs, c
struct radv_shader * struct radv_shader *
radv_create_rt_prolog(struct radv_device *device) radv_create_rt_prolog(struct radv_device *device)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_shader *prolog; struct radv_shader *prolog;
struct radv_shader_args in_args = {0}; struct radv_shader_args in_args = {0};
struct radv_shader_args out_args = {0}; struct radv_shader_args out_args = {0};
@ -2618,13 +2628,13 @@ radv_create_rt_prolog(struct radv_device *device)
info.stage = MESA_SHADER_COMPUTE; info.stage = MESA_SHADER_COMPUTE;
info.loads_push_constants = true; info.loads_push_constants = true;
info.desc_set_used_mask = -1; /* just to force indirection */ info.desc_set_used_mask = -1; /* just to force indirection */
info.wave_size = device->physical_device->rt_wave_size; info.wave_size = pdev->rt_wave_size;
info.workgroup_size = info.wave_size; info.workgroup_size = info.wave_size;
info.user_data_0 = R_00B900_COMPUTE_USER_DATA_0; info.user_data_0 = R_00B900_COMPUTE_USER_DATA_0;
info.cs.is_rt_shader = true; info.cs.is_rt_shader = true;
info.cs.uses_dynamic_rt_callable_stack = true; info.cs.uses_dynamic_rt_callable_stack = true;
info.cs.block_size[0] = 8; info.cs.block_size[0] = 8;
info.cs.block_size[1] = device->physical_device->rt_wave_size == 64 ? 8 : 4; info.cs.block_size[1] = pdev->rt_wave_size == 64 ? 8 : 4;
info.cs.block_size[2] = 1; info.cs.block_size[2] = 1;
info.cs.uses_thread_id[0] = true; info.cs.uses_thread_id[0] = true;
info.cs.uses_thread_id[1] = true; info.cs.uses_thread_id[1] = true;
@ -2739,6 +2749,7 @@ struct radv_shader_part *
radv_create_ps_epilog(struct radv_device *device, const struct radv_ps_epilog_key *key, radv_create_ps_epilog(struct radv_device *device, const struct radv_ps_epilog_key *key,
struct radv_shader_part_binary **binary_out) struct radv_shader_part_binary **binary_out)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_shader_part *epilog; struct radv_shader_part *epilog;
struct radv_shader_args args = {0}; struct radv_shader_args args = {0};
struct radv_nir_compiler_options options = {0}; struct radv_nir_compiler_options options = {0};
@ -2748,7 +2759,7 @@ radv_create_ps_epilog(struct radv_device *device, const struct radv_ps_epilog_ke
struct radv_shader_info info = {0}; struct radv_shader_info info = {0};
info.stage = MESA_SHADER_FRAGMENT; info.stage = MESA_SHADER_FRAGMENT;
info.wave_size = device->physical_device->ps_wave_size; info.wave_size = pdev->ps_wave_size;
info.workgroup_size = 64; info.workgroup_size = 64;
radv_declare_ps_epilog_args(device, key, &args); radv_declare_ps_epilog_args(device, key, &args);

View file

@ -258,7 +258,9 @@ declare_ms_input_sgprs(const struct radv_shader_info *info, struct radv_shader_a
static void static void
declare_ms_input_vgprs(const struct radv_device *device, struct radv_shader_args *args) declare_ms_input_vgprs(const struct radv_device *device, struct radv_shader_args *args)
{ {
if (device->physical_device->mesh_fast_launch_2) { const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->mesh_fast_launch_2) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids);
} else { } else {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
@ -510,7 +512,8 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics
const struct radv_shader_info *info, gl_shader_stage stage, gl_shader_stage previous_stage, const struct radv_shader_info *info, gl_shader_stage stage, gl_shader_stage previous_stage,
struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info) struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info)
{ {
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
bool has_shader_query = info->has_prim_query || info->has_xfb_query || bool has_shader_query = info->has_prim_query || info->has_xfb_query ||
(stage == MESA_SHADER_GEOMETRY && info->gs.has_pipeline_stat_query) || (stage == MESA_SHADER_GEOMETRY && info->gs.has_pipeline_stat_query) ||
(stage == MESA_SHADER_MESH && info->ms.has_query) || (stage == MESA_SHADER_MESH && info->ms.has_query) ||
@ -784,7 +787,7 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics
declare_ngg_sgprs(info, args, has_ngg_provoking_vtx); declare_ngg_sgprs(info, args, has_ngg_provoking_vtx);
} }
if (previous_stage != MESA_SHADER_MESH || !device->physical_device->mesh_fast_launch_2) { if (previous_stage != MESA_SHADER_MESH || !pdev->mesh_fast_launch_2) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
@ -871,7 +874,8 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_gra
if (info->loads_push_constants) if (info->loads_push_constants)
num_user_sgprs++; num_user_sgprs++;
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
uint32_t available_sgprs = gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16; uint32_t available_sgprs = gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16;
uint32_t remaining_sgprs = available_sgprs - num_user_sgprs; uint32_t remaining_sgprs = available_sgprs - num_user_sgprs;

View file

@ -342,6 +342,8 @@ static uint8_t
radv_get_wave_size(struct radv_device *device, gl_shader_stage stage, const struct radv_shader_info *info, radv_get_wave_size(struct radv_device *device, gl_shader_stage stage, const struct radv_shader_info *info,
const struct radv_shader_stage_key *stage_key) const struct radv_shader_stage_key *stage_key)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (stage_key->subgroup_required_size) if (stage_key->subgroup_required_size)
return stage_key->subgroup_required_size * 32; return stage_key->subgroup_required_size * 32;
@ -350,11 +352,11 @@ radv_get_wave_size(struct radv_device *device, gl_shader_stage stage, const stru
else if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_TASK) else if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_TASK)
return info->wave_size; return info->wave_size;
else if (stage == MESA_SHADER_FRAGMENT) else if (stage == MESA_SHADER_FRAGMENT)
return device->physical_device->ps_wave_size; return pdev->ps_wave_size;
else if (gl_shader_stage_is_rt(stage)) else if (gl_shader_stage_is_rt(stage))
return device->physical_device->rt_wave_size; return pdev->rt_wave_size;
else else
return device->physical_device->ge_wave_size; return pdev->ge_wave_size;
} }
static uint8_t static uint8_t
@ -370,6 +372,7 @@ radv_get_ballot_bit_size(struct radv_device *device, gl_shader_stage stage, cons
static uint32_t static uint32_t
radv_compute_esgs_itemsize(const struct radv_device *device, uint32_t num_varyings) radv_compute_esgs_itemsize(const struct radv_device *device, uint32_t num_varyings)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t esgs_itemsize; uint32_t esgs_itemsize;
esgs_itemsize = num_varyings * 16; esgs_itemsize = num_varyings * 16;
@ -377,7 +380,7 @@ radv_compute_esgs_itemsize(const struct radv_device *device, uint32_t num_varyin
/* For the ESGS ring in LDS, add 1 dword to reduce LDS bank /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
* conflicts, i.e. each vertex will start on a different bank. * conflicts, i.e. each vertex will start on a different bank.
*/ */
if (device->physical_device->info.gfx_level >= GFX9 && esgs_itemsize) if (pdev->info.gfx_level >= GFX9 && esgs_itemsize)
esgs_itemsize += 4; esgs_itemsize += 4;
return esgs_itemsize; return esgs_itemsize;
@ -562,6 +565,8 @@ static void
gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir, gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,
const struct radv_graphics_state_key *gfx_state, struct radv_shader_info *info) const struct radv_graphics_state_key *gfx_state, struct radv_shader_info *info)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out; info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
info->tcs.tes_inputs_read = ~0ULL; info->tcs.tes_inputs_read = ~0ULL;
info->tcs.tes_patch_inputs_read = ~0ULL; info->tcs.tes_patch_inputs_read = ~0ULL;
@ -571,15 +576,14 @@ gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,
if (gfx_state->ts.patch_control_points) { if (gfx_state->ts.patch_control_points) {
/* Number of tessellation patches per workgroup processed by the current pipeline. */ /* Number of tessellation patches per workgroup processed by the current pipeline. */
info->num_tess_patches = info->num_tess_patches = get_tcs_num_patches(
get_tcs_num_patches(gfx_state->ts.patch_control_points, nir->info.tess.tcs_vertices_out, gfx_state->ts.patch_control_points, nir->info.tess.tcs_vertices_out, info->tcs.num_linked_inputs,
info->tcs.num_linked_inputs, info->tcs.num_linked_outputs, info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs, pdev->hs.tess_offchip_block_dw_size,
info->tcs.num_linked_patch_outputs, device->physical_device->hs.tess_offchip_block_dw_size, pdev->info.gfx_level, pdev->info.family);
device->physical_device->info.gfx_level, device->physical_device->info.family);
/* LDS size used by VS+TCS for storing TCS inputs and outputs. */ /* LDS size used by VS+TCS for storing TCS inputs and outputs. */
info->tcs.num_lds_blocks = info->tcs.num_lds_blocks =
calculate_tess_lds_size(device->physical_device->info.gfx_level, gfx_state->ts.patch_control_points, calculate_tess_lds_size(pdev->info.gfx_level, gfx_state->ts.patch_control_points,
nir->info.tess.tcs_vertices_out, info->tcs.num_linked_inputs, info->num_tess_patches, nir->info.tess.tcs_vertices_out, info->tcs.num_linked_inputs, info->num_tess_patches,
info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs); info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs);
} }
@ -616,7 +620,7 @@ gather_shader_info_tes(struct radv_device *device, const nir_shader *nir, struct
static void static void
radv_init_legacy_gs_ring_info(const struct radv_device *device, struct radv_shader_info *gs_info) radv_init_legacy_gs_ring_info(const struct radv_device *device, struct radv_shader_info *gs_info)
{ {
const struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_legacy_gs_info *gs_ring_info = &gs_info->gs_ring_info; struct radv_legacy_gs_info *gs_ring_info = &gs_info->gs_ring_info;
unsigned num_se = pdev->info.max_se; unsigned num_se = pdev->info.max_se;
unsigned wave_size = 64; unsigned wave_size = 64;
@ -650,6 +654,7 @@ radv_init_legacy_gs_ring_info(const struct radv_device *device, struct radv_shad
static void static void
radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *gs_info) radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *gs_info)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_legacy_gs_info *out = &gs_info->gs_ring_info; struct radv_legacy_gs_info *out = &gs_info->gs_ring_info;
const unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1); const unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1);
const bool uses_adjacency = const bool uses_adjacency =
@ -734,7 +739,7 @@ radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_inf
const uint32_t gs_prims_per_subgroup = gs_prims; const uint32_t gs_prims_per_subgroup = gs_prims;
const uint32_t gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations; const uint32_t gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations;
const uint32_t max_prims_per_subgroup = gs_inst_prims_in_subgroup * gs_info->gs.vertices_out; const uint32_t max_prims_per_subgroup = gs_inst_prims_in_subgroup * gs_info->gs.vertices_out;
const uint32_t lds_granularity = device->physical_device->info.lds_encode_granularity; const uint32_t lds_granularity = pdev->info.lds_encode_granularity;
const uint32_t total_lds_bytes = align(esgs_lds_size * 4, lds_granularity); const uint32_t total_lds_bytes = align(esgs_lds_size * 4, lds_granularity);
out->lds_size = total_lds_bytes / lds_granularity; out->lds_size = total_lds_bytes / lds_granularity;
out->vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) | out->vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) |
@ -750,6 +755,7 @@ radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_inf
static void static void
gather_shader_info_gs(struct radv_device *device, const nir_shader *nir, struct radv_shader_info *info) gather_shader_info_gs(struct radv_device *device, const nir_shader *nir, struct radv_shader_info *info)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned add_clip = nir->info.clip_distance_array_size + nir->info.cull_distance_array_size > 4; unsigned add_clip = nir->info.clip_distance_array_size + nir->info.cull_distance_array_size > 4;
info->gs.gsvs_vertex_size = (util_bitcount64(nir->info.outputs_written) + add_clip) * 16; info->gs.gsvs_vertex_size = (util_bitcount64(nir->info.outputs_written) + add_clip) * 16;
info->gs.max_gsvs_emit_size = info->gs.gsvs_vertex_size * nir->info.gs.vertices_out; info->gs.max_gsvs_emit_size = info->gs.gsvs_vertex_size * nir->info.gs.vertices_out;
@ -770,7 +776,7 @@ gather_shader_info_gs(struct radv_device *device, const nir_shader *nir, struct
info->gs.num_stream_output_components[stream] += num_components; info->gs.num_stream_output_components[stream] += num_components;
} }
info->gs.has_pipeline_stat_query = device->physical_device->emulate_ngg_gs_query_pipeline_stat; info->gs.has_pipeline_stat_query = pdev->emulate_ngg_gs_query_pipeline_stat;
gather_info_unlinked_input(info, nir); gather_info_unlinked_input(info, nir);
@ -830,9 +836,10 @@ gather_shader_info_mesh(struct radv_device *device, const nir_shader *nir,
static void static void
calc_mesh_workgroup_size(const struct radv_device *device, const nir_shader *nir, struct radv_shader_info *info) calc_mesh_workgroup_size(const struct radv_device *device, const nir_shader *nir, struct radv_shader_info *info)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned api_workgroup_size = ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX); unsigned api_workgroup_size = ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX);
if (device->physical_device->mesh_fast_launch_2) { if (pdev->mesh_fast_launch_2) {
/* Use multi-row export. It is also necessary to use the API workgroup size for non-emulated queries. */ /* Use multi-row export. It is also necessary to use the API workgroup size for non-emulated queries. */
info->workgroup_size = api_workgroup_size; info->workgroup_size = api_workgroup_size;
} else { } else {
@ -848,6 +855,7 @@ static void
gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir, gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir,
const struct radv_graphics_state_key *gfx_state, struct radv_shader_info *info) const struct radv_graphics_state_key *gfx_state, struct radv_shader_info *info)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t per_primitive_input_mask = nir->info.inputs_read & nir->info.per_primitive_inputs; uint64_t per_primitive_input_mask = nir->info.inputs_read & nir->info.per_primitive_inputs;
unsigned num_per_primitive_inputs = util_bitcount64(per_primitive_input_mask); unsigned num_per_primitive_inputs = util_bitcount64(per_primitive_input_mask);
assert(num_per_primitive_inputs <= nir->num_inputs); assert(num_per_primitive_inputs <= nir->num_inputs);
@ -855,7 +863,7 @@ gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir,
info->ps.num_interp = nir->num_inputs; info->ps.num_interp = nir->num_inputs;
info->ps.num_prim_interp = 0; info->ps.num_prim_interp = 0;
if (device->physical_device->info.gfx_level == GFX10_3) { if (pdev->info.gfx_level == GFX10_3) {
/* GFX10.3 distinguishes NUM_INTERP and NUM_PRIM_INTERP, but /* GFX10.3 distinguishes NUM_INTERP and NUM_PRIM_INTERP, but
* these are counted together in NUM_INTERP on GFX11. * these are counted together in NUM_INTERP on GFX11.
*/ */
@ -972,7 +980,7 @@ gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir,
*/ */
info->ps.force_sample_iter_shading_rate = info->ps.force_sample_iter_shading_rate =
(info->ps.reads_sample_mask_in && !info->ps.needs_poly_line_smooth) || (info->ps.reads_sample_mask_in && !info->ps.needs_poly_line_smooth) ||
(device->physical_device->info.gfx_level == GFX10_3 && (pdev->info.gfx_level == GFX10_3 &&
(nir->info.fs.sample_interlock_ordered || nir->info.fs.sample_interlock_unordered || (nir->info.fs.sample_interlock_ordered || nir->info.fs.sample_interlock_unordered ||
nir->info.fs.pixel_interlock_ordered || nir->info.fs.pixel_interlock_unordered)); nir->info.fs.pixel_interlock_ordered || nir->info.fs.pixel_interlock_unordered));
@ -992,8 +1000,7 @@ gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir,
*/ */
const bool mask_export_enable = info->ps.writes_sample_mask; const bool mask_export_enable = info->ps.writes_sample_mask;
const bool disable_rbplus = const bool disable_rbplus = pdev->info.has_rbplus && !pdev->info.rbplus_allowed;
device->physical_device->info.has_rbplus && !device->physical_device->info.rbplus_allowed;
info->ps.db_shader_control = info->ps.db_shader_control =
S_02880C_Z_EXPORT_ENABLE(info->ps.writes_z) | S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(info->ps.writes_stencil) | S_02880C_Z_EXPORT_ENABLE(info->ps.writes_z) | S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(info->ps.writes_stencil) |
@ -1020,9 +1027,10 @@ static void
gather_shader_info_cs(struct radv_device *device, const nir_shader *nir, const struct radv_shader_stage_key *stage_key, gather_shader_info_cs(struct radv_device *device, const nir_shader *nir, const struct radv_shader_stage_key *stage_key,
struct radv_shader_info *info) struct radv_shader_info *info)
{ {
unsigned default_wave_size = device->physical_device->cs_wave_size; const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned default_wave_size = pdev->cs_wave_size;
if (info->cs.uses_rt) if (info->cs.uses_rt)
default_wave_size = device->physical_device->rt_wave_size; default_wave_size = pdev->rt_wave_size;
unsigned local_size = nir->info.workgroup_size[0] * nir->info.workgroup_size[1] * nir->info.workgroup_size[2]; unsigned local_size = nir->info.workgroup_size[0] * nir->info.workgroup_size[1] * nir->info.workgroup_size[2];
@ -1040,14 +1048,14 @@ gather_shader_info_cs(struct radv_device *device, const nir_shader *nir, const s
info->wave_size = required_subgroup_size; info->wave_size = required_subgroup_size;
} else if (require_full_subgroups) { } else if (require_full_subgroups) {
info->wave_size = RADV_SUBGROUP_SIZE; info->wave_size = RADV_SUBGROUP_SIZE;
} else if (device->physical_device->info.gfx_level >= GFX10 && local_size <= 32) { } else if (pdev->info.gfx_level >= GFX10 && local_size <= 32) {
/* Use wave32 for small workgroups. */ /* Use wave32 for small workgroups. */
info->wave_size = 32; info->wave_size = 32;
} else { } else {
info->wave_size = default_wave_size; info->wave_size = default_wave_size;
} }
if (device->physical_device->info.has_cs_regalloc_hang_bug) { if (pdev->info.has_cs_regalloc_hang_bug) {
info->cs.regalloc_hang_bug = info->cs.block_size[0] * info->cs.block_size[1] * info->cs.block_size[2] > 256; info->cs.regalloc_hang_bug = info->cs.block_size[0] * info->cs.block_size[1] * info->cs.block_size[2] > 256;
} }
} }
@ -1083,7 +1091,8 @@ gather_shader_info_task(struct radv_device *device, const nir_shader *nir,
static uint32_t static uint32_t
radv_get_user_data_0(const struct radv_device *device, struct radv_shader_info *info) radv_get_user_data_0(const struct radv_device *device, struct radv_shader_info *info)
{ {
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
switch (info->stage) { switch (info->stage) {
case MESA_SHADER_VERTEX: case MESA_SHADER_VERTEX:
@ -1139,7 +1148,8 @@ radv_get_user_data_0(const struct radv_device *device, struct radv_shader_info *
static bool static bool
radv_is_merged_shader_compiled_separately(const struct radv_device *device, const struct radv_shader_info *info) radv_is_merged_shader_compiled_separately(const struct radv_device *device, const struct radv_shader_info *info)
{ {
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
if (gfx_level >= GFX9) { if (gfx_level >= GFX9) {
switch (info->stage) { switch (info->stage) {
@ -1180,6 +1190,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
const struct radv_graphics_state_key *gfx_state, const enum radv_pipeline_type pipeline_type, const struct radv_graphics_state_key *gfx_state, const enum radv_pipeline_type pipeline_type,
bool consider_force_vrs, struct radv_shader_info *info) bool consider_force_vrs, struct radv_shader_info *info)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct nir_function *func = (struct nir_function *)exec_list_get_head_const(&nir->functions); struct nir_function *func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
if (layout->use_dynamic_descriptors) { if (layout->use_dynamic_descriptors) {
@ -1257,7 +1268,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
/* The HW always assumes that there is at least 1 per-vertex param. /* The HW always assumes that there is at least 1 per-vertex param.
* so if there aren't any, we have to offset per-primitive params by 1. * so if there aren't any, we have to offset per-primitive params by 1.
*/ */
const unsigned extra_offset = !!(total_param_exports == 0 && device->physical_device->info.gfx_level >= GFX11); const unsigned extra_offset = !!(total_param_exports == 0 && pdev->info.gfx_level >= GFX11);
/* Per-primitive outputs: the HW needs these to be last. */ /* Per-primitive outputs: the HW needs these to be last. */
assign_outinfo_params(outinfo, per_prim_mask, &total_param_exports, extra_offset); assign_outinfo_params(outinfo, per_prim_mask, &total_param_exports, extra_offset);
@ -1274,7 +1285,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
/* Used by compute and mesh shaders. Mesh shaders must always declare this before GFX11. */ /* Used by compute and mesh shaders. Mesh shaders must always declare this before GFX11. */
info->cs.uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS) || info->cs.uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS) ||
(nir->info.stage == MESA_SHADER_MESH && device->physical_device->info.gfx_level < GFX11); (nir->info.stage == MESA_SHADER_MESH && pdev->info.gfx_level < GFX11);
info->cs.uses_local_invocation_idx = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) | info->cs.uses_local_invocation_idx = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) |
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) | BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) |
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS) | BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS) |
@ -1348,9 +1359,9 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
break; break;
case MESA_SHADER_TESS_CTRL: case MESA_SHADER_TESS_CTRL:
if (gfx_state->ts.patch_control_points) { if (gfx_state->ts.patch_control_points) {
info->workgroup_size = ac_compute_lshs_workgroup_size( info->workgroup_size =
device->physical_device->info.gfx_level, MESA_SHADER_TESS_CTRL, info->num_tess_patches, ac_compute_lshs_workgroup_size(pdev->info.gfx_level, MESA_SHADER_TESS_CTRL, info->num_tess_patches,
gfx_state->ts.patch_control_points, info->tcs.tcs_vertices_out); gfx_state->ts.patch_control_points, info->tcs.tcs_vertices_out);
} else { } else {
/* Set the maximum possible value when the workgroup size can't be determined. */ /* Set the maximum possible value when the workgroup size can't be determined. */
info->workgroup_size = 256; info->workgroup_size = 256;
@ -1371,7 +1382,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
unsigned es_verts_per_subgroup = G_028A44_ES_VERTS_PER_SUBGRP(info->gs_ring_info.vgt_gs_onchip_cntl); unsigned es_verts_per_subgroup = G_028A44_ES_VERTS_PER_SUBGRP(info->gs_ring_info.vgt_gs_onchip_cntl);
unsigned gs_inst_prims_in_subgroup = G_028A44_GS_INST_PRIMS_IN_SUBGRP(info->gs_ring_info.vgt_gs_onchip_cntl); unsigned gs_inst_prims_in_subgroup = G_028A44_GS_INST_PRIMS_IN_SUBGRP(info->gs_ring_info.vgt_gs_onchip_cntl);
info->workgroup_size = ac_compute_esgs_workgroup_size(device->physical_device->info.gfx_level, info->wave_size, info->workgroup_size = ac_compute_esgs_workgroup_size(pdev->info.gfx_level, info->wave_size,
es_verts_per_subgroup, gs_inst_prims_in_subgroup); es_verts_per_subgroup, gs_inst_prims_in_subgroup);
} else { } else {
/* Set the maximum possible value by default, this will be optimized during linking if /* Set the maximum possible value by default, this will be optimized during linking if
@ -1441,6 +1452,7 @@ static unsigned
gfx10_get_ngg_scratch_lds_base(const struct radv_device *device, const struct radv_shader_info *es_info, gfx10_get_ngg_scratch_lds_base(const struct radv_device *device, const struct radv_shader_info *es_info,
const struct radv_shader_info *gs_info, const struct gfx10_ngg_info *ngg_info) const struct radv_shader_info *gs_info, const struct gfx10_ngg_info *ngg_info)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t scratch_lds_base; uint32_t scratch_lds_base;
if (gs_info) { if (gs_info) {
@ -1451,7 +1463,7 @@ gfx10_get_ngg_scratch_lds_base(const struct radv_device *device, const struct ra
} else { } else {
const bool uses_instanceid = es_info->vs.needs_instance_id; const bool uses_instanceid = es_info->vs.needs_instance_id;
const bool uses_primitive_id = es_info->uses_prim_id; const bool uses_primitive_id = es_info->uses_prim_id;
const bool streamout_enabled = es_info->so.num_outputs && device->physical_device->use_ngg_streamout; const bool streamout_enabled = es_info->so.num_outputs && pdev->use_ngg_streamout;
const uint32_t num_outputs = const uint32_t num_outputs =
es_info->stage == MESA_SHADER_VERTEX ? es_info->vs.num_outputs : es_info->tes.num_outputs; es_info->stage == MESA_SHADER_VERTEX ? es_info->vs.num_outputs : es_info->tes.num_outputs;
unsigned pervertex_lds_bytes = ac_ngg_nogs_get_pervertex_lds_size( unsigned pervertex_lds_bytes = ac_ngg_nogs_get_pervertex_lds_size(
@ -1471,7 +1483,8 @@ void
gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info, gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info,
struct gfx10_ngg_info *out) struct gfx10_ngg_info *out)
{ {
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
const unsigned max_verts_per_prim = radv_get_num_input_vertices(es_info, gs_info); const unsigned max_verts_per_prim = radv_get_num_input_vertices(es_info, gs_info);
const unsigned min_verts_per_prim = gs_info ? max_verts_per_prim : 1; const unsigned min_verts_per_prim = gs_info ? max_verts_per_prim : 1;
@ -1683,9 +1696,8 @@ gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es
/* Get scratch LDS usage. */ /* Get scratch LDS usage. */
const struct radv_shader_info *info = gs_info ? gs_info : es_info; const struct radv_shader_info *info = gs_info ? gs_info : es_info;
const unsigned scratch_lds_size = const unsigned scratch_lds_size = ac_ngg_get_scratch_lds_size(info->stage, info->workgroup_size, info->wave_size,
ac_ngg_get_scratch_lds_size(info->stage, info->workgroup_size, info->wave_size, pdev->use_ngg_streamout, info->has_ngg_culling);
device->physical_device->use_ngg_streamout, info->has_ngg_culling);
out->lds_size = out->scratch_lds_base + scratch_lds_size; out->lds_size = out->scratch_lds_base + scratch_lds_size;
unsigned workgroup_size = unsigned workgroup_size =
@ -1700,6 +1712,8 @@ static void
radv_determine_ngg_settings(struct radv_device *device, struct radv_shader_stage *es_stage, radv_determine_ngg_settings(struct radv_device *device, struct radv_shader_stage *es_stage,
struct radv_shader_stage *fs_stage, const struct radv_graphics_state_key *gfx_state) struct radv_shader_stage *fs_stage, const struct radv_graphics_state_key *gfx_state)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
assert(es_stage->stage == MESA_SHADER_VERTEX || es_stage->stage == MESA_SHADER_TESS_EVAL); assert(es_stage->stage == MESA_SHADER_VERTEX || es_stage->stage == MESA_SHADER_TESS_EVAL);
assert(!fs_stage || fs_stage->stage == MESA_SHADER_FRAGMENT); assert(!fs_stage || fs_stage->stage == MESA_SHADER_FRAGMENT);
@ -1715,8 +1729,8 @@ radv_determine_ngg_settings(struct radv_device *device, struct radv_shader_stage
: 3; : 3;
} }
es_stage->info.has_ngg_culling = radv_consider_culling(device->physical_device, es_stage->nir, ps_inputs_read, es_stage->info.has_ngg_culling =
num_vertices_per_prim, &es_stage->info); radv_consider_culling(pdev, es_stage->nir, ps_inputs_read, num_vertices_per_prim, &es_stage->info);
nir_function_impl *impl = nir_shader_get_entrypoint(es_stage->nir); nir_function_impl *impl = nir_shader_get_entrypoint(es_stage->nir);
es_stage->info.has_ngg_early_prim_export = exec_list_is_singular(&impl->body); es_stage->info.has_ngg_early_prim_export = exec_list_is_singular(&impl->body);
@ -1732,6 +1746,8 @@ static void
radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *producer, radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *producer,
struct radv_shader_stage *consumer, const struct radv_graphics_state_key *gfx_state) struct radv_shader_stage *consumer, const struct radv_graphics_state_key *gfx_state)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
/* Export primitive ID and clip/cull distances if read by the FS, or export unconditionally when /* Export primitive ID and clip/cull distances if read by the FS, or export unconditionally when
* the next stage is unknown (with graphics pipeline library). * the next stage is unknown (with graphics pipeline library).
*/ */
@ -1782,9 +1798,9 @@ radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *pro
struct radv_shader_stage *tcs_stage = consumer; struct radv_shader_stage *tcs_stage = consumer;
if (gfx_state->ts.patch_control_points) { if (gfx_state->ts.patch_control_points) {
vs_stage->info.workgroup_size = ac_compute_lshs_workgroup_size( vs_stage->info.workgroup_size =
device->physical_device->info.gfx_level, MESA_SHADER_VERTEX, tcs_stage->info.num_tess_patches, ac_compute_lshs_workgroup_size(pdev->info.gfx_level, MESA_SHADER_VERTEX, tcs_stage->info.num_tess_patches,
gfx_state->ts.patch_control_points, tcs_stage->info.tcs.tcs_vertices_out); gfx_state->ts.patch_control_points, tcs_stage->info.tcs.tcs_vertices_out);
if (!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) { if (!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) {
/* When the number of TCS input and output vertices are the same (typically 3): /* When the number of TCS input and output vertices are the same (typically 3):
@ -1797,7 +1813,7 @@ radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *pro
* instruction dominating another with a different mode. * instruction dominating another with a different mode.
*/ */
vs_stage->info.vs.tcs_in_out_eq = vs_stage->info.vs.tcs_in_out_eq =
device->physical_device->info.gfx_level >= GFX9 && pdev->info.gfx_level >= GFX9 &&
gfx_state->ts.patch_control_points == tcs_stage->info.tcs.tcs_vertices_out && gfx_state->ts.patch_control_points == tcs_stage->info.tcs.tcs_vertices_out &&
vs_stage->nir->info.float_controls_execution_mode == tcs_stage->nir->info.float_controls_execution_mode; vs_stage->nir->info.float_controls_execution_mode == tcs_stage->nir->info.float_controls_execution_mode;
@ -1865,6 +1881,8 @@ void
radv_nir_shader_info_link(struct radv_device *device, const struct radv_graphics_state_key *gfx_state, radv_nir_shader_info_link(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
struct radv_shader_stage *stages) struct radv_shader_stage *stages)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
/* Walk backwards to link */ /* Walk backwards to link */
struct radv_shader_stage *next_stage = stages[MESA_SHADER_FRAGMENT].nir ? &stages[MESA_SHADER_FRAGMENT] : NULL; struct radv_shader_stage *next_stage = stages[MESA_SHADER_FRAGMENT].nir ? &stages[MESA_SHADER_FRAGMENT] : NULL;
@ -1877,7 +1895,7 @@ radv_nir_shader_info_link(struct radv_device *device, const struct radv_graphics
next_stage = &stages[s]; next_stage = &stages[s];
} }
if (device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
/* Merge shader info for VS+TCS. */ /* Merge shader info for VS+TCS. */
if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_TESS_CTRL].nir) { if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_TESS_CTRL].nir) {
radv_nir_shader_info_merge(&stages[MESA_SHADER_VERTEX], &stages[MESA_SHADER_TESS_CTRL]); radv_nir_shader_info_merge(&stages[MESA_SHADER_VERTEX], &stages[MESA_SHADER_TESS_CTRL]);

View file

@ -128,6 +128,7 @@ static VkResult
radv_shader_object_init_graphics(struct radv_shader_object *shader_obj, struct radv_device *device, radv_shader_object_init_graphics(struct radv_shader_object *shader_obj, struct radv_device *device,
const VkShaderCreateInfoEXT *pCreateInfo) const VkShaderCreateInfoEXT *pCreateInfo)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
gl_shader_stage stage = vk_to_mesa_shader_stage(pCreateInfo->stage); gl_shader_stage stage = vk_to_mesa_shader_stage(pCreateInfo->stage);
struct radv_shader_stage stages[MESA_VULKAN_SHADER_STAGES]; struct radv_shader_stage stages[MESA_VULKAN_SHADER_STAGES];
@ -149,7 +150,7 @@ radv_shader_object_init_graphics(struct radv_shader_object *shader_obj, struct r
gfx_state.dynamic_provoking_vtx_mode = true; gfx_state.dynamic_provoking_vtx_mode = true;
gfx_state.dynamic_line_rast_mode = true; gfx_state.dynamic_line_rast_mode = true;
if (device->physical_device->info.gfx_level >= GFX11) if (pdev->info.gfx_level >= GFX11)
gfx_state.ps.exports_mrtz_via_epilog = true; gfx_state.ps.exports_mrtz_via_epilog = true;
struct radv_shader *shader = NULL; struct radv_shader *shader = NULL;
@ -297,6 +298,7 @@ static VkResult
radv_shader_object_init(struct radv_shader_object *shader_obj, struct radv_device *device, radv_shader_object_init(struct radv_shader_object *shader_obj, struct radv_device *device,
const VkShaderCreateInfoEXT *pCreateInfo) const VkShaderCreateInfoEXT *pCreateInfo)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_shader_layout layout; struct radv_shader_layout layout;
VkResult result; VkResult result;
@ -317,7 +319,7 @@ radv_shader_object_init(struct radv_shader_object *shader_obj, struct radv_devic
const uint8_t *cache_uuid = blob_read_bytes(&blob, VK_UUID_SIZE); const uint8_t *cache_uuid = blob_read_bytes(&blob, VK_UUID_SIZE);
if (memcmp(cache_uuid, device->physical_device->cache_uuid, VK_UUID_SIZE)) if (memcmp(cache_uuid, pdev->cache_uuid, VK_UUID_SIZE))
return VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT; return VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT;
const bool has_main_binary = blob_read_uint32(&blob); const bool has_main_binary = blob_read_uint32(&blob);
@ -407,6 +409,7 @@ radv_shader_object_create_linked(VkDevice _device, uint32_t createInfoCount, con
const VkAllocationCallbacks *pAllocator, VkShaderEXT *pShaders) const VkAllocationCallbacks *pAllocator, VkShaderEXT *pShaders)
{ {
RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_device, device, _device);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_shader_stage stages[MESA_VULKAN_SHADER_STAGES]; struct radv_shader_stage stages[MESA_VULKAN_SHADER_STAGES];
for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) { for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
@ -425,7 +428,7 @@ radv_shader_object_create_linked(VkDevice _device, uint32_t createInfoCount, con
gfx_state.dynamic_provoking_vtx_mode = true; gfx_state.dynamic_provoking_vtx_mode = true;
gfx_state.dynamic_line_rast_mode = true; gfx_state.dynamic_line_rast_mode = true;
if (device->physical_device->info.gfx_level >= GFX11) if (pdev->info.gfx_level >= GFX11)
gfx_state.ps.exports_mrtz_via_epilog = true; gfx_state.ps.exports_mrtz_via_epilog = true;
for (unsigned i = 0; i < createInfoCount; i++) { for (unsigned i = 0; i < createInfoCount; i++) {
@ -621,6 +624,7 @@ radv_GetShaderBinaryDataEXT(VkDevice _device, VkShaderEXT shader, size_t *pDataS
{ {
RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_shader_object, shader_obj, shader); RADV_FROM_HANDLE(radv_shader_object, shader_obj, shader);
const struct radv_physical_device *pdev = radv_device_physical(device);
const size_t size = radv_get_shader_object_size(shader_obj); const size_t size = radv_get_shader_object_size(shader_obj);
if (!pData) { if (!pData) {
@ -635,7 +639,7 @@ radv_GetShaderBinaryDataEXT(VkDevice _device, VkShaderEXT shader, size_t *pDataS
struct blob blob; struct blob blob;
blob_init_fixed(&blob, pData, *pDataSize); blob_init_fixed(&blob, pData, *pDataSize);
blob_write_bytes(&blob, device->physical_device->cache_uuid, VK_UUID_SIZE); blob_write_bytes(&blob, pdev->cache_uuid, VK_UUID_SIZE);
radv_write_shader_binary(&blob, shader_obj->binary); radv_write_shader_binary(&blob, shader_obj->binary);

View file

@ -62,7 +62,8 @@ radv_spm_init_bo(struct radv_device *device)
static void static void
radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf) radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
{ {
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
struct ac_spm *spm = &device->spm; struct ac_spm *spm = &device->spm;
if (gfx_level >= GFX11) { if (gfx_level >= GFX11) {
@ -142,7 +143,8 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu
void void
radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf) radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
{ {
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
struct ac_spm *spm = &device->spm; struct ac_spm *spm = &device->spm;
uint64_t va = radv_buffer_get_va(spm->bo); uint64_t va = radv_buffer_get_va(spm->bo);
uint64_t ring_size = spm->buffer_size; uint64_t ring_size = spm->buffer_size;
@ -170,7 +172,7 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum r
radeon_set_uconfig_reg(cs, R_03726C_RLC_SPM_ACCUM_MODE, 0); radeon_set_uconfig_reg(cs, R_03726C_RLC_SPM_ACCUM_MODE, 0);
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
radeon_set_uconfig_reg(cs, R_03721C_RLC_SPM_PERFMON_SEGMENT_SIZE, radeon_set_uconfig_reg(cs, R_03721C_RLC_SPM_PERFMON_SEGMENT_SIZE,
S_03721C_TOTAL_NUM_SEGMENT(total_muxsel_lines) | S_03721C_TOTAL_NUM_SEGMENT(total_muxsel_lines) |
S_03721C_GLOBAL_NUM_SEGMENT(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]) | S_03721C_GLOBAL_NUM_SEGMENT(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]) |
@ -238,8 +240,9 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum r
bool bool
radv_spm_init(struct radv_device *device) radv_spm_init(struct radv_device *device)
{ {
const struct radeon_info *gpu_info = &device->physical_device->info; struct radv_physical_device *pdev = radv_device_physical(device);
struct ac_perfcounters *pc = &device->physical_device->ac_perfcounters; const struct radeon_info *gpu_info = &pdev->info;
struct ac_perfcounters *pc = &pdev->ac_perfcounters;
/* We failed to initialize the performance counters. */ /* We failed to initialize the performance counters. */
if (!pc->blocks) if (!pc->blocks)

View file

@ -54,15 +54,16 @@ gfx11_get_sqtt_ctrl(const struct radv_device *device, bool enable)
static uint32_t static uint32_t
gfx10_get_sqtt_ctrl(const struct radv_device *device, bool enable) gfx10_get_sqtt_ctrl(const struct radv_device *device, bool enable)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t sqtt_ctrl = S_008D1C_MODE(enable) | S_008D1C_HIWATER(5) | S_008D1C_UTIL_TIMER(1) | uint32_t sqtt_ctrl = S_008D1C_MODE(enable) | S_008D1C_HIWATER(5) | S_008D1C_UTIL_TIMER(1) |
S_008D1C_RT_FREQ(2) | /* 4096 clk */ S_008D1C_RT_FREQ(2) | /* 4096 clk */
S_008D1C_DRAW_EVENT_EN(1) | S_008D1C_REG_STALL_EN(1) | S_008D1C_SPI_STALL_EN(1) | S_008D1C_DRAW_EVENT_EN(1) | S_008D1C_REG_STALL_EN(1) | S_008D1C_SPI_STALL_EN(1) |
S_008D1C_SQ_STALL_EN(1) | S_008D1C_REG_DROP_ON_STALL(0); S_008D1C_SQ_STALL_EN(1) | S_008D1C_REG_DROP_ON_STALL(0);
if (device->physical_device->info.gfx_level == GFX10_3) if (pdev->info.gfx_level == GFX10_3)
sqtt_ctrl |= S_008D1C_LOWATER_OFFSET(4); sqtt_ctrl |= S_008D1C_LOWATER_OFFSET(4);
if (device->physical_device->info.has_sqtt_auto_flush_mode_bug) if (pdev->info.has_sqtt_auto_flush_mode_bug)
sqtt_ctrl |= S_008D1C_AUTO_FLUSH_MODE(1); sqtt_ctrl |= S_008D1C_AUTO_FLUSH_MODE(1);
return sqtt_ctrl; return sqtt_ctrl;
@ -86,10 +87,11 @@ radv_ip_to_queue_family(enum amd_ip_type t)
static void static void
radv_emit_wait_for_idle(const struct radv_device *device, struct radeon_cmdbuf *cs, int family) radv_emit_wait_for_idle(const struct radv_device *device, struct radeon_cmdbuf *cs, int family)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum radv_queue_family qf = radv_ip_to_queue_family(family); const enum radv_queue_family qf = radv_ip_to_queue_family(family);
enum rgp_flush_bits sqtt_flush_bits = 0; enum rgp_flush_bits sqtt_flush_bits = 0;
radv_cs_emit_cache_flush( radv_cs_emit_cache_flush(
device->ws, cs, device->physical_device->info.gfx_level, NULL, 0, qf, device->ws, cs, pdev->info.gfx_level, NULL, 0, qf,
(family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH (family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH
: (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) | : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2, RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2,
@ -99,9 +101,10 @@ radv_emit_wait_for_idle(const struct radv_device *device, struct radeon_cmdbuf *
static void static void
radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf) radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
{ {
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
uint32_t shifted_size = device->sqtt.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT; uint32_t shifted_size = device->sqtt.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
const struct radeon_info *gpu_info = &device->physical_device->info; const struct radeon_info *gpu_info = &pdev->info;
const unsigned shader_mask = ac_sqtt_get_shader_mask(gpu_info); const unsigned shader_mask = ac_sqtt_get_shader_mask(gpu_info);
unsigned max_se = gpu_info->max_se; unsigned max_se = gpu_info->max_se;
@ -111,7 +114,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
uint64_t va = radv_buffer_get_va(device->sqtt.bo); uint64_t va = radv_buffer_get_va(device->sqtt.bo);
uint64_t data_va = ac_sqtt_get_data_va(gpu_info, &device->sqtt, va, se); uint64_t data_va = ac_sqtt_get_data_va(gpu_info, &device->sqtt, va, se);
uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT; uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
int active_cu = ac_sqtt_get_active_cu(&device->physical_device->info, se); int active_cu = ac_sqtt_get_active_cu(&pdev->info, se);
if (ac_sqtt_se_is_disabled(gpu_info, se)) if (ac_sqtt_se_is_disabled(gpu_info, se))
continue; continue;
@ -120,7 +123,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1)); S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1));
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
/* Order seems important for the following 2 registers. */ /* Order seems important for the following 2 registers. */
radeon_set_perfctr_reg(gfx_level, qf, cs, R_0367A4_SQ_THREAD_TRACE_BUF0_SIZE, radeon_set_perfctr_reg(gfx_level, qf, cs, R_0367A4_SQ_THREAD_TRACE_BUF0_SIZE,
S_0367A4_SIZE(shifted_size) | S_0367A4_BASE_HI(shifted_va >> 32)); S_0367A4_SIZE(shifted_size) | S_0367A4_BASE_HI(shifted_va >> 32));
@ -151,7 +154,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
/* Should be emitted last (it enables thread traces). */ /* Should be emitted last (it enables thread traces). */
radeon_set_perfctr_reg(gfx_level, qf, cs, R_0367B0_SQ_THREAD_TRACE_CTRL, gfx11_get_sqtt_ctrl(device, true)); radeon_set_perfctr_reg(gfx_level, qf, cs, R_0367B0_SQ_THREAD_TRACE_CTRL, gfx11_get_sqtt_ctrl(device, true));
} else if (device->physical_device->info.gfx_level >= GFX10) { } else if (pdev->info.gfx_level >= GFX10) {
/* Order seems important for the following 2 registers. */ /* Order seems important for the following 2 registers. */
radeon_set_privileged_config_reg(cs, R_008D04_SQ_THREAD_TRACE_BUF0_SIZE, radeon_set_privileged_config_reg(cs, R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
S_008D04_SIZE(shifted_size) | S_008D04_BASE_HI(shifted_va >> 32)); S_008D04_SIZE(shifted_size) | S_008D04_BASE_HI(shifted_va >> 32));
@ -196,7 +199,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
S_030CC8_VM_ID_MASK(0) | S_030CC8_REG_STALL_EN(1) | S_030CC8_SPI_STALL_EN(1) | S_030CC8_VM_ID_MASK(0) | S_030CC8_REG_STALL_EN(1) | S_030CC8_SPI_STALL_EN(1) |
S_030CC8_SQ_STALL_EN(1); S_030CC8_SQ_STALL_EN(1);
if (device->physical_device->info.gfx_level < GFX9) { if (pdev->info.gfx_level < GFX9) {
sqtt_mask |= S_030CC8_RANDOM_SEED(0xffff); sqtt_mask |= S_030CC8_RANDOM_SEED(0xffff);
} }
@ -214,7 +217,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
radeon_set_uconfig_reg(cs, R_030CEC_SQ_THREAD_TRACE_HIWATER, S_030CEC_HIWATER(4)); radeon_set_uconfig_reg(cs, R_030CEC_SQ_THREAD_TRACE_HIWATER, S_030CEC_HIWATER(4));
if (device->physical_device->info.gfx_level == GFX9) { if (pdev->info.gfx_level == GFX9) {
/* Reset thread trace status errors. */ /* Reset thread trace status errors. */
radeon_set_uconfig_reg(cs, R_030CE8_SQ_THREAD_TRACE_STATUS, S_030CE8_UTC_ERROR(0)); radeon_set_uconfig_reg(cs, R_030CE8_SQ_THREAD_TRACE_STATUS, S_030CE8_UTC_ERROR(0));
} }
@ -225,7 +228,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */ S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */
S_030CD8_MODE(1); S_030CD8_MODE(1);
if (device->physical_device->info.gfx_level == GFX9) { if (pdev->info.gfx_level == GFX9) {
/* Count SQTT traffic in TCC perf counters. */ /* Count SQTT traffic in TCC perf counters. */
sqtt_mode |= S_030CD8_TC_PERF_EN(1); sqtt_mode |= S_030CD8_TC_PERF_EN(1);
} }
@ -274,17 +277,17 @@ static const uint32_t gfx11_sqtt_info_regs[] = {
static void static void
radv_copy_sqtt_info_regs(const struct radv_device *device, struct radeon_cmdbuf *cs, unsigned se_index) radv_copy_sqtt_info_regs(const struct radv_device *device, struct radeon_cmdbuf *cs, unsigned se_index)
{ {
const struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
const uint32_t *sqtt_info_regs = NULL; const uint32_t *sqtt_info_regs = NULL;
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
sqtt_info_regs = gfx11_sqtt_info_regs; sqtt_info_regs = gfx11_sqtt_info_regs;
} else if (device->physical_device->info.gfx_level >= GFX10) { } else if (pdev->info.gfx_level >= GFX10) {
sqtt_info_regs = gfx10_sqtt_info_regs; sqtt_info_regs = gfx10_sqtt_info_regs;
} else if (device->physical_device->info.gfx_level == GFX9) { } else if (pdev->info.gfx_level == GFX9) {
sqtt_info_regs = gfx9_sqtt_info_regs; sqtt_info_regs = gfx9_sqtt_info_regs;
} else { } else {
assert(device->physical_device->info.gfx_level == GFX8); assert(pdev->info.gfx_level == GFX8);
sqtt_info_regs = gfx8_sqtt_info_regs; sqtt_info_regs = gfx8_sqtt_info_regs;
} }
@ -330,8 +333,9 @@ radv_copy_sqtt_info_regs(const struct radv_device *device, struct radeon_cmdbuf
static void static void
radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf) radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
{ {
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned max_se = device->physical_device->info.max_se; const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
unsigned max_se = pdev->info.max_se;
radeon_check_space(device->ws, cs, 8 + max_se * 64); radeon_check_space(device->ws, cs, 8 + max_se * 64);
@ -346,20 +350,20 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs,
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0)); radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0));
if (device->physical_device->info.has_sqtt_rb_harvest_bug) { if (pdev->info.has_sqtt_rb_harvest_bug) {
/* Some chips with disabled RBs should wait for idle because FINISH_DONE doesn't work. */ /* Some chips with disabled RBs should wait for idle because FINISH_DONE doesn't work. */
radv_emit_wait_for_idle(device, cs, qf); radv_emit_wait_for_idle(device, cs, qf);
} }
for (unsigned se = 0; se < max_se; se++) { for (unsigned se = 0; se < max_se; se++) {
if (ac_sqtt_se_is_disabled(&device->physical_device->info, se)) if (ac_sqtt_se_is_disabled(&pdev->info, se))
continue; continue;
/* Target SEi and SH0. */ /* Target SEi and SH0. */
radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1)); S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1));
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
/* Make sure to wait for the trace buffer. */ /* Make sure to wait for the trace buffer. */
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */ radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */
@ -380,8 +384,8 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs,
radeon_emit(cs, 0); /* reference value */ radeon_emit(cs, 0); /* reference value */
radeon_emit(cs, ~C_0367D0_BUSY); /* mask */ radeon_emit(cs, ~C_0367D0_BUSY); /* mask */
radeon_emit(cs, 4); /* poll interval */ radeon_emit(cs, 4); /* poll interval */
} else if (device->physical_device->info.gfx_level >= GFX10) { } else if (pdev->info.gfx_level >= GFX10) {
if (!device->physical_device->info.has_sqtt_rb_harvest_bug) { if (!pdev->info.has_sqtt_rb_harvest_bug) {
/* Make sure to wait for the trace buffer. */ /* Make sure to wait for the trace buffer. */
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */ radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */
@ -429,7 +433,8 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs,
void void
radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords) radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords)
{ {
const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
const enum radv_queue_family qf = cmd_buffer->qf; const enum radv_queue_family qf = cmd_buffer->qf;
struct radv_device *device = cmd_buffer->device; struct radv_device *device = cmd_buffer->device;
struct radeon_cmdbuf *cs = cmd_buffer->cs; struct radeon_cmdbuf *cs = cmd_buffer->cs;
@ -446,7 +451,7 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da
/* Without the perfctr bit the CP might not always pass the /* Without the perfctr bit the CP might not always pass the
* write on correctly. */ * write on correctly. */
if (device->physical_device->info.gfx_level >= GFX10) if (pdev->info.gfx_level >= GFX10)
radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count); radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
else else
radeon_set_uconfig_reg_seq(cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count); radeon_set_uconfig_reg_seq(cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
@ -460,11 +465,13 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da
void void
radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf *cs, bool enable) radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf *cs, bool enable)
{ {
if (device->physical_device->info.gfx_level >= GFX9) { const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level >= GFX9) {
uint32_t spi_config_cntl = S_031100_GPR_WRITE_PRIORITY(0x2c688) | S_031100_EXP_PRIORITY_ORDER(3) | uint32_t spi_config_cntl = S_031100_GPR_WRITE_PRIORITY(0x2c688) | S_031100_EXP_PRIORITY_ORDER(3) |
S_031100_ENABLE_SQG_TOP_EVENTS(enable) | S_031100_ENABLE_SQG_BOP_EVENTS(enable); S_031100_ENABLE_SQG_TOP_EVENTS(enable) | S_031100_ENABLE_SQG_BOP_EVENTS(enable);
if (device->physical_device->info.gfx_level >= GFX10) if (pdev->info.gfx_level >= GFX10)
spi_config_cntl |= S_031100_PS_PKR_PRIORITY_CNTL(3); spi_config_cntl |= S_031100_PS_PKR_PRIORITY_CNTL(3);
radeon_set_uconfig_reg(cs, R_031100_SPI_CONFIG_CNTL, spi_config_cntl); radeon_set_uconfig_reg(cs, R_031100_SPI_CONFIG_CNTL, spi_config_cntl);
@ -478,12 +485,14 @@ radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf
void void
radv_emit_inhibit_clockgating(const struct radv_device *device, struct radeon_cmdbuf *cs, bool inhibit) radv_emit_inhibit_clockgating(const struct radv_device *device, struct radeon_cmdbuf *cs, bool inhibit)
{ {
if (device->physical_device->info.gfx_level >= GFX11) const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level >= GFX11)
return; /* not needed */ return; /* not needed */
if (device->physical_device->info.gfx_level >= GFX10) { if (pdev->info.gfx_level >= GFX10) {
radeon_set_uconfig_reg(cs, R_037390_RLC_PERFMON_CLK_CNTL, S_037390_PERFMON_CLOCK_STATE(inhibit)); radeon_set_uconfig_reg(cs, R_037390_RLC_PERFMON_CLK_CNTL, S_037390_PERFMON_CLOCK_STATE(inhibit));
} else if (device->physical_device->info.gfx_level >= GFX8) { } else if (pdev->info.gfx_level >= GFX8) {
radeon_set_uconfig_reg(cs, R_0372FC_RLC_PERFMON_CLK_CNTL, S_0372FC_PERFMON_CLOCK_STATE(inhibit)); radeon_set_uconfig_reg(cs, R_0372FC_RLC_PERFMON_CLK_CNTL, S_0372FC_PERFMON_CLOCK_STATE(inhibit));
} }
} }
@ -620,7 +629,8 @@ radv_sqtt_finish_queue_event(struct radv_device *device)
static bool static bool
radv_sqtt_init_bo(struct radv_device *device) radv_sqtt_init_bo(struct radv_device *device)
{ {
unsigned max_se = device->physical_device->info.max_se; const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned max_se = pdev->info.max_se;
struct radeon_winsys *ws = device->ws; struct radeon_winsys *ws = device->ws;
VkResult result; VkResult result;
uint64_t size; uint64_t size;
@ -801,6 +811,7 @@ bool
radv_begin_sqtt(struct radv_queue *queue) radv_begin_sqtt(struct radv_queue *queue)
{ {
struct radv_device *device = queue->device; struct radv_device *device = queue->device;
const struct radv_physical_device *pdev = radv_device_physical(device);
enum radv_queue_family family = queue->state.qf; enum radv_queue_family family = queue->state.qf;
struct radeon_winsys *ws = device->ws; struct radeon_winsys *ws = device->ws;
struct radeon_cmdbuf *cs; struct radeon_cmdbuf *cs;
@ -846,7 +857,7 @@ radv_begin_sqtt(struct radv_queue *queue)
if (device->spm.bo) { if (device->spm.bo) {
/* Enable all shader stages by default. */ /* Enable all shader stages by default. */
radv_perfcounter_emit_shaders(device, cs, ac_sqtt_get_shader_mask(&device->physical_device->info)); radv_perfcounter_emit_shaders(device, cs, ac_sqtt_get_shader_mask(&pdev->info));
radv_emit_spm_setup(device, cs, family); radv_emit_spm_setup(device, cs, family);
} }
@ -936,7 +947,8 @@ bool
radv_get_sqtt_trace(struct radv_queue *queue, struct ac_sqtt_trace *sqtt_trace) radv_get_sqtt_trace(struct radv_queue *queue, struct ac_sqtt_trace *sqtt_trace)
{ {
struct radv_device *device = queue->device; struct radv_device *device = queue->device;
const struct radeon_info *gpu_info = &device->physical_device->info; const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
if (!ac_sqtt_get_trace(&device->sqtt, gpu_info, sqtt_trace)) { if (!ac_sqtt_get_trace(&device->sqtt, gpu_info, sqtt_trace)) {
if (!radv_sqtt_resize_bo(device)) if (!radv_sqtt_resize_bo(device))

View file

@ -311,7 +311,7 @@ calc_ctx_size_h265_main10(struct radv_video_session *vid)
static unsigned static unsigned
calc_ctx_size_av1(struct radv_device *device, struct radv_video_session *vid) calc_ctx_size_av1(struct radv_device *device, struct radv_video_session *vid)
{ {
struct radv_physical_device *pdev = device->physical_device; const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned frame_ctxt_size = pdev->av1_version == RDECODE_AV1_VER_0 unsigned frame_ctxt_size = pdev->av1_version == RDECODE_AV1_VER_0
? align(sizeof(rvcn_av1_frame_context_t), 2048) ? align(sizeof(rvcn_av1_frame_context_t), 2048)
: align(sizeof(rvcn_av1_vcn4_frame_context_t), 2048); : align(sizeof(rvcn_av1_vcn4_frame_context_t), 2048);
@ -345,6 +345,7 @@ radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR *
const VkAllocationCallbacks *pAllocator, VkVideoSessionKHR *pVideoSession) const VkAllocationCallbacks *pAllocator, VkVideoSessionKHR *pVideoSession)
{ {
RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_video_session *vid = struct radv_video_session *vid =
vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*vid), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*vid), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@ -365,12 +366,12 @@ radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR *
switch (vid->vk.op) { switch (vid->vk.op) {
case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR: case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
vid->stream_type = RDECODE_CODEC_H264_PERF; vid->stream_type = RDECODE_CODEC_H264_PERF;
if (radv_enable_tier2(device->physical_device)) if (radv_enable_tier2(pdev))
vid->dpb_type = DPB_DYNAMIC_TIER_2; vid->dpb_type = DPB_DYNAMIC_TIER_2;
break; break;
case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR: case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
vid->stream_type = RDECODE_CODEC_H265; vid->stream_type = RDECODE_CODEC_H265;
if (radv_enable_tier2(device->physical_device)) if (radv_enable_tier2(pdev))
vid->dpb_type = DPB_DYNAMIC_TIER_2; vid->dpb_type = DPB_DYNAMIC_TIER_2;
break; break;
case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR: case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
@ -381,10 +382,10 @@ radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR *
return VK_ERROR_FEATURE_NOT_PRESENT; return VK_ERROR_FEATURE_NOT_PRESENT;
} }
vid->stream_handle = radv_vid_alloc_stream_handle(device->physical_device); vid->stream_handle = radv_vid_alloc_stream_handle(pdev);
vid->dbg_frame_cnt = 0; vid->dbg_frame_cnt = 0;
vid->db_alignment = radv_video_get_db_alignment( vid->db_alignment = radv_video_get_db_alignment(
device->physical_device, vid->vk.max_coded.width, pdev, vid->vk.max_coded.width,
(vid->stream_type == RDECODE_CODEC_AV1 || (vid->stream_type == RDECODE_CODEC_AV1 ||
(vid->stream_type == RDECODE_CODEC_H265 && vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10))); (vid->stream_type == RDECODE_CODEC_H265 && vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)));
@ -656,11 +657,13 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR vi
{ {
RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_video_session, vid, videoSession); RADV_FROM_HANDLE(radv_video_session, vid, videoSession);
uint32_t memory_type_bits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1; const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t memory_type_bits = (1u << pdev->memory_properties.memoryTypeCount) - 1;
VK_OUTARRAY_MAKE_TYPED(VkVideoSessionMemoryRequirementsKHR, out, pMemoryRequirements, pMemoryRequirementsCount); VK_OUTARRAY_MAKE_TYPED(VkVideoSessionMemoryRequirementsKHR, out, pMemoryRequirements, pMemoryRequirementsCount);
/* 1 buffer for session context */ /* 1 buffer for session context */
if (device->physical_device->info.family >= CHIP_POLARIS10) { if (pdev->info.family >= CHIP_POLARIS10) {
vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
{ {
m->memoryBindIndex = RADV_BIND_SESSION_CTX; m->memoryBindIndex = RADV_BIND_SESSION_CTX;
@ -670,7 +673,7 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR vi
} }
} }
if (vid->stream_type == RDECODE_CODEC_H264_PERF && device->physical_device->info.family >= CHIP_POLARIS10) { if (vid->stream_type == RDECODE_CODEC_H264_PERF && pdev->info.family >= CHIP_POLARIS10) {
vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m) vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
{ {
m->memoryBindIndex = RADV_BIND_DECODER_CTX; m->memoryBindIndex = RADV_BIND_DECODER_CTX;
@ -701,9 +704,8 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR vi
m->memoryRequirements.size = align(calc_ctx_size_av1(device, vid), 4096); m->memoryRequirements.size = align(calc_ctx_size_av1(device, vid), 4096);
m->memoryRequirements.alignment = 0; m->memoryRequirements.alignment = 0;
m->memoryRequirements.memoryTypeBits = 0; m->memoryRequirements.memoryTypeBits = 0;
for (unsigned i = 0; i < device->physical_device->memory_properties.memoryTypeCount; i++) for (unsigned i = 0; i < pdev->memory_properties.memoryTypeCount; i++)
if (device->physical_device->memory_properties.memoryTypes[i].propertyFlags & if (pdev->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
m->memoryRequirements.memoryTypeBits |= (1 << i); m->memoryRequirements.memoryTypeBits |= (1 << i);
} }
} }
@ -761,14 +763,15 @@ set_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, uint32_t val)
static void static void
send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd, struct radeon_winsys_bo *bo, uint32_t offset) send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd, struct radeon_winsys_bo *bo, uint32_t offset)
{ {
struct radv_physical_device *pdev = cmd_buffer->device->physical_device; struct radv_device *device = cmd_buffer->device;
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t addr; uint64_t addr;
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo); radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
addr = radv_buffer_get_va(bo); addr = radv_buffer_get_va(bo);
addr += offset; addr += offset;
if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) { if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 6); radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 6);
set_reg(cmd_buffer, pdev->vid_dec_reg.data0, addr); set_reg(cmd_buffer, pdev->vid_dec_reg.data0, addr);
set_reg(cmd_buffer, pdev->vid_dec_reg.data1, addr >> 32); set_reg(cmd_buffer, pdev->vid_dec_reg.data1, addr >> 32);
@ -1037,6 +1040,7 @@ get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct
uint32_t *height_in_samples, uint32_t *height_in_samples,
void *it_ptr) void *it_ptr)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
rvcn_dec_message_hevc_t result; rvcn_dec_message_hevc_t result;
int i, j; int i, j;
const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info = const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =
@ -1059,7 +1063,7 @@ get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct
result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7; result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7;
result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8; result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8;
if (device->physical_device->info.family == CHIP_CARRIZO) if (pdev->info.family == CHIP_CARRIZO)
result.sps_info_flags |= 1 << 9; result.sps_info_flags |= 1 << 9;
if (!h265_pic_info->pStdPictureInfo->flags.short_term_ref_pic_set_sps_flag) { if (!h265_pic_info->pStdPictureInfo->flags.short_term_ref_pic_set_sps_flag) {
@ -2097,6 +2101,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
const struct VkVideoDecodeInfoKHR *frame_info) const struct VkVideoDecodeInfoKHR *frame_info)
{ {
struct radv_device *device = cmd_buffer->device; struct radv_device *device = cmd_buffer->device;
const struct radv_physical_device *pdev = radv_device_physical(device);
rvcn_dec_message_header_t *header; rvcn_dec_message_header_t *header;
rvcn_dec_message_index_t *index_codec; rvcn_dec_message_index_t *index_codec;
rvcn_dec_message_decode_t *decode; rvcn_dec_message_decode_t *decode;
@ -2182,7 +2187,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
decode->dt_tiling_mode = 0; decode->dt_tiling_mode = 0;
decode->dt_swizzle_mode = luma->surface.u.gfx9.swizzle_mode; decode->dt_swizzle_mode = luma->surface.u.gfx9.swizzle_mode;
decode->dt_array_mode = device->physical_device->vid_addr_gfx_mode; decode->dt_array_mode = pdev->vid_addr_gfx_mode;
decode->dt_field_mode = vid->interlaced ? 1 : 0; decode->dt_field_mode = vid->interlaced ? 1 : 0;
decode->dt_surf_tile_config = 0; decode->dt_surf_tile_config = 0;
decode->dt_uv_surf_tile_config = 0; decode->dt_uv_surf_tile_config = 0;
@ -2254,7 +2259,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
decode->db_pitch = dpb->planes[0].surface.u.gfx9.surf_pitch; decode->db_pitch = dpb->planes[0].surface.u.gfx9.surf_pitch;
decode->db_aligned_height = dpb->planes[0].surface.u.gfx9.surf_height; decode->db_aligned_height = dpb->planes[0].surface.u.gfx9.surf_height;
decode->db_swizzle_mode = dpb->planes[0].surface.u.gfx9.swizzle_mode; decode->db_swizzle_mode = dpb->planes[0].surface.u.gfx9.swizzle_mode;
decode->db_array_mode = device->physical_device->vid_addr_gfx_mode; decode->db_array_mode = pdev->vid_addr_gfx_mode;
decode->hw_ctxt_size = vid->ctx.size; decode->hw_ctxt_size = vid->ctx.size;
@ -2427,6 +2432,7 @@ get_uvd_h265_msg(struct radv_device *device, struct radv_video_session *vid, str
const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *width_in_samples, const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *width_in_samples,
uint32_t *height_in_samples, void *it_ptr) uint32_t *height_in_samples, void *it_ptr)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct ruvd_h265 result; struct ruvd_h265 result;
int i, j; int i, j;
const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info = const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =
@ -2450,7 +2456,7 @@ get_uvd_h265_msg(struct radv_device *device, struct radv_video_session *vid, str
result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7; result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7;
result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8; result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8;
if (device->physical_device->info.family == CHIP_CARRIZO) if (pdev->info.family == CHIP_CARRIZO)
result.sps_info_flags |= 1 << 9; result.sps_info_flags |= 1 << 9;
*width_in_samples = sps->pic_width_in_luma_samples; *width_in_samples = sps->pic_width_in_luma_samples;
@ -2592,6 +2598,7 @@ ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *v
struct radv_video_session_params *params, void *ptr, void *it_ptr, uint32_t *slice_offset, struct radv_video_session_params *params, void *ptr, void *it_ptr, uint32_t *slice_offset,
const struct VkVideoDecodeInfoKHR *frame_info) const struct VkVideoDecodeInfoKHR *frame_info)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct ruvd_msg *msg = ptr; struct ruvd_msg *msg = ptr;
struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding); struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
struct radv_image *img = dst_iv->image; struct radv_image *img = dst_iv->image;
@ -2616,7 +2623,7 @@ ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *v
msg->body.decode.bsd_size = frame_info->srcBufferRange; msg->body.decode.bsd_size = frame_info->srcBufferRange;
msg->body.decode.db_pitch = align(frame_info->dstPictureResource.codedExtent.width, vid->db_alignment); msg->body.decode.db_pitch = align(frame_info->dstPictureResource.codedExtent.width, vid->db_alignment);
if (vid->stream_type == RUVD_CODEC_H264_PERF && device->physical_device->info.family >= CHIP_POLARIS10) if (vid->stream_type == RUVD_CODEC_H264_PERF && pdev->info.family >= CHIP_POLARIS10)
msg->body.decode.dpb_reserved = vid->ctx.size; msg->body.decode.dpb_reserved = vid->ctx.size;
*slice_offset = 0; *slice_offset = 0;
@ -2643,7 +2650,7 @@ ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *v
msg->body.decode.dt_field_mode = false; msg->body.decode.dt_field_mode = false;
if (device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
msg->body.decode.dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w; msg->body.decode.dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR; msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR; msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
@ -2703,7 +2710,7 @@ ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *v
RUVD_MACRO_TILE_ASPECT_RATIO(util_logbase2(luma->surface.u.legacy.mtilea)); RUVD_MACRO_TILE_ASPECT_RATIO(util_logbase2(luma->surface.u.legacy.mtilea));
} }
if (device->physical_device->info.family >= CHIP_STONEY) if (pdev->info.family >= CHIP_STONEY)
msg->body.decode.dt_wa_chroma_top_offset = msg->body.decode.dt_pitch / 2; msg->body.decode.dt_wa_chroma_top_offset = msg->body.decode.dt_pitch / 2;
msg->body.decode.db_surf_tile_config = msg->body.decode.dt_surf_tile_config; msg->body.decode.db_surf_tile_config = msg->body.decode.dt_surf_tile_config;
@ -2740,8 +2747,8 @@ radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoBeginCod
static void static void
radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer) radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radv_video_session *vid = cmd_buffer->video.vid; struct radv_video_session *vid = cmd_buffer->video.vid;
struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
uint32_t size = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t); uint32_t size = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t);
void *ptr; void *ptr;
@ -2771,7 +2778,7 @@ radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
} }
radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr); radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED) if (pdev->vid_decode_ip == AMD_IP_VCN_UNIFIED)
radv_vcn_sq_start(cmd_buffer); radv_vcn_sq_start(cmd_buffer);
rvcn_dec_message_create(vid, ptr, size); rvcn_dec_message_create(vid, ptr, size);
@ -2779,7 +2786,7 @@ radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset); send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset);
/* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */ /* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */
if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) { if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 8); radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 8);
for (unsigned i = 0; i < 8; i++) for (unsigned i = 0; i < 8; i++)
radeon_emit(cmd_buffer->cs, 0x81ff); radeon_emit(cmd_buffer->cs, 0x81ff);
@ -2812,8 +2819,10 @@ VKAPI_ATTR void VKAPI_CALL
radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoCodingControlInfoKHR *pCodingControlInfo) radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoCodingControlInfoKHR *pCodingControlInfo)
{ {
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
if (pCodingControlInfo->flags & VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR) { if (pCodingControlInfo->flags & VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR) {
if (radv_has_uvd(cmd_buffer->device->physical_device)) if (radv_has_uvd(pdev))
radv_uvd_cmd_reset(cmd_buffer); radv_uvd_cmd_reset(cmd_buffer);
else else
radv_vcn_cmd_reset(cmd_buffer); radv_vcn_cmd_reset(cmd_buffer);
@ -2829,14 +2838,14 @@ static void
radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info) radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info)
{ {
RADV_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer); RADV_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radv_video_session *vid = cmd_buffer->video.vid; struct radv_video_session *vid = cmd_buffer->video.vid;
struct radv_video_session_params *params = cmd_buffer->video.params; struct radv_video_session_params *params = cmd_buffer->video.params;
unsigned size = sizeof(struct ruvd_msg); unsigned size = sizeof(struct ruvd_msg);
void *ptr, *fb_ptr, *it_probs_ptr = NULL; void *ptr, *fb_ptr, *it_probs_ptr = NULL;
uint32_t out_offset, fb_offset, it_probs_offset = 0; uint32_t out_offset, fb_offset, it_probs_offset = 0;
struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL; struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL;
unsigned fb_size = unsigned fb_size = (pdev->info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE;
(cmd_buffer->device->physical_device->info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE;
radv_vid_buffer_upload_alloc(cmd_buffer, fb_size, &fb_offset, &fb_ptr); radv_vid_buffer_upload_alloc(cmd_buffer, fb_size, &fb_offset, &fb_ptr);
fb_bo = cmd_buffer->upload.upload_bo; fb_bo = cmd_buffer->upload.upload_bo;
@ -2876,13 +2885,14 @@ radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf
send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_probs_bo, it_probs_offset); send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_probs_bo, it_probs_offset);
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 2); radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 2);
set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1); set_reg(cmd_buffer, pdev->vid_dec_reg.cntl, 1);
} }
static void static void
radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info) radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info)
{ {
RADV_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer); RADV_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radv_video_session *vid = cmd_buffer->video.vid; struct radv_video_session *vid = cmd_buffer->video.vid;
struct radv_video_session_params *params = cmd_buffer->video.params; struct radv_video_session_params *params = cmd_buffer->video.params;
unsigned size = 0; unsigned size = 0;
@ -2924,7 +2934,7 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf
radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr); radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
msg_bo = cmd_buffer->upload.upload_bo; msg_bo = cmd_buffer->upload.upload_bo;
if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED) if (pdev->vid_decode_ip == AMD_IP_VCN_UNIFIED)
radv_vcn_sq_start(cmd_buffer); radv_vcn_sq_start(cmd_buffer);
uint32_t slice_offset; uint32_t slice_offset;
@ -2955,9 +2965,9 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf
else if (have_probs(vid)) else if (have_probs(vid))
send_cmd(cmd_buffer, RDECODE_CMD_PROB_TBL_BUFFER, it_probs_bo, it_probs_offset); send_cmd(cmd_buffer, RDECODE_CMD_PROB_TBL_BUFFER, it_probs_bo, it_probs_offset);
if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) { if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 2); radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 2);
set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1); set_reg(cmd_buffer, pdev->vid_dec_reg.cntl, 1);
} else } else
radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq); radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
} }
@ -2966,8 +2976,9 @@ VKAPI_ATTR void VKAPI_CALL
radv_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer, const VkVideoDecodeInfoKHR *frame_info) radv_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer, const VkVideoDecodeInfoKHR *frame_info)
{ {
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
if (radv_has_uvd(cmd_buffer->device->physical_device)) if (radv_has_uvd(pdev))
radv_uvd_decode_video(cmd_buffer, frame_info); radv_uvd_decode_video(cmd_buffer, frame_info);
else else
radv_vcn_decode_video(cmd_buffer, frame_info); radv_vcn_decode_video(cmd_buffer, frame_info);

View file

@ -54,17 +54,17 @@ static VkQueue
radv_wsi_get_prime_blit_queue(VkDevice _device) radv_wsi_get_prime_blit_queue(VkDevice _device)
{ {
RADV_FROM_HANDLE(radv_device, device, _device); RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_physical_device *pdev = radv_device_physical(device);
if (device->private_sdma_queue != VK_NULL_HANDLE) if (device->private_sdma_queue != VK_NULL_HANDLE)
return vk_queue_to_handle(&device->private_sdma_queue->vk); return vk_queue_to_handle(&device->private_sdma_queue->vk);
if (device->physical_device->info.gfx_level >= GFX9 && if (pdev->info.gfx_level >= GFX9 && !(pdev->instance->debug_flags & RADV_DEBUG_NO_DMA_BLIT)) {
!(device->physical_device->instance->debug_flags & RADV_DEBUG_NO_DMA_BLIT)) {
device->physical_device->vk_queue_to_radv[device->physical_device->num_queues++] = RADV_QUEUE_TRANSFER; pdev->vk_queue_to_radv[pdev->num_queues++] = RADV_QUEUE_TRANSFER;
const VkDeviceQueueCreateInfo queue_create = { const VkDeviceQueueCreateInfo queue_create = {
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
.queueFamilyIndex = device->physical_device->num_queues - 1, .queueFamilyIndex = pdev->num_queues - 1,
.queueCount = 1, .queueCount = 1,
}; };

View file

@ -73,14 +73,15 @@ radv_write_harvested_raster_configs(struct radv_physical_device *pdev, struct ra
void void
radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs) radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
{ {
const struct radeon_info *gpu_info = &device->physical_device->info; const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3); radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
radeon_emit(cs, 0); radeon_emit(cs, 0);
radeon_emit(cs, 0); radeon_emit(cs, 0);
radeon_emit(cs, 0); radeon_emit(cs, 0);
radeon_set_sh_reg(cs, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(device->physical_device->info.address32_hi >> 8)); radeon_set_sh_reg(cs, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(pdev->info.address32_hi >> 8));
radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2); radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2);
/* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1, /* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1,
@ -90,7 +91,7 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
radeon_emit(cs, S_00B8AC_SA0_CU_EN(cu_mask) | S_00B8AC_SA1_CU_EN(cu_mask)); radeon_emit(cs, S_00B8AC_SA0_CU_EN(cu_mask) | S_00B8AC_SA1_CU_EN(cu_mask));
} }
if (device->physical_device->info.gfx_level >= GFX7) { if (pdev->info.gfx_level >= GFX7) {
/* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */ /* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */
radeon_set_sh_reg_seq(cs, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2); radeon_set_sh_reg_seq(cs, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2);
for (unsigned i = 2; i < 4; ++i) { for (unsigned i = 2; i < 4; ++i) {
@ -107,12 +108,11 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
} }
} }
if (device->physical_device->info.gfx_level >= GFX9 && device->physical_device->info.gfx_level < GFX11) { if (pdev->info.gfx_level >= GFX9 && pdev->info.gfx_level < GFX11) {
radeon_set_uconfig_reg(cs, R_0301EC_CP_COHER_START_DELAY, radeon_set_uconfig_reg(cs, R_0301EC_CP_COHER_START_DELAY, pdev->info.gfx_level >= GFX10 ? 0x20 : 0);
device->physical_device->info.gfx_level >= GFX10 ? 0x20 : 0);
} }
if (device->physical_device->info.gfx_level >= GFX10) { if (pdev->info.gfx_level >= GFX10) {
radeon_set_sh_reg_seq(cs, R_00B890_COMPUTE_USER_ACCUM_0, 4); radeon_set_sh_reg_seq(cs, R_00B890_COMPUTE_USER_ACCUM_0, 4);
radeon_emit(cs, 0); /* R_00B890_COMPUTE_USER_ACCUM_0 */ radeon_emit(cs, 0); /* R_00B890_COMPUTE_USER_ACCUM_0 */
radeon_emit(cs, 0); /* R_00B894_COMPUTE_USER_ACCUM_1 */ radeon_emit(cs, 0); /* R_00B894_COMPUTE_USER_ACCUM_1 */
@ -122,7 +122,7 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
radeon_set_sh_reg(cs, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0); radeon_set_sh_reg(cs, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0);
} }
if (device->physical_device->info.gfx_level == GFX6) { if (pdev->info.gfx_level == GFX6) {
if (device->border_color_data.bo) { if (device->border_color_data.bo) {
uint64_t bc_va = radv_buffer_get_va(device->border_color_data.bo); uint64_t bc_va = radv_buffer_get_va(device->border_color_data.bo);
radeon_set_config_reg(cs, R_00950C_TA_CS_BC_BASE_ADDR, bc_va >> 8); radeon_set_config_reg(cs, R_00950C_TA_CS_BC_BASE_ADDR, bc_va >> 8);
@ -132,7 +132,7 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
if (device->tma_bo) { if (device->tma_bo) {
uint64_t tba_va, tma_va; uint64_t tba_va, tma_va;
assert(device->physical_device->info.gfx_level == GFX8); assert(pdev->info.gfx_level == GFX8);
tba_va = radv_shader_get_va(device->trap_handler_shader); tba_va = radv_shader_get_va(device->trap_handler_shader);
tma_va = radv_buffer_get_va(device->tma_bo); tma_va = radv_buffer_get_va(device->tma_bo);
@ -144,7 +144,7 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
radeon_emit(cs, tma_va >> 40); radeon_emit(cs, tma_va >> 40);
} }
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
radeon_set_sh_reg_seq(cs, R_00B8AC_COMPUTE_STATIC_THREAD_MGMT_SE4, 4); radeon_set_sh_reg_seq(cs, R_00B8AC_COMPUTE_STATIC_THREAD_MGMT_SE4, 4);
/* SE4-SE7 */ /* SE4-SE7 */
for (unsigned i = 4; i < 8; ++i) { for (unsigned i = 4; i < 8; ++i) {
@ -187,7 +187,7 @@ radv_set_raster_config(struct radv_physical_device *pdev, struct radeon_cmdbuf *
void void
radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs) radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
{ {
struct radv_physical_device *pdev = device->physical_device; struct radv_physical_device *pdev = radv_device_physical(device);
bool has_clear_state = pdev->info.has_clear_state; bool has_clear_state = pdev->info.has_clear_state;
int i; int i;
@ -300,26 +300,19 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0); radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
} }
if (device->physical_device->info.gfx_level >= GFX10) { if (pdev->info.gfx_level >= GFX10) {
radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS, radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS, S_00B524_MEM_BASE(pdev->info.address32_hi >> 8));
S_00B524_MEM_BASE(device->physical_device->info.address32_hi >> 8)); radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(pdev->info.address32_hi >> 8));
radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES, } else if (pdev->info.gfx_level == GFX9) {
S_00B324_MEM_BASE(device->physical_device->info.address32_hi >> 8)); radeon_set_sh_reg(cs, R_00B414_SPI_SHADER_PGM_HI_LS, S_00B414_MEM_BASE(pdev->info.address32_hi >> 8));
} else if (device->physical_device->info.gfx_level == GFX9) { radeon_set_sh_reg(cs, R_00B214_SPI_SHADER_PGM_HI_ES, S_00B214_MEM_BASE(pdev->info.address32_hi >> 8));
radeon_set_sh_reg(cs, R_00B414_SPI_SHADER_PGM_HI_LS,
S_00B414_MEM_BASE(device->physical_device->info.address32_hi >> 8));
radeon_set_sh_reg(cs, R_00B214_SPI_SHADER_PGM_HI_ES,
S_00B214_MEM_BASE(device->physical_device->info.address32_hi >> 8));
} else { } else {
radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS, radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS, S_00B524_MEM_BASE(pdev->info.address32_hi >> 8));
S_00B524_MEM_BASE(device->physical_device->info.address32_hi >> 8)); radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(pdev->info.address32_hi >> 8));
radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES,
S_00B324_MEM_BASE(device->physical_device->info.address32_hi >> 8));
} }
if (device->physical_device->info.gfx_level < GFX11) if (pdev->info.gfx_level < GFX11)
radeon_set_sh_reg(cs, R_00B124_SPI_SHADER_PGM_HI_VS, radeon_set_sh_reg(cs, R_00B124_SPI_SHADER_PGM_HI_VS, S_00B124_MEM_BASE(pdev->info.address32_hi >> 8));
S_00B124_MEM_BASE(device->physical_device->info.address32_hi >> 8));
unsigned cu_mask_ps = 0xffffffff; unsigned cu_mask_ps = 0xffffffff;
@ -400,8 +393,7 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
/* Enable CMASK/FMASK/HTILE/DCC caching in L2 for small chips. */ /* Enable CMASK/FMASK/HTILE/DCC caching in L2 for small chips. */
unsigned meta_write_policy, meta_read_policy; unsigned meta_write_policy, meta_read_policy;
unsigned no_alloc = unsigned no_alloc = pdev->info.gfx_level >= GFX11 ? V_02807C_CACHE_NOA_GFX11 : V_02807C_CACHE_NOA_GFX10;
device->physical_device->info.gfx_level >= GFX11 ? V_02807C_CACHE_NOA_GFX11 : V_02807C_CACHE_NOA_GFX10;
/* TODO: investigate whether LRU improves performance on other chips too */ /* TODO: investigate whether LRU improves performance on other chips too */
if (pdev->info.max_render_backends <= 4) { if (pdev->info.max_render_backends <= 4) {
@ -419,7 +411,7 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
S_02807C_S_RD_POLICY(no_alloc) | S_02807C_HTILE_RD_POLICY(meta_read_policy)); S_02807C_S_RD_POLICY(no_alloc) | S_02807C_HTILE_RD_POLICY(meta_read_policy));
uint32_t gl2_cc; uint32_t gl2_cc;
if (device->physical_device->info.gfx_level >= GFX11) { if (pdev->info.gfx_level >= GFX11) {
gl2_cc = S_028410_DCC_WR_POLICY_GFX11(meta_write_policy) | gl2_cc = S_028410_DCC_WR_POLICY_GFX11(meta_write_policy) |
S_028410_COLOR_WR_POLICY_GFX11(V_028410_CACHE_STREAM) | S_028410_COLOR_WR_POLICY_GFX11(V_028410_CACHE_STREAM) |
S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_GFX11); S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_GFX11);
@ -569,7 +561,7 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
if (device->tma_bo) { if (device->tma_bo) {
uint64_t tba_va, tma_va; uint64_t tba_va, tma_va;
assert(device->physical_device->info.gfx_level == GFX8); assert(pdev->info.gfx_level == GFX8);
tba_va = radv_shader_get_va(device->trap_handler_shader); tba_va = radv_shader_get_va(device->trap_handler_shader);
tma_va = radv_buffer_get_va(device->tma_bo); tma_va = radv_buffer_get_va(device->tma_bo);
@ -630,6 +622,7 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
void void
radv_create_gfx_config(struct radv_device *device) radv_create_gfx_config(struct radv_device *device)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = device->ws->cs_create(device->ws, AMD_IP_GFX, false); struct radeon_cmdbuf *cs = device->ws->cs_create(device->ws, AMD_IP_GFX, false);
if (!cs) if (!cs)
return; return;
@ -639,7 +632,7 @@ radv_create_gfx_config(struct radv_device *device)
radv_emit_graphics(device, cs); radv_emit_graphics(device, cs);
while (cs->cdw & 7) { while (cs->cdw & 7) {
if (device->physical_device->info.gfx_ib_pad_with_type2) if (pdev->info.gfx_ib_pad_with_type2)
radeon_emit(cs, PKT2_NOP_PAD); radeon_emit(cs, PKT2_NOP_PAD);
else else
radeon_emit(cs, PKT3_NOP_PAD); radeon_emit(cs, PKT3_NOP_PAD);
@ -817,7 +810,8 @@ radv_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_d
bool count_from_stream_output, uint32_t draw_vertex_count, unsigned topology, bool count_from_stream_output, uint32_t draw_vertex_count, unsigned topology,
bool prim_restart_enable, unsigned patch_control_points, unsigned num_tess_patches) bool prim_restart_enable, unsigned patch_control_points, unsigned num_tess_patches)
{ {
const struct radeon_info *gpu_info = &cmd_buffer->device->physical_device->info; const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
const struct radeon_info *gpu_info = &pdev->info;
const unsigned max_primgroup_in_wave = 2; const unsigned max_primgroup_in_wave = 2;
/* SWITCH_ON_EOP(0) is always preferable. */ /* SWITCH_ON_EOP(0) is always preferable. */
bool wd_switch_on_eop = false; bool wd_switch_on_eop = false;
@ -839,7 +833,7 @@ radv_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_d
/* GS requirement. */ /* GS requirement. */
if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_GEOMETRY) && gpu_info->gfx_level <= GFX8) { if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_GEOMETRY) && gpu_info->gfx_level <= GFX8) {
unsigned gs_table_depth = cmd_buffer->device->physical_device->gs_table_depth; unsigned gs_table_depth = pdev->gs_table_depth;
if (SI_GS_PER_ES / primgroup_size >= gs_table_depth - 3) if (SI_GS_PER_ES / primgroup_size >= gs_table_depth - 3)
partial_es_wave = true; partial_es_wave = true;
} }
@ -1495,6 +1489,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enu
void void
radv_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer) radv_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
bool is_compute = cmd_buffer->qf == RADV_QUEUE_COMPUTE; bool is_compute = cmd_buffer->qf == RADV_QUEUE_COMPUTE;
if (is_compute) if (is_compute)
@ -1509,10 +1504,10 @@ radv_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
return; return;
} }
radv_cs_emit_cache_flush(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->device->physical_device->info.gfx_level, radv_cs_emit_cache_flush(cmd_buffer->device->ws, cmd_buffer->cs, pdev->info.gfx_level, &cmd_buffer->gfx9_fence_idx,
&cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va, cmd_buffer->gfx9_fence_va, radv_cmd_buffer_uses_mec(cmd_buffer),
radv_cmd_buffer_uses_mec(cmd_buffer), cmd_buffer->state.flush_bits, cmd_buffer->state.flush_bits, &cmd_buffer->state.sqtt_flush_bits,
&cmd_buffer->state.sqtt_flush_bits, cmd_buffer->gfx9_eop_bug_va); cmd_buffer->gfx9_eop_bug_va);
if (radv_device_fault_detection_enabled(cmd_buffer->device)) if (radv_device_fault_detection_enabled(cmd_buffer->device))
radv_cmd_buffer_trace_emit(cmd_buffer); radv_cmd_buffer_trace_emit(cmd_buffer);
@ -1539,6 +1534,7 @@ radv_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
void void
radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible, unsigned pred_op, uint64_t va) radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible, unsigned pred_op, uint64_t va)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
uint32_t op = 0; uint32_t op = 0;
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4); radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4);
@ -1554,7 +1550,7 @@ radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_vi
*/ */
op |= draw_visible ? PREDICATION_DRAW_VISIBLE : PREDICATION_DRAW_NOT_VISIBLE; op |= draw_visible ? PREDICATION_DRAW_VISIBLE : PREDICATION_DRAW_NOT_VISIBLE;
} }
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 2, 0)); radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
radeon_emit(cmd_buffer->cs, op); radeon_emit(cmd_buffer->cs, op);
radeon_emit(cmd_buffer->cs, va); radeon_emit(cmd_buffer->cs, va);
@ -1569,7 +1565,8 @@ radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_vi
void void
radv_emit_cond_exec(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, uint32_t count) radv_emit_cond_exec(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, uint32_t count)
{ {
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
if (gfx_level >= GFX7) { if (gfx_level >= GFX7) {
radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0)); radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
@ -1619,12 +1616,13 @@ static void
radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool predicating, uint64_t dst_va, radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool predicating, uint64_t dst_va,
uint64_t src_va, unsigned size, unsigned flags) uint64_t src_va, unsigned size, unsigned flags)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t header = 0, command = 0; uint32_t header = 0, command = 0;
assert(size <= cp_dma_max_byte_count(device->physical_device->info.gfx_level)); assert(size <= cp_dma_max_byte_count(pdev->info.gfx_level));
radeon_check_space(device->ws, cs, 9); radeon_check_space(device->ws, cs, 9);
if (device->physical_device->info.gfx_level >= GFX9) if (pdev->info.gfx_level >= GFX9)
command |= S_415_BYTE_COUNT_GFX9(size); command |= S_415_BYTE_COUNT_GFX9(size);
else else
command |= S_415_BYTE_COUNT_GFX6(size); command |= S_415_BYTE_COUNT_GFX6(size);
@ -1637,7 +1635,7 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool p
command |= S_415_RAW_WAIT(1); command |= S_415_RAW_WAIT(1);
/* Src and dst flags. */ /* Src and dst flags. */
if (device->physical_device->info.gfx_level >= GFX9 && !(flags & CP_DMA_CLEAR) && src_va == dst_va) if (pdev->info.gfx_level >= GFX9 && !(flags & CP_DMA_CLEAR) && src_va == dst_va)
header |= S_411_DST_SEL(V_411_NOWHERE); /* prefetch only */ header |= S_411_DST_SEL(V_411_NOWHERE); /* prefetch only */
else if (flags & CP_DMA_USE_L2) else if (flags & CP_DMA_USE_L2)
header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2); header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
@ -1647,7 +1645,7 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool p
else if (flags & CP_DMA_USE_L2) else if (flags & CP_DMA_USE_L2)
header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2); header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
if (device->physical_device->info.gfx_level >= GFX7) { if (pdev->info.gfx_level >= GFX7) {
radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, predicating)); radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, predicating));
radeon_emit(cs, header); radeon_emit(cs, header);
radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */ radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
@ -1699,8 +1697,9 @@ void
radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, unsigned size, radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, unsigned size,
bool predicating) bool predicating)
{ {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_winsys *ws = device->ws; struct radeon_winsys *ws = device->ws;
enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level; enum amd_gfx_level gfx_level = pdev->info.gfx_level;
uint32_t header = 0, command = 0; uint32_t header = 0, command = 0;
if (gfx_level >= GFX11) if (gfx_level >= GFX11)
@ -1784,15 +1783,15 @@ radv_cp_dma_realign_engine(struct radv_cmd_buffer *cmd_buffer, unsigned size)
void void
radv_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va, uint64_t size) radv_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va, uint64_t size)
{ {
enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level; const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
enum amd_gfx_level gfx_level = pdev->info.gfx_level;
uint64_t main_src_va, main_dest_va; uint64_t main_src_va, main_dest_va;
uint64_t skipped_size = 0, realign_size = 0; uint64_t skipped_size = 0, realign_size = 0;
/* Assume that we are not going to sync after the last DMA operation. */ /* Assume that we are not going to sync after the last DMA operation. */
cmd_buffer->state.dma_is_busy = true; cmd_buffer->state.dma_is_busy = true;
if (cmd_buffer->device->physical_device->info.family <= CHIP_CARRIZO || if (pdev->info.family <= CHIP_CARRIZO || pdev->info.family == CHIP_STONEY) {
cmd_buffer->device->physical_device->info.family == CHIP_STONEY) {
/* If the size is not aligned, we must add a dummy copy at the end /* If the size is not aligned, we must add a dummy copy at the end
* just to align the internal counter. Otherwise, the DMA engine * just to align the internal counter. Otherwise, the DMA engine
* would slow down by an order of magnitude for following copies. * would slow down by an order of magnitude for following copies.
@ -1818,7 +1817,7 @@ radv_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uin
unsigned dma_flags = 0; unsigned dma_flags = 0;
unsigned byte_count = MIN2(size, cp_dma_max_byte_count(gfx_level)); unsigned byte_count = MIN2(size, cp_dma_max_byte_count(gfx_level));
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
/* DMA operations via L2 are coherent and faster. /* DMA operations via L2 are coherent and faster.
* TODO: GFX7-GFX8 should also support this but it * TODO: GFX7-GFX8 should also support this but it
* requires tests/benchmarks. * requires tests/benchmarks.
@ -1858,12 +1857,14 @@ radv_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uin
void void
radv_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, unsigned value) radv_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, unsigned value)
{ {
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
if (!size) if (!size)
return; return;
assert(va % 4 == 0 && size % 4 == 0); assert(va % 4 == 0 && size % 4 == 0);
enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level; enum amd_gfx_level gfx_level = pdev->info.gfx_level;
/* Assume that we are not going to sync after the last DMA operation. */ /* Assume that we are not going to sync after the last DMA operation. */
cmd_buffer->state.dma_is_busy = true; cmd_buffer->state.dma_is_busy = true;
@ -1872,7 +1873,7 @@ radv_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64
unsigned byte_count = MIN2(size, cp_dma_max_byte_count(gfx_level)); unsigned byte_count = MIN2(size, cp_dma_max_byte_count(gfx_level));
unsigned dma_flags = CP_DMA_CLEAR; unsigned dma_flags = CP_DMA_CLEAR;
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) { if (pdev->info.gfx_level >= GFX9) {
/* DMA operations via L2 are coherent and faster. /* DMA operations via L2 are coherent and faster.
* TODO: GFX7-GFX8 should also support this but it * TODO: GFX7-GFX8 should also support this but it
* requires tests/benchmarks. * requires tests/benchmarks.
@ -1895,7 +1896,9 @@ radv_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64
void void
radv_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer) radv_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer)
{ {
if (cmd_buffer->device->physical_device->info.gfx_level < GFX7) const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
if (pdev->info.gfx_level < GFX7)
return; return;
if (!cmd_buffer->state.dma_is_busy) if (!cmd_buffer->state.dma_is_busy)