radv: remove radv_device::physical_device

Get the logical device object using the base object.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28500>
This commit is contained in:
Samuel Pitoiset 2024-03-28 14:42:10 +01:00 committed by Marge Bot
parent 310597cab6
commit 896c9cf486
55 changed files with 1390 additions and 1035 deletions

View file

@ -35,7 +35,8 @@
void
radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv_graphics_pipeline *pipeline)
{
const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
uint64_t va;
@ -340,7 +341,8 @@ radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
return;
/* Reserve a command buffer ID for SQTT. */
enum amd_ip_type ip_type = radv_queue_family_to_ring(cmd_buffer->device->physical_device, cmd_buffer->qf);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
enum amd_ip_type ip_type = radv_queue_family_to_ring(pdev, cmd_buffer->qf);
union rgp_sqtt_marker_cb_id cb_id = ac_sqtt_get_next_cmdbuf_id(&cmd_buffer->device->sqtt, ip_type);
cmd_buffer->sqtt_cb_id = cb_id.all;
@ -354,7 +356,7 @@ radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;
if (!radv_sparse_queue_enabled(cmd_buffer->device->physical_device))
if (!radv_sparse_queue_enabled(pdev))
marker.queue_flags |= VK_QUEUE_SPARSE_BINDING_BIT;
radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);
@ -655,6 +657,7 @@ radv_handle_sqtt(VkQueue _queue)
{
RADV_FROM_HANDLE(radv_queue, queue, _queue);
const struct radv_physical_device *pdev = radv_device_physical(queue->device);
bool trigger = queue->device->sqtt_triggered;
queue->device->sqtt_triggered = false;
@ -673,8 +676,7 @@ radv_handle_sqtt(VkQueue _queue)
if (queue->device->spm.bo)
ac_spm_get_trace(&queue->device->spm, &spm_trace);
ac_dump_rgp_capture(&queue->device->physical_device->info, &sqtt_trace,
queue->device->spm.bo ? &spm_trace : NULL);
ac_dump_rgp_capture(&pdev->info, &sqtt_trace, queue->device->spm.bo ? &spm_trace : NULL);
} else {
/* Trigger a new capture if the driver failed to get
* the trace because the buffer was too small.
@ -687,7 +689,7 @@ radv_handle_sqtt(VkQueue _queue)
}
if (trigger) {
if (ac_check_profile_state(&queue->device->physical_device->info)) {
if (ac_check_profile_state(&pdev->info)) {
fprintf(stderr, "radv: Canceling RGP trace request as a hang condition has been "
"detected. Force the GPU into a profiling mode with e.g. "
"\"echo profile_peak > "
@ -1415,7 +1417,7 @@ static void
radv_fill_code_object_record(struct radv_device *device, struct rgp_shader_data *shader_data,
struct radv_shader *shader, uint64_t va)
{
struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned lds_increment = pdev->info.gfx_level >= GFX11 && shader->info.stage == MESA_SHADER_FRAGMENT
? 1024
: pdev->info.lds_encode_granularity;

View file

@ -461,6 +461,7 @@ fail:
VkResult
radv_device_init_meta(struct radv_device *device)
{
struct radv_physical_device *pdev = radv_device_physical(device);
VkResult result;
memset(&device->meta_state, 0, sizeof(device->meta_state));
@ -521,7 +522,7 @@ radv_device_init_meta(struct radv_device *device)
if (result != VK_SUCCESS)
goto fail_resolve_fragment;
if (device->physical_device->use_fmask) {
if (pdev->use_fmask) {
result = radv_device_init_meta_fmask_expand_state(device, on_demand);
if (result != VK_SUCCESS)
goto fail_fmask_expand;
@ -555,11 +556,11 @@ radv_device_init_meta(struct radv_device *device)
/* FIXME: Acceleration structure builds hang when the build shaders are compiled with LLVM.
* Work around it by forcing ACO for now.
*/
bool use_llvm = device->physical_device->use_llvm;
bool use_llvm = pdev->use_llvm;
if (loaded_cache || use_llvm) {
device->physical_device->use_llvm = false;
pdev->use_llvm = false;
result = radv_device_init_accel_struct_build_state(device);
device->physical_device->use_llvm = use_llvm;
pdev->use_llvm = use_llvm;
if (result != VK_SUCCESS)
goto fail_accel_struct;
@ -639,6 +640,7 @@ radv_device_finish_meta(struct radv_device *device)
nir_builder PRINTFLIKE(3, 4)
radv_meta_init_shader(struct radv_device *dev, gl_shader_stage stage, const char *name, ...)
{
const struct radv_physical_device *pdev = radv_device_physical(dev);
nir_builder b = nir_builder_init_simple_shader(stage, NULL, NULL);
if (name) {
va_list args;
@ -647,7 +649,7 @@ nir_builder PRINTFLIKE(3, 4)
va_end(args);
}
b.shader->options = &dev->physical_device->nir_options[stage];
b.shader->options = &pdev->nir_options[stage];
radv_device_associate_nir(dev, b.shader);
@ -684,6 +686,7 @@ void
radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, bool is_integer, int samples,
nir_variable *input_img, nir_variable *color, nir_def *img_coord)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_deref_instr *input_img_deref = nir_build_deref_var(b, input_img);
nir_def *sample0 = nir_txf_ms_deref(b, input_img_deref, img_coord, nir_imm_int(b, 0));
@ -692,7 +695,7 @@ radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b,
return;
}
if (device->physical_device->use_fmask) {
if (pdev->use_fmask) {
nir_def *all_same = nir_samples_identical_deref(b, input_img_deref, img_coord);
nir_push_if(b, nir_inot(b, all_same));
}
@ -706,7 +709,7 @@ radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b,
accum = nir_fdiv_imm(b, accum, samples);
nir_store_var(b, color, accum, 0xf);
if (device->physical_device->use_fmask) {
if (pdev->use_fmask) {
nir_push_else(b, NULL);
nir_store_var(b, color, sample0, 0xf);
nir_pop_if(b, NULL);

View file

@ -32,9 +32,10 @@
VkResult
radv_device_init_meta_astc_decode_state(struct radv_device *device, bool on_demand)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_meta_state *state = &device->meta_state;
if (!device->physical_device->emulate_astc)
if (!pdev->emulate_astc)
return VK_SUCCESS;
return vk_texcompress_astc_init(&device->vk, &state->alloc, state->cache, &state->astc_decode);
@ -43,10 +44,11 @@ radv_device_init_meta_astc_decode_state(struct radv_device *device, bool on_dema
void
radv_device_finish_meta_astc_decode_state(struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_meta_state *state = &device->meta_state;
struct vk_texcompress_astc_state *astc = state->astc_decode;
if (!device->physical_device->emulate_astc)
if (!pdev->emulate_astc)
return;
vk_texcompress_astc_finish(&device->vk, &state->alloc, astc);

View file

@ -213,9 +213,10 @@ static bool
radv_prefer_compute_dma(const struct radv_device *device, uint64_t size, struct radeon_winsys_bo *src_bo,
struct radeon_winsys_bo *dst_bo)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
bool use_compute = size >= RADV_BUFFER_OPS_CS_THRESHOLD;
if (device->physical_device->info.gfx_level >= GFX10 && device->physical_device->info.has_dedicated_vram) {
if (pdev->info.gfx_level >= GFX10 && pdev->info.has_dedicated_vram) {
if ((src_bo && !(src_bo->initial_domain & RADEON_DOMAIN_VRAM)) ||
(dst_bo && !(dst_bo->initial_domain & RADEON_DOMAIN_VRAM))) {
/* Prefer CP DMA for GTT on dGPUS due to slow PCIe. */

View file

@ -1174,11 +1174,12 @@ fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_meta_bl
const struct radv_meta_blit2d_surf *img_bsurf, const struct radv_meta_blit2d_rect *rect,
bool to_image)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
const unsigned mip_level = img_bsurf->level;
const struct radv_image *image = img_bsurf->image;
const struct radeon_surf *surf = &image->planes[0].surface;
struct radv_device *device = cmd_buffer->device;
const struct radeon_info *gpu_info = &device->physical_device->info;
const struct radeon_info *gpu_info = &pdev->info;
struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws);
struct ac_surf_info surf_info = radv_get_ac_surf_info(device, image);
@ -1243,9 +1244,10 @@ fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_meta_bl
static unsigned
get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
unsigned stride;
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
} else {
stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;

View file

@ -1139,10 +1139,11 @@ uint32_t
radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
uint32_t value)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
uint64_t offset = image->bindings[0].offset + image->planes[0].surface.cmask_offset;
uint64_t size;
if (cmd_buffer->device->physical_device->info.gfx_level == GFX9) {
if (pdev->info.gfx_level == GFX9) {
/* TODO: clear layers. */
size = image->planes[0].surface.cmask_size;
} else {
@ -1178,6 +1179,7 @@ uint32_t
radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
uint32_t value)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
uint32_t layer_count = vk_image_subresource_layer_count(&image->vk, range);
uint32_t flush_bits = 0;
@ -1190,12 +1192,12 @@ radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, con
uint32_t level = range->baseMipLevel + l;
uint64_t size;
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX10) {
if (pdev->info.gfx_level >= GFX10) {
/* DCC for mipmaps+layers is currently disabled. */
offset += image->planes[0].surface.meta_slice_size * range->baseArrayLayer +
image->planes[0].surface.u.gfx9.meta_levels[level].offset;
size = image->planes[0].surface.u.gfx9.meta_levels[level].size * layer_count;
} else if (cmd_buffer->device->physical_device->info.gfx_level == GFX9) {
} else if (pdev->info.gfx_level == GFX9) {
/* Mipmap levels and layers aren't implemented. */
assert(level == 0);
size = image->planes[0].surface.meta_size;
@ -1331,6 +1333,7 @@ uint32_t
radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
const VkImageSubresourceRange *range, uint32_t value)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
uint32_t flush_bits = 0;
uint32_t htile_mask;
@ -1338,7 +1341,7 @@ radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im
htile_mask = radv_get_htile_mask(cmd_buffer->device, image, range->aspectMask);
if (level_count != image->vk.mip_levels) {
assert(cmd_buffer->device->physical_device->info.gfx_level >= GFX10);
assert(pdev->info.gfx_level >= GFX10);
/* Clear individuals levels separately. */
for (uint32_t l = 0; l < level_count; l++) {
@ -1398,7 +1401,8 @@ enum {
/* Return the DCC "clear single" control value for this device.
 *
 * GFX11 uses a different magic value (RADV_DCC_GFX11_CLEAR_SINGLE) than
 * GFX9/GFX10 (RADV_DCC_GFX9_CLEAR_SINGLE); the choice is keyed purely on
 * the physical device's gfx_level.
 *
 * NOTE(review): the stale pre-change return statement (which dereferenced the
 * removed radv_device::physical_device member) has been dropped — only the
 * radv_device_physical() form remains, matching the rest of this commit.
 */
static uint32_t
radv_dcc_single_clear_value(const struct radv_device *device)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   return pdev->info.gfx_level >= GFX11 ? RADV_DCC_GFX11_CLEAR_SINGLE : RADV_DCC_GFX9_CLEAR_SINGLE;
}
static void
@ -1605,6 +1609,7 @@ radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_
VkImageLayout image_layout, const VkClearRect *clear_rect, VkClearColorValue clear_value,
uint32_t view_mask)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
uint32_t clear_color[2];
if (!iview || !iview->support_fast_clear)
@ -1641,7 +1646,7 @@ radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_
bool can_avoid_fast_clear_elim;
uint32_t reset_value;
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
if (!gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value))
return false;
} else {
@ -1650,7 +1655,7 @@ radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_
}
if (iview->image->vk.mip_levels > 1) {
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
uint32_t last_level = iview->vk.base_mip_level + iview->vk.level_count - 1;
if (last_level >= iview->image->planes[0].surface.num_meta_levels) {
/* Do not fast clears if one level can't be fast cleard. */
@ -1680,6 +1685,7 @@ radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_imag
const VkClearAttachment *clear_att, enum radv_cmd_flush_bits *pre_flush,
enum radv_cmd_flush_bits *post_flush)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
VkClearColorValue clear_value = clear_att->clearValue.color;
uint32_t clear_color[4], flush_bits = 0;
uint32_t cmask_clear_value;
@ -1710,7 +1716,7 @@ radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_imag
uint32_t reset_value;
bool can_avoid_fast_clear_elim = true;
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
ASSERTED bool result = gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value);
assert(result);
} else {
@ -2074,6 +2080,7 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *imag
const VkClearValue *clear_value, uint32_t range_count, const VkImageSubresourceRange *ranges,
bool cs)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
VkFormat format = image->vk.format;
VkClearValue internal_clear_value;
@ -2086,8 +2093,8 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *imag
if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
bool blendable;
if (cs ? !radv_is_storage_image_format_supported(cmd_buffer->device->physical_device, format)
: !radv_is_colorbuffer_format_supported(cmd_buffer->device->physical_device, format, &blendable)) {
if (cs ? !radv_is_storage_image_format_supported(pdev, format)
: !radv_is_colorbuffer_format_supported(pdev, format, &blendable)) {
format = VK_FORMAT_R32_UINT;
internal_clear_value.color.uint32[0] = float3_to_rgb9e5(clear_value->color.float32);

View file

@ -239,13 +239,14 @@ radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToIm
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
RADV_FROM_HANDLE(radv_image, dst_image, pCopyBufferToImageInfo->dstImage);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
copy_buffer_to_image(cmd_buffer, src_buffer, dst_image, pCopyBufferToImageInfo->dstImageLayout,
&pCopyBufferToImageInfo->pRegions[r]);
}
if (radv_is_format_emulated(cmd_buffer->device->physical_device, dst_image->vk.format)) {
if (radv_is_format_emulated(pdev, dst_image->vk.format)) {
cmd_buffer->state.flush_bits |=
RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, dst_image) |
@ -422,6 +423,8 @@ static void
copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkImageLayout src_image_layout,
struct radv_image *dst_image, VkImageLayout dst_image_layout, const VkImageCopy2 *region)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
transfer_copy_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout, region);
return;
@ -499,9 +502,9 @@ copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkI
src_image_layout, src_queue_mask);
bool need_dcc_sign_reinterpret = false;
if (!src_compressed || (radv_dcc_formats_compatible(cmd_buffer->device->physical_device->info.gfx_level,
b_src.format, b_dst.format, &need_dcc_sign_reinterpret) &&
!need_dcc_sign_reinterpret)) {
if (!src_compressed ||
(radv_dcc_formats_compatible(pdev->info.gfx_level, b_src.format, b_dst.format, &need_dcc_sign_reinterpret) &&
!need_dcc_sign_reinterpret)) {
b_src.format = b_dst.format;
} else if (!dst_compressed) {
b_dst.format = b_src.format;
@ -613,13 +616,14 @@ radv_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyI
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_image, src_image, pCopyImageInfo->srcImage);
RADV_FROM_HANDLE(radv_image, dst_image, pCopyImageInfo->dstImage);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
copy_image(cmd_buffer, src_image, pCopyImageInfo->srcImageLayout, dst_image, pCopyImageInfo->dstImageLayout,
&pCopyImageInfo->pRegions[r]);
}
if (radv_is_format_emulated(cmd_buffer->device->physical_device, dst_image->vk.format)) {
if (radv_is_format_emulated(pdev, dst_image->vk.format)) {
cmd_buffer->state.flush_bits |=
RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, dst_image) |

View file

@ -43,6 +43,7 @@ radv_device_finish_meta_copy_vrs_htile_state(struct radv_device *device)
static nir_shader *
build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "meta_copy_vrs_htile");
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = 8;
@ -64,8 +65,8 @@ build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf
/* Get the HTILE addr from coordinates. */
nir_def *zero = nir_imm_int(&b, 0);
nir_def *htile_addr =
ac_nir_htile_addr_from_coord(&b, &device->physical_device->info, &surf->u.gfx9.zs.htile_equation, htile_pitch,
htile_slice_size, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero);
ac_nir_htile_addr_from_coord(&b, &pdev->info, &surf->u.gfx9.zs.htile_equation, htile_pitch, htile_slice_size,
nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero);
/* Set up the input VRS image descriptor. */
const struct glsl_type *vrs_sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT);

View file

@ -31,6 +31,7 @@
static nir_shader *
build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *surf)
{
const struct radv_physical_device *pdev = radv_device_physical(dev);
enum glsl_sampler_dim dim = GLSL_SAMPLER_DIM_BUF;
const struct glsl_type *buf_type = glsl_image_type(dim, false, GLSL_TYPE_UINT);
nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "dcc_retile_compute");
@ -60,12 +61,12 @@ build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *sur
coord =
nir_imul(&b, coord, nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, surf->u.gfx9.color.dcc_block_height));
nir_def *src = ac_nir_dcc_addr_from_coord(&b, &dev->physical_device->info, surf->bpe,
&surf->u.gfx9.color.dcc_equation, src_dcc_pitch, src_dcc_height, zero,
nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero, zero);
nir_def *dst = ac_nir_dcc_addr_from_coord(
&b, &dev->physical_device->info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation, dst_dcc_pitch,
dst_dcc_height, zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero, zero);
nir_def *src = ac_nir_dcc_addr_from_coord(&b, &pdev->info, surf->bpe, &surf->u.gfx9.color.dcc_equation,
src_dcc_pitch, src_dcc_height, zero, nir_channel(&b, coord, 0),
nir_channel(&b, coord, 1), zero, zero, zero);
nir_def *dst = ac_nir_dcc_addr_from_coord(&b, &pdev->info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation,
dst_dcc_pitch, dst_dcc_height, zero, nir_channel(&b, coord, 0),
nir_channel(&b, coord, 1), zero, zero, zero);
nir_def *dcc_val = nir_image_deref_load(&b, 1, 32, input_dcc_ref, nir_vec4(&b, src, src, src, src),
nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = dim);

View file

@ -34,13 +34,14 @@
VkResult
radv_device_init_meta_etc_decode_state(struct radv_device *device, bool on_demand)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_meta_state *state = &device->meta_state;
if (!device->physical_device->emulate_etc2)
if (!pdev->emulate_etc2)
return VK_SUCCESS;
state->etc_decode.allocator = &state->alloc;
state->etc_decode.nir_options = &device->physical_device->nir_options[MESA_SHADER_COMPUTE];
state->etc_decode.nir_options = &pdev->nir_options[MESA_SHADER_COMPUTE];
state->etc_decode.pipeline_cache = state->cache;
vk_texcompress_etc2_init(&device->vk, &state->etc_decode);

View file

@ -155,6 +155,7 @@ create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
static VkResult
create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipelineLayout layout)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
VkResult result;
VkDevice device_h = radv_device_to_handle(device);
@ -363,8 +364,8 @@ create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipeli
},
&(struct radv_graphics_pipeline_create_info){
.use_rectlist = true,
.custom_blend_mode = device->physical_device->info.gfx_level >= GFX11 ? V_028808_CB_DCC_DECOMPRESS_GFX11
: V_028808_CB_DCC_DECOMPRESS_GFX8,
.custom_blend_mode =
pdev->info.gfx_level >= GFX11 ? V_028808_CB_DCC_DECOMPRESS_GFX11 : V_028808_CB_DCC_DECOMPRESS_GFX8,
},
&device->meta_state.alloc, &device->meta_state.fast_clear_flush.dcc_decompress_pipeline);
if (result != VK_SUCCESS)

View file

@ -232,8 +232,10 @@ radv_can_use_fmask_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_im
const struct radv_image *dst_image, unsigned num_rects,
const struct radv_meta_blit2d_rect *rects)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
/* TODO: Test on pre GFX10 chips. */
if (cmd_buffer->device->physical_device->info.gfx_level < GFX10)
if (pdev->info.gfx_level < GFX10)
return false;
/* TODO: Add support for layers. */

View file

@ -253,7 +253,8 @@ enum radv_resolve_method {
static bool
image_hw_resolve_compat(const struct radv_device *device, struct radv_image *src_image, struct radv_image *dst_image)
{
if (device->physical_device->info.gfx_level >= GFX9) {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level >= GFX9) {
return dst_image->planes[0].surface.u.gfx9.swizzle_mode == src_image->planes[0].surface.u.gfx9.swizzle_mode;
} else {
return dst_image->planes[0].surface.micro_tile_mode == src_image->planes[0].surface.micro_tile_mode;
@ -506,9 +507,9 @@ radv_CmdResolveImage2(VkCommandBuffer commandBuffer, const VkResolveImageInfo2 *
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_image, src_image, pResolveImageInfo->srcImage);
RADV_FROM_HANDLE(radv_image, dst_image, pResolveImageInfo->dstImage);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
VkImageLayout src_image_layout = pResolveImageInfo->srcImageLayout;
VkImageLayout dst_image_layout = pResolveImageInfo->dstImageLayout;
const struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
enum radv_resolve_method resolve_method = pdev->info.gfx_level >= GFX11 ? RESOLVE_FRAGMENT : RESOLVE_HW;
/* we can use the hw resolve only for single full resolves */
@ -622,7 +623,7 @@ radv_cmd_buffer_resolve_rendering_hw(struct radv_cmd_buffer *cmd_buffer, struct
void
radv_cmd_buffer_resolve_rendering(struct radv_cmd_buffer *cmd_buffer)
{
const struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
const struct radv_rendering_state *render = &cmd_buffer->state.render;
enum radv_resolve_method resolve_method = pdev->info.gfx_level >= GFX11 ? RESOLVE_FRAGMENT : RESOLVE_HW;

View file

@ -501,12 +501,13 @@ apply_layout_to_tex(nir_builder *b, apply_layout_state *state, nir_tex_instr *te
void
radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device, const struct radv_shader_stage *stage)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
apply_layout_state state = {
.gfx_level = device->physical_device->info.gfx_level,
.address32_hi = device->physical_device->info.address32_hi,
.gfx_level = pdev->info.gfx_level,
.address32_hi = pdev->info.address32_hi,
.disable_aniso_single_level = device->instance->drirc.disable_aniso_single_level,
.has_image_load_dcc_bug = device->physical_device->info.has_image_load_dcc_bug,
.disable_tg4_trunc_coord = !device->physical_device->info.conformant_trunc_coord && !device->disable_trunc_coord,
.has_image_load_dcc_bug = pdev->info.has_image_load_dcc_bug,
.disable_tg4_trunc_coord = !pdev->info.conformant_trunc_coord && !device->disable_trunc_coord,
.args = &stage->args,
.info = &stage->info,
.layout = &stage->layout,

View file

@ -72,6 +72,8 @@ radv_nir_lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask)
void
radv_nir_lower_io(struct radv_device *device, nir_shader *nir)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, MESA_SHADER_FRAGMENT);
}
@ -89,7 +91,7 @@ radv_nir_lower_io(struct radv_device *device, nir_shader *nir)
NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out);
if (device->physical_device->use_ngg_streamout && nir->xfb_info) {
if (pdev->use_ngg_streamout && nir->xfb_info) {
NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info);
/* The total number of shader outputs is required for computing the pervertex LDS size for
@ -133,6 +135,7 @@ radv_map_io_driver_location(unsigned semantic)
bool
radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *stage)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader_info *info = &stage->info;
ac_nir_map_io_driver_location map_input = info->inputs_linked ? NULL : radv_map_io_driver_location;
ac_nir_map_io_driver_location map_output = info->outputs_linked ? NULL : radv_map_io_driver_location;
@ -144,35 +147,33 @@ radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *s
info->vs.tcs_temp_only_input_mask);
return true;
} else if (info->vs.as_es) {
NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, device->physical_device->info.gfx_level,
info->esgs_itemsize);
NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize);
return true;
}
} else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, map_input, info->vs.tcs_in_out_eq);
NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, map_output, device->physical_device->info.gfx_level,
info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read, info->tcs.num_linked_outputs,
info->tcs.num_linked_patch_outputs, info->wave_size, false, false);
NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, map_output, pdev->info.gfx_level, info->tcs.tes_inputs_read,
info->tcs.tes_patch_inputs_read, info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs,
info->wave_size, false, false);
return true;
} else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
NIR_PASS_V(nir, ac_nir_lower_tes_inputs_to_mem, map_input);
if (info->tes.as_es) {
NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, device->physical_device->info.gfx_level,
info->esgs_itemsize);
NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize);
}
return true;
} else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
NIR_PASS_V(nir, ac_nir_lower_gs_inputs_to_mem, map_input, device->physical_device->info.gfx_level, false);
NIR_PASS_V(nir, ac_nir_lower_gs_inputs_to_mem, map_input, pdev->info.gfx_level, false);
return true;
} else if (nir->info.stage == MESA_SHADER_TASK) {
ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, device->physical_device->task_info.num_entries,
ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, pdev->task_info.num_entries,
info->cs.has_query);
return true;
} else if (nir->info.stage == MESA_SHADER_MESH) {
ac_nir_lower_mesh_inputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, device->physical_device->task_info.num_entries);
ac_nir_lower_mesh_inputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, pdev->task_info.num_entries);
return true;
}

View file

@ -648,6 +648,7 @@ lower_rq_terminate(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr, s
bool
radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
bool progress = false;
struct hash_table *query_ht = _mesa_pointer_hash_table_create(NULL);
@ -655,7 +656,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
if (!var->data.ray_query)
continue;
lower_ray_query(shader, var, query_ht, device->physical_device->max_shared_size);
lower_ray_query(shader, var, query_ht, pdev->max_shared_size);
progress = true;
}
@ -670,7 +671,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
if (!var->data.ray_query)
continue;
lower_ray_query(shader, var, query_ht, device->physical_device->max_shared_size);
lower_ray_query(shader, var, query_ht, pdev->max_shared_size);
progress = true;
}

View file

@ -297,11 +297,12 @@ build_addr_to_node(nir_builder *b, nir_def *addr)
static nir_def *
build_node_to_addr(struct radv_device *device, nir_builder *b, nir_def *node, bool skip_type_and)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_def *addr = skip_type_and ? node : nir_iand_imm(b, node, ~7ull);
addr = nir_ishl_imm(b, addr, 3);
/* Assumes everything is in the top half of address space, which is true in
* GFX9+ for now. */
return device->physical_device->info.gfx_level >= GFX9 ? nir_ior_imm(b, addr, 0xffffull << 48) : addr;
return pdev->info.gfx_level >= GFX9 ? nir_ior_imm(b, addr, 0xffffull << 48) : addr;
}
nir_def *
@ -477,6 +478,7 @@ radv_test_flag(nir_builder *b, const struct radv_ray_traversal_args *args, uint3
nir_def *
radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete");
nir_store_var(b, incomplete, nir_imm_true(b), 0x1);
@ -568,7 +570,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
nir_def *global_bvh_node = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node));
nir_def *intrinsic_result = NULL;
if (!radv_emulate_rt(device->physical_device)) {
if (!radv_emulate_rt(pdev)) {
intrinsic_result =
nir_bvh64_intersect_ray_amd(b, 32, desc, nir_unpack_64_2x32(b, global_bvh_node),
nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin),

View file

@ -1524,6 +1524,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, bool monolithic, nir_builder *b,
struct rt_variables *vars, bool ignore_cull_mask, struct radv_ray_tracing_stage_info *info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_variable *barycentrics =
nir_variable_create(b->shader, nir_var_ray_hit_attrib, glsl_vector_type(GLSL_TYPE_FLOAT, 2), "barycentrics");
barycentrics->data.driver_location = 0;
@ -1602,7 +1603,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
.tmin = nir_load_var(b, vars->tmin),
.dir = nir_load_var(b, vars->direction),
.vars = trav_vars_args,
.stack_stride = device->physical_device->rt_wave_size * sizeof(uint32_t),
.stack_stride = pdev->rt_wave_size * sizeof(uint32_t),
.stack_entries = MAX_STACK_ENTRY_COUNT,
.stack_base = 0,
.ignore_cull_mask = ignore_cull_mask,
@ -1638,7 +1639,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
hit_attribs[i] =
nir_local_variable_create(nir_shader_get_entrypoint(b->shader), glsl_uint_type(), "ahit_attrib");
lower_hit_attribs(b->shader, hit_attribs, device->physical_device->rt_wave_size);
lower_hit_attribs(b->shader, hit_attribs, pdev->rt_wave_size);
}
/* Initialize follow-up shader. */
@ -1702,6 +1703,7 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
struct radv_ray_tracing_stage_info *info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const VkPipelineCreateFlagBits2KHR create_flags = vk_rt_pipeline_create_flags(pCreateInfo);
/* Create the traversal shader as an intersection shader to prevent validation failures due to
@ -1709,8 +1711,8 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_
nir_builder b = radv_meta_init_shader(device, MESA_SHADER_INTERSECTION, "rt_traversal");
b.shader->info.internal = false;
b.shader->info.workgroup_size[0] = 8;
b.shader->info.workgroup_size[1] = device->physical_device->rt_wave_size == 64 ? 8 : 4;
b.shader->info.shared_size = device->physical_device->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t);
b.shader->info.workgroup_size[1] = pdev->rt_wave_size == 64 ? 8 : 4;
b.shader->info.shared_size = pdev->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t);
struct rt_variables vars = create_rt_variables(b.shader, device, create_flags, false);
if (info->tmin.state == RADV_RT_CONST_ARG_STATE_VALID)
@ -1773,6 +1775,7 @@ lower_rt_instruction_monolithic(nir_builder *b, nir_instr *instr, void *data)
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
struct lower_rt_instruction_monolithic_state *state = data;
const struct radv_physical_device *pdev = radv_device_physical(state->device);
struct rt_variables *vars = state->vars;
switch (intr->intrinsic) {
@ -1800,8 +1803,8 @@ lower_rt_instruction_monolithic(nir_builder *b, nir_instr *instr, void *data)
nir_store_var(b, vars->stack_ptr, nir_iadd_imm(b, stack_ptr, b->shader->scratch_size), 0x1);
radv_build_traversal(state->device, state->pipeline, state->pCreateInfo, true, b, vars, ignore_cull_mask, NULL);
b->shader->info.shared_size = MAX2(b->shader->info.shared_size, state->device->physical_device->rt_wave_size *
MAX_STACK_ENTRY_COUNT * sizeof(uint32_t));
b->shader->info.shared_size =
MAX2(b->shader->info.shared_size, pdev->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t));
nir_store_var(b, vars->stack_ptr, stack_ptr, 0x1);

View file

@ -424,7 +424,9 @@ cleanup:
VkResult
radv_device_init_null_accel_struct(struct radv_device *device)
{
if (device->physical_device->memory_properties.memoryTypeCount == 0)
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->memory_properties.memoryTypeCount == 0)
return VK_SUCCESS; /* Exit in the case of null winsys. */
VkDevice _device = radv_device_to_handle(device);
@ -465,9 +467,9 @@ radv_device_init_null_accel_struct(struct radv_device *device)
VkMemoryAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = mem_req.memoryRequirements.size,
.memoryTypeIndex = radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
.memoryTypeIndex =
radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
};
result = radv_AllocateMemory(_device, &alloc_info, &device->meta_state.alloc, &memory);
@ -1537,9 +1539,9 @@ radv_GetDeviceAccelerationStructureCompatibilityKHR(VkDevice _device,
VkAccelerationStructureCompatibilityKHR *pCompatibility)
{
RADV_FROM_HANDLE(radv_device, device, _device);
bool compat =
memcmp(pVersionInfo->pVersionData, device->physical_device->driver_uuid, VK_UUID_SIZE) == 0 &&
memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, device->physical_device->cache_uuid, VK_UUID_SIZE) == 0;
const struct radv_physical_device *pdev = radv_device_physical(device);
bool compat = memcmp(pVersionInfo->pVersionData, pdev->driver_uuid, VK_UUID_SIZE) == 0 &&
memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, pdev->cache_uuid, VK_UUID_SIZE) == 0;
*pCompatibility = compat ? VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR
: VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
}
@ -1601,6 +1603,7 @@ radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
RADV_FROM_HANDLE(radv_buffer, src_buffer, src->buffer);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radv_meta_saved_state saved_state;
VkResult result = radv_device_init_accel_struct_copy_state(cmd_buffer->device);
@ -1634,8 +1637,8 @@ radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
/* Set the header of the serialized data. */
uint8_t header_data[2 * VK_UUID_SIZE];
memcpy(header_data, cmd_buffer->device->physical_device->driver_uuid, VK_UUID_SIZE);
memcpy(header_data + VK_UUID_SIZE, cmd_buffer->device->physical_device->cache_uuid, VK_UUID_SIZE);
memcpy(header_data, pdev->driver_uuid, VK_UUID_SIZE);
memcpy(header_data + VK_UUID_SIZE, pdev->cache_uuid, VK_UUID_SIZE);
radv_update_buffer_cp(cmd_buffer, pInfo->dst.deviceAddress, header_data, sizeof(header_data));
}

View file

@ -114,6 +114,7 @@ radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info,
{
RADV_FROM_HANDLE(radv_device, device, device_h);
const struct radv_physical_device *pdev = radv_device_physical(device);
VkImage image_h = VK_NULL_HANDLE;
struct radv_image *image = NULL;
VkResult result;
@ -141,10 +142,9 @@ radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info,
/* Find the first VRAM memory type, or GART for PRIME images. */
int memory_type_index = -1;
for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) {
bool is_local = !!(device->physical_device->memory_properties.memoryTypes[i].propertyFlags &
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
bool is_32bit = !!(device->physical_device->memory_types_32bit & (1u << i));
for (int i = 0; i < pdev->memory_properties.memoryTypeCount; ++i) {
bool is_local = !!(pdev->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
bool is_32bit = !!(pdev->memory_types_32bit & (1u << i));
if (is_local && !is_32bit) {
memory_type_index = i;
break;
@ -217,7 +217,7 @@ radv_GetSwapchainGrallocUsageANDROID(VkDevice device_h, VkFormat format, VkImage
int *grallocUsage)
{
RADV_FROM_HANDLE(radv_device, device, device_h);
struct radv_physical_device *pdev = device->physical_device;
struct radv_physical_device *pdev = radv_device_physical(device);
VkPhysicalDevice pdev_h = radv_physical_device_to_handle(pdev);
VkResult result;
@ -298,7 +298,7 @@ radv_GetSwapchainGrallocUsage2ANDROID(VkDevice device_h, VkFormat format, VkImag
* vkGetSwapchainGrallocUsageANDROID. */
#if ANDROID_API_LEVEL >= 26
RADV_FROM_HANDLE(radv_device, device, device_h);
struct radv_physical_device *pdev = device->physical_device;
struct radv_physical_device *pdev = radv_device_physical(device);
VkPhysicalDevice pdev_h = radv_physical_device_to_handle(pdev);
VkResult result;
@ -408,6 +408,7 @@ get_ahb_buffer_format_properties(VkDevice device_h, const struct AHardwareBuffer
VkAndroidHardwareBufferFormatPropertiesANDROID *pProperties)
{
RADV_FROM_HANDLE(radv_device, device, device_h);
struct radv_physical_device *pdev = radv_device_physical(device);
/* Get a description of buffer contents . */
AHardwareBuffer_Desc desc;
@ -431,8 +432,7 @@ get_ahb_buffer_format_properties(VkDevice device_h, const struct AHardwareBuffer
VkFormatProperties2 format_properties = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2};
radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(device->physical_device), p->format,
&format_properties);
radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(pdev), p->format, &format_properties);
if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER)
p->formatFeatures = format_properties.formatProperties.linearTilingFeatures;
@ -481,6 +481,7 @@ get_ahb_buffer_format_properties2(VkDevice device_h, const struct AHardwareBuffe
VkAndroidHardwareBufferFormatProperties2ANDROID *pProperties)
{
RADV_FROM_HANDLE(radv_device, device, device_h);
struct radv_physical_device *pdev = radv_device_physical(device);
/* Get a description of buffer contents . */
AHardwareBuffer_Desc desc;
@ -504,8 +505,7 @@ get_ahb_buffer_format_properties2(VkDevice device_h, const struct AHardwareBuffe
VkFormatProperties2 format_properties = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2};
radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(device->physical_device), p->format,
&format_properties);
radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(pdev), p->format, &format_properties);
if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER)
p->formatFeatures = format_properties.formatProperties.linearTilingFeatures;
@ -554,7 +554,7 @@ radv_GetAndroidHardwareBufferPropertiesANDROID(VkDevice device_h, const struct A
VkAndroidHardwareBufferPropertiesANDROID *pProperties)
{
RADV_FROM_HANDLE(radv_device, dev, device_h);
struct radv_physical_device *pdev = dev->physical_device;
struct radv_physical_device *pdev = radv_device_physical(dev);
VkAndroidHardwareBufferFormatPropertiesANDROID *format_prop =
vk_find_struct(pProperties->pNext, ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_ANDROID);

View file

@ -176,9 +176,10 @@ static void
radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize size, VkBufferCreateFlags flags,
VkBufferUsageFlags2KHR usage, VkMemoryRequirements2 *pMemoryRequirements)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
pMemoryRequirements->memoryRequirements.memoryTypeBits =
((1u << device->physical_device->memory_properties.memoryTypeCount) - 1u) &
~device->physical_device->memory_types_32bit;
((1u << pdev->memory_properties.memoryTypeCount) - 1u) & ~pdev->memory_types_32bit;
/* Allow 32-bit address-space for DGC usage, as this buffer will contain
* cmd buffer upload buffers, and those get passed to shaders through 32-bit
@ -190,14 +191,14 @@ radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize siz
* intersection is non-zero at least)
*/
if ((usage & VK_BUFFER_USAGE_2_INDIRECT_BUFFER_BIT_KHR) && radv_uses_device_generated_commands(device))
pMemoryRequirements->memoryRequirements.memoryTypeBits |= device->physical_device->memory_types_32bit;
pMemoryRequirements->memoryRequirements.memoryTypeBits |= pdev->memory_types_32bit;
/* Force 32-bit address-space for descriptor buffers usage because they are passed to shaders
* through 32-bit pointers.
*/
if (usage &
(VK_BUFFER_USAGE_2_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | VK_BUFFER_USAGE_2_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT))
pMemoryRequirements->memoryRequirements.memoryTypeBits = device->physical_device->memory_types_32bit;
pMemoryRequirements->memoryRequirements.memoryTypeBits = pdev->memory_types_32bit;
if (flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
pMemoryRequirements->memoryRequirements.alignment = 4096;

View file

@ -34,6 +34,7 @@ void
radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFormat vk_format, unsigned offset,
unsigned range, uint32_t *state)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct util_format_description *desc;
unsigned stride;
unsigned num_format, data_format;
@ -49,16 +50,15 @@ radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFor
va += offset;
if (device->physical_device->info.gfx_level != GFX8 && stride) {
if (pdev->info.gfx_level != GFX8 && stride) {
range /= stride;
}
rsrc_word3 = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));
if (device->physical_device->info.gfx_level >= GFX10) {
const struct gfx10_format *fmt =
&ac_get_gfx10_format_table(&device->physical_device->info)[vk_format_to_pipe_format(vk_format)];
if (pdev->info.gfx_level >= GFX10) {
const struct gfx10_format *fmt = &ac_get_gfx10_format_table(&pdev->info)[vk_format_to_pipe_format(vk_format)];
/* OOB_SELECT chooses the out-of-bounds check.
*
@ -81,7 +81,7 @@ radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFor
* offset+payload > NUM_RECORDS
*/
rsrc_word3 |= S_008F0C_FORMAT(fmt->img_format) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
S_008F0C_RESOURCE_LEVEL(device->physical_device->info.gfx_level < GFX11);
S_008F0C_RESOURCE_LEVEL(pdev->info.gfx_level < GFX11);
} else {
num_format = radv_translate_buffer_numformat(desc, first_non_void);
data_format = radv_translate_buffer_dataformat(desc, first_non_void);

File diff suppressed because it is too large Load diff

View file

@ -38,8 +38,9 @@ radv_set_context_reg_array(struct radeon_cmdbuf *cs, unsigned reg, unsigned num,
VkResult
radv_create_shadow_regs_preamble(struct radv_device *device, struct radv_queue_state *queue_state)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_winsys *ws = device->ws;
const struct radeon_info *gpu_info = &device->physical_device->info;
const struct radeon_info *gpu_info = &pdev->info;
VkResult result;
struct radeon_cmdbuf *cs = ws->cs_create(ws, AMD_IP_GFX, false);
@ -125,7 +126,8 @@ radv_emit_shadow_regs_preamble(struct radeon_cmdbuf *cs, const struct radv_devic
VkResult
radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct radv_queue *queue)
{
const struct radeon_info *gpu_info = &device->physical_device->info;
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
struct radeon_winsys *ws = device->ws;
struct radeon_cmdbuf *cs;
VkResult result;

View file

@ -108,17 +108,19 @@ radv_dump_trace(const struct radv_device *device, struct radeon_cmdbuf *cs, FILE
static void
radv_dump_mmapped_reg(const struct radv_device *device, FILE *f, unsigned offset)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_winsys *ws = device->ws;
uint32_t value;
if (ws->read_registers(ws, offset, 1, &value))
ac_dump_reg(f, device->physical_device->info.gfx_level, device->physical_device->info.family, offset, value, ~0);
ac_dump_reg(f, pdev->info.gfx_level, pdev->info.family, offset, value, ~0);
}
static void
radv_dump_debug_registers(const struct radv_device *device, FILE *f)
{
const struct radeon_info *gpu_info = &device->physical_device->info;
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
fprintf(f, "Memory-mapped registers:\n");
radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
@ -190,8 +192,9 @@ radv_dump_combined_image_sampler_descriptor(enum amd_gfx_level gfx_level, enum r
static void
radv_dump_descriptor_set(const struct radv_device *device, const struct radv_descriptor_set *set, unsigned id, FILE *f)
{
enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
enum radeon_family family = device->physical_device->info.family;
const struct radv_physical_device *pdev = radv_device_physical(device);
enum amd_gfx_level gfx_level = pdev->info.gfx_level;
enum radeon_family family = pdev->info.family;
const struct radv_descriptor_set_layout *layout;
int i;
@ -376,6 +379,8 @@ static void
radv_dump_shader(struct radv_device *device, struct radv_pipeline *pipeline, struct radv_shader *shader,
gl_shader_stage stage, const char *dump_dir, FILE *f)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!shader)
return;
@ -400,7 +405,7 @@ radv_dump_shader(struct radv_device *device, struct radv_pipeline *pipeline, str
fprintf(f, "NIR:\n%s\n", shader->nir_string);
}
fprintf(f, "%s IR:\n%s\n", device->physical_device->use_llvm ? "LLVM" : "ACO", shader->ir_string);
fprintf(f, "%s IR:\n%s\n", pdev->use_llvm ? "LLVM" : "ACO", shader->ir_string);
fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
radv_dump_shader_stats(device, pipeline, shader, stage, f);
@ -504,9 +509,10 @@ radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f)
}
if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR)) {
const struct radv_physical_device *pdev = radv_device_physical(device);
struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
unsigned num_waves = ac_get_wave_info(gfx_level, &device->physical_device->info, waves);
enum amd_gfx_level gfx_level = pdev->info.gfx_level;
unsigned num_waves = ac_get_wave_info(gfx_level, &pdev->info, waves);
fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);
@ -633,21 +639,22 @@ radv_dump_app_info(const struct radv_device *device, FILE *f)
static void
radv_dump_device_name(const struct radv_device *device, FILE *f)
{
const struct radeon_info *gpu_info = &device->physical_device->info;
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
#ifndef _WIN32
char kernel_version[128] = {0};
struct utsname uname_data;
#endif
#ifdef _WIN32
fprintf(f, "Device name: %s (DRM %i.%i.%i)\n\n", device->physical_device->marketing_name, gpu_info->drm_major,
gpu_info->drm_minor, gpu_info->drm_patchlevel);
fprintf(f, "Device name: %s (DRM %i.%i.%i)\n\n", pdev->marketing_name, gpu_info->drm_major, gpu_info->drm_minor,
gpu_info->drm_patchlevel);
#else
if (uname(&uname_data) == 0)
snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release);
fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", device->physical_device->marketing_name, gpu_info->drm_major,
gpu_info->drm_minor, gpu_info->drm_patchlevel, kernel_version);
fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", pdev->marketing_name, gpu_info->drm_major, gpu_info->drm_minor,
gpu_info->drm_patchlevel, kernel_version);
#endif
}
@ -655,18 +662,16 @@ static void
radv_dump_umr_ring(const struct radv_queue *queue, FILE *f)
{
#ifndef _WIN32
const struct radv_physical_device *pdev = radv_device_physical(queue->device);
const enum amd_ip_type ring = radv_queue_ring(queue);
const struct radv_device *device = queue->device;
char cmd[256];
/* TODO: Dump compute ring. */
if (ring != AMD_IP_GFX)
return;
sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -RS %s 2>&1", device->physical_device->bus_info.domain,
device->physical_device->bus_info.bus, device->physical_device->bus_info.dev,
device->physical_device->bus_info.func,
device->physical_device->info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -RS %s 2>&1", pdev->bus_info.domain, pdev->bus_info.bus,
pdev->bus_info.dev, pdev->bus_info.func, pdev->info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
fprintf(f, "\nUMR GFX ring:\n\n");
radv_dump_cmd(cmd, f);
#endif
@ -676,18 +681,17 @@ static void
radv_dump_umr_waves(struct radv_queue *queue, FILE *f)
{
#ifndef _WIN32
const struct radv_physical_device *pdev = radv_device_physical(queue->device);
enum amd_ip_type ring = radv_queue_ring(queue);
struct radv_device *device = queue->device;
char cmd[256];
/* TODO: Dump compute ring. */
if (ring != AMD_IP_GFX)
return;
sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -O bits,halt_waves -go 0 -wa %s -go 1 2>&1",
device->physical_device->bus_info.domain, device->physical_device->bus_info.bus,
device->physical_device->bus_info.dev, device->physical_device->bus_info.func,
device->physical_device->info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -O bits,halt_waves -go 0 -wa %s -go 1 2>&1", pdev->bus_info.domain,
pdev->bus_info.bus, pdev->bus_info.dev, pdev->bus_info.func,
pdev->info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
fprintf(f, "\nUMR GFX waves:\n\n");
radv_dump_cmd(cmd, f);
#endif
@ -707,7 +711,9 @@ radv_gpu_hang_occurred(struct radv_queue *queue, enum amd_ip_type ring)
bool
radv_vm_fault_occurred(struct radv_device *device, struct radv_winsys_gpuvm_fault_info *fault_info)
{
if (!device->physical_device->info.has_gpuvm_fault_query)
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!pdev->info.has_gpuvm_fault_query)
return false;
return device->ws->query_gpuvm_fault(device->ws, fault_info);
@ -742,6 +748,7 @@ radv_check_gpu_hangs(struct radv_queue *queue, const struct radv_winsys_submit_i
fprintf(stderr, "radv: GPU hang detected...\n");
#ifndef _WIN32
const struct radv_physical_device *pdev = radv_device_physical(queue->device);
const bool save_hang_report = !queue->device->vk.enabled_features.deviceFaultVendorBinary;
struct radv_winsys_gpuvm_fault_info fault_info = {0};
struct radv_device *device = queue->device;
@ -822,7 +829,7 @@ radv_check_gpu_hangs(struct radv_queue *queue, const struct radv_winsys_submit_i
if (vm_fault_occurred) {
fprintf(f, "VM fault report.\n\n");
fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n", fault_info.addr);
ac_print_gpuvm_fault_status(f, device->physical_device->info.gfx_level, fault_info.status);
ac_print_gpuvm_fault_status(f, pdev->info.gfx_level, fault_info.status);
}
break;
case RADV_DEVICE_FAULT_CHUNK_APP_INFO:
@ -830,7 +837,7 @@ radv_check_gpu_hangs(struct radv_queue *queue, const struct radv_winsys_submit_i
break;
case RADV_DEVICE_FAULT_CHUNK_GPU_INFO:
radv_dump_device_name(device, f);
ac_print_gpu_info(&device->physical_device->info, f);
ac_print_gpu_info(&pdev->info, f);
break;
case RADV_DEVICE_FAULT_CHUNK_DMESG:
radv_dump_dmesg(f);
@ -1010,12 +1017,13 @@ struct radv_sq_hw_reg {
static void
radv_dump_sq_hw_regs(struct radv_device *device)
{
enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
enum radeon_family family = device->physical_device->info.family;
const struct radv_physical_device *pdev = radv_device_physical(device);
enum amd_gfx_level gfx_level = pdev->info.gfx_level;
enum radeon_family family = pdev->info.family;
struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];
fprintf(stderr, "\nHardware registers:\n");
if (device->physical_device->info.gfx_level >= GFX10) {
if (pdev->info.gfx_level >= GFX10) {
ac_dump_reg(stderr, gfx_level, family, R_000408_SQ_WAVE_STATUS, regs->status, ~0);
ac_dump_reg(stderr, gfx_level, family, R_00040C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0);
ac_dump_reg(stderr, gfx_level, family, R_00045C_SQ_WAVE_HW_ID1, regs->hw_id, ~0);
@ -1084,6 +1092,7 @@ radv_GetDeviceFaultInfoEXT(VkDevice _device, VkDeviceFaultCountsEXT *pFaultCount
&pFaultCounts->addressInfoCount);
struct radv_winsys_gpuvm_fault_info fault_info = {0};
RADV_FROM_HANDLE(radv_device, device, _device);
const struct radv_physical_device *pdev = radv_device_physical(device);
bool vm_fault_occurred = false;
/* Query if a GPUVM fault happened. */
@ -1094,8 +1103,6 @@ radv_GetDeviceFaultInfoEXT(VkDevice _device, VkDeviceFaultCountsEXT *pFaultCount
pFaultCounts->vendorBinarySize = 0;
if (device->gpu_hang_report) {
const struct radv_physical_device *pdev = device->physical_device;
VkDeviceFaultVendorBinaryHeaderVersionOneEXT hdr;
hdr.headerSize = sizeof(VkDeviceFaultVendorBinaryHeaderVersionOneEXT);
@ -1127,7 +1134,7 @@ radv_GetDeviceFaultInfoEXT(VkDevice _device, VkDeviceFaultCountsEXT *pFaultCount
if (pFaultInfo)
strncpy(pFaultInfo->description, "A GPUVM fault has been detected", sizeof(pFaultInfo->description));
if (device->physical_device->info.gfx_level >= GFX10) {
if (pdev->info.gfx_level >= GFX10) {
addr_fault_info.addressType = G_00A130_RW(fault_info.status) ? VK_DEVICE_FAULT_ADDRESS_TYPE_WRITE_INVALID_EXT
: VK_DEVICE_FAULT_ADDRESS_TYPE_READ_INVALID_EXT;
} else {

View file

@ -1074,6 +1074,8 @@ write_texel_buffer_descriptor(struct radv_device *device, struct radv_cmd_buffer
static ALWAYS_INLINE void
write_buffer_descriptor(struct radv_device *device, unsigned *dst, uint64_t va, uint64_t range)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!va) {
memset(dst, 0, 4 * 4);
return;
@ -1082,9 +1084,9 @@ write_buffer_descriptor(struct radv_device *device, unsigned *dst, uint64_t va,
uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (device->physical_device->info.gfx_level >= GFX10) {
} else if (pdev->info.gfx_level >= GFX10) {
rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
S_008F0C_RESOURCE_LEVEL(1);
} else {

View file

@ -97,10 +97,10 @@ radv_GetMemoryHostPointerPropertiesEXT(VkDevice _device, VkExternalMemoryHandleT
VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
{
RADV_FROM_HANDLE(radv_device, device, _device);
const struct radv_physical_device *pdev = radv_device_physical(device);
switch (handleType) {
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
const struct radv_physical_device *pdev = device->physical_device;
uint32_t memoryTypeBits = 0;
for (int i = 0; i < pdev->memory_properties.memoryTypeCount; i++) {
if (pdev->memory_domains[i] == RADEON_DOMAIN_GTT && !(pdev->memory_flags[i] & RADEON_FLAG_GTT_WC)) {
@ -186,8 +186,10 @@ static struct radv_shader_part_cache_ops vs_prolog_ops = {
static VkResult
radv_device_init_vs_prologs(struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!radv_shader_part_cache_init(&device->vs_prologs, &vs_prolog_ops))
return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
return vk_error(pdev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
/* don't pre-compile prologs if we want to print them */
if (device->instance->debug_flags & RADV_DEBUG_DUMP_PROLOGS)
@ -196,9 +198,9 @@ radv_device_init_vs_prologs(struct radv_device *device)
struct radv_vs_prolog_key key;
memset(&key, 0, sizeof(key));
key.as_ls = false;
key.is_ngg = device->physical_device->use_ngg;
key.is_ngg = pdev->use_ngg;
key.next_stage = MESA_SHADER_VERTEX;
key.wave32 = device->physical_device->ge_wave_size == 32;
key.wave32 = pdev->ge_wave_size == 32;
for (unsigned i = 1; i <= MAX_VERTEX_ATTRIBS; i++) {
key.instance_rate_inputs = 0;
@ -206,7 +208,7 @@ radv_device_init_vs_prologs(struct radv_device *device)
device->simple_vs_prologs[i - 1] = radv_create_vs_prolog(device, &key);
if (!device->simple_vs_prologs[i - 1])
return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
return vk_error(pdev->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
unsigned idx = 0;
@ -218,7 +220,7 @@ radv_device_init_vs_prologs(struct radv_device *device)
struct radv_shader_part *prolog = radv_create_vs_prolog(device, &key);
if (!prolog)
return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
return vk_error(pdev->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
assert(idx == radv_instance_rate_prolog_index(num_attributes, key.instance_rate_inputs));
device->instance_rate_vs_prologs[idx++] = prolog;
@ -638,11 +640,11 @@ capture_trace(VkQueue _queue)
static void
radv_device_init_cache_key(struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_device_cache_key *key = &device->cache_key;
key->disable_trunc_coord = device->disable_trunc_coord;
key->image_2d_view_of_3d =
device->vk.enabled_features.image2DViewOf3D && device->physical_device->info.gfx_level == GFX9;
key->image_2d_view_of_3d = device->vk.enabled_features.image2DViewOf3D && pdev->info.gfx_level == GFX9;
key->mesh_shader_queries = device->vk.enabled_features.meshShaderQueries;
key->primitives_generated_query = radv_uses_primitives_generated_query(device);
@ -655,7 +657,7 @@ radv_device_init_cache_key(struct radv_device *device)
* enabled, regardless of what features are actually enabled on the logical device.
*/
if (device->vk.enabled_features.shaderObject) {
key->image_2d_view_of_3d = device->physical_device->info.gfx_level == GFX9;
key->image_2d_view_of_3d = pdev->info.gfx_level == GFX9;
key->primitives_generated_query = true;
}
@ -701,7 +703,6 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
device->vk.command_buffer_ops = &radv_cmd_buffer_ops;
device->instance = pdev->instance;
device->physical_device = pdev;
init_dispatch_tables(device, pdev);
@ -782,13 +783,12 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
device->shader_use_invisible_vram = (device->instance->perftest_flags & RADV_PERFTEST_DMA_SHADERS) &&
/* SDMA buffer copy is only implemented for GFX7+. */
device->physical_device->info.gfx_level >= GFX7;
pdev->info.gfx_level >= GFX7;
result = radv_init_shader_upload_queue(device);
if (result != VK_SUCCESS)
goto fail;
device->pbb_allowed =
device->physical_device->info.gfx_level >= GFX9 && !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
device->pbb_allowed = pdev->info.gfx_level >= GFX9 && !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
device->disable_trunc_coord = device->instance->drirc.disable_trunc_coord;
@ -818,13 +818,13 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
if (device->physical_device->info.gfx_level >= GFX7) {
if (pdev->info.gfx_level >= GFX7) {
/* If the KMD allows it (there is a KMD hw register for it),
* allow launching waves out-of-order.
*/
device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
}
if (device->physical_device->info.gfx_level >= GFX10) {
if (pdev->info.gfx_level >= GFX10) {
/* Enable asynchronous compute tunneling. The KMD restricts this feature
* to high-priority compute queues, so setting the bit on any other queue
* is a no-op. PAL always sets this bit as well.
@ -862,7 +862,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
}
if (device->instance->vk.trace_mode & RADV_TRACE_MODE_RGP) {
if (device->physical_device->info.gfx_level < GFX8 || device->physical_device->info.gfx_level > GFX11) {
if (pdev->info.gfx_level < GFX8 || pdev->info.gfx_level > GFX11) {
fprintf(stderr, "GPU hardware not supported: refer to "
"the RGP documentation for the list of "
"supported GPUs!\n");
@ -882,13 +882,13 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
radv_sqtt_queue_events_enabled() ? "enabled" : "disabled");
if (radv_spm_trace_enabled(device->instance)) {
if (device->physical_device->info.gfx_level >= GFX10) {
if (pdev->info.gfx_level >= GFX10) {
if (!radv_spm_init(device)) {
result = VK_ERROR_INITIALIZATION_FAILED;
goto fail;
}
} else {
fprintf(stderr, "radv: SPM isn't supported for this GPU (%s)!\n", device->physical_device->name);
fprintf(stderr, "radv: SPM isn't supported for this GPU (%s)!\n", pdev->name);
}
}
}
@ -905,7 +905,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
if (getenv("RADV_TRAP_HANDLER")) {
/* TODO: Add support for more hardware. */
assert(device->physical_device->info.gfx_level == GFX8);
assert(pdev->info.gfx_level == GFX8);
fprintf(stderr, "**********************************************************************\n");
fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n");
@ -922,7 +922,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
}
}
if (device->physical_device->info.gfx_level == GFX10_3) {
if (pdev->info.gfx_level == GFX10_3) {
if (getenv("RADV_FORCE_VRS_CONFIG_FILE")) {
const char *file = radv_get_force_vrs_config_file();
@ -942,7 +942,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
}
/* PKT3_LOAD_SH_REG_INDEX is supported on GFX8+, but it hangs with compute queues until GFX10.3. */
device->load_grid_size_from_user_sgpr = device->physical_device->info.gfx_level >= GFX10_3;
device->load_grid_size_from_user_sgpr = pdev->info.gfx_level >= GFX10_3;
device->keep_shader_info = keep_shader_info;
@ -1009,7 +1009,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
goto fail_cache;
}
if (!device->physical_device->ac_perfcounters.blocks) {
if (!pdev->ac_perfcounters.blocks) {
result = VK_ERROR_INITIALIZATION_FAILED;
goto fail_cache;
}
@ -1029,7 +1029,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
if (result != VK_SUCCESS)
goto fail_cache;
if (device->physical_device->info.gfx_level == GFX11 && device->physical_device->info.has_dedicated_vram &&
if (pdev->info.gfx_level == GFX11 && pdev->info.has_dedicated_vram &&
device->instance->drirc.force_pstate_peak_gfx11_dgpu) {
if (!radv_device_acquire_performance_counters(device))
fprintf(stderr, "radv: failed to set pstate to profile_peak.\n");
@ -1197,10 +1197,10 @@ radv_GetImageMemoryRequirements2(VkDevice _device, const VkImageMemoryRequiremen
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_image, image, pInfo->image);
const struct radv_physical_device *pdev = radv_device_physical(device);
pMemoryRequirements->memoryRequirements.memoryTypeBits =
((1u << device->physical_device->memory_properties.memoryTypeCount) - 1u) &
~device->physical_device->memory_types_32bit;
((1u << pdev->memory_properties.memoryTypeCount) - 1u) & ~pdev->memory_types_32bit;
pMemoryRequirements->memoryRequirements.size = image->size;
pMemoryRequirements->memoryRequirements.alignment = image->alignment;
@ -1254,7 +1254,9 @@ radv_surface_max_layer_count(struct radv_image_view *iview)
unsigned
radv_get_dcc_max_uncompressed_block_size(const struct radv_device *device, const struct radv_image *image)
{
if (device->physical_device->info.gfx_level < GFX10 && image->vk.samples > 1) {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level < GFX10 && image->vk.samples > 1) {
if (image->planes[0].surface.bpe == 1)
return V_028C78_MAX_BLOCK_SIZE_64B;
else if (image->planes[0].surface.bpe == 2)
@ -1267,7 +1269,9 @@ radv_get_dcc_max_uncompressed_block_size(const struct radv_device *device, const
static unsigned
get_dcc_min_compressed_block_size(const struct radv_device *device)
{
if (!device->physical_device->info.has_dedicated_vram) {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!pdev->info.has_dedicated_vram) {
/* amdvlk: [min-compressed-block-size] should be set to 32 for
* dGPU and 64 for APU because all of our APUs to date use
* DIMMs which have a request granularity size of 64B while all
@ -1282,6 +1286,7 @@ get_dcc_min_compressed_block_size(const struct radv_device *device)
static uint32_t
radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iview)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned max_uncompressed_block_size = radv_get_dcc_max_uncompressed_block_size(device, iview->image);
unsigned min_compressed_block_size = get_dcc_min_compressed_block_size(device);
unsigned max_compressed_block_size;
@ -1293,7 +1298,7 @@ radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iv
/* For GFX9+ ac_surface computes values for us (except min_compressed
* and max_uncompressed) */
if (device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
max_compressed_block_size = iview->image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size;
independent_128b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_128B_blocks;
independent_64b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_64B_blocks;
@ -1322,12 +1327,12 @@ radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iv
S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
result |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX11(independent_128b_blocks) |
S_028C78_DISABLE_CONSTANT_ENCODE_REG(1) |
S_028C78_FDCC_ENABLE(radv_dcc_enabled(iview->image, iview->vk.base_mip_level));
if (device->physical_device->info.family >= CHIP_GFX1103_R2) {
if (pdev->info.family >= CHIP_GFX1103_R2) {
result |= S_028C78_ENABLE_MAX_COMP_FRAG_OVERRIDE(1) | S_028C78_MAX_COMP_FRAGS(iview->image->vk.samples >= 4);
}
} else {
@ -1341,6 +1346,7 @@ void
radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
struct radv_image_view *iview)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct util_format_description *desc;
unsigned ntype, format, swap, endian;
unsigned blend_clamp = 0, blend_bypass = 0;
@ -1354,7 +1360,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
memset(cb, 0, sizeof(*cb));
/* Intensity is implemented as Red, so treat it that way. */
if (device->physical_device->info.gfx_level >= GFX11)
if (pdev->info.gfx_level >= GFX11)
cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX11(desc->swizzle[3] == PIPE_SWIZZLE_1);
else
cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX6(desc->swizzle[3] == PIPE_SWIZZLE_1);
@ -1369,11 +1375,11 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
cb->cb_color_base = va >> 8;
if (device->physical_device->info.gfx_level >= GFX9) {
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX11) {
cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);
} else if (device->physical_device->info.gfx_level >= GFX10) {
} else if (pdev->info.gfx_level >= GFX10) {
cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
S_028EE0_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
S_028EE0_CMASK_PIPE_ALIGNED(1) |
@ -1414,13 +1420,13 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
if (radv_image_has_fmask(iview->image)) {
if (device->physical_device->info.gfx_level >= GFX7)
if (pdev->info.gfx_level >= GFX7)
cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1);
cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.color.fmask.tiling_index);
cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.color.fmask.slice_tile_max);
} else {
/* This must be set for fast clear to work without FMASK. */
if (device->physical_device->info.gfx_level >= GFX7)
if (pdev->info.gfx_level >= GFX7)
cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
@ -1435,7 +1441,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset;
va += surf->meta_offset;
if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && device->physical_device->info.gfx_level <= GFX8)
if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && pdev->info.gfx_level <= GFX8)
va += plane->surface.u.legacy.color.dcc_level[iview->vk.base_mip_level].dcc_offset;
unsigned dcc_tile_swizzle = tile_swizzle;
@ -1452,7 +1458,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
if (iview->image->vk.samples > 1) {
unsigned log_samples = util_logbase2(iview->image->vk.samples);
if (device->physical_device->info.gfx_level >= GFX11)
if (pdev->info.gfx_level >= GFX11)
cb->cb_color_attrib |= S_028C74_NUM_FRAGMENTS_GFX11(log_samples);
else
cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS_GFX6(log_samples);
@ -1467,7 +1473,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
}
ntype = ac_get_cb_number_type(desc->format);
format = ac_get_cb_format(device->physical_device->info.gfx_level, desc->format);
format = ac_get_cb_format(pdev->info.gfx_level, desc->format);
assert(format != V_028C70_COLOR_INVALID);
swap = radv_translate_colorswap(iview->vk.format, false);
@ -1498,14 +1504,14 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
format != V_028C70_COLOR_24_8) |
S_028C70_NUMBER_TYPE(ntype);
if (device->physical_device->info.gfx_level >= GFX11)
if (pdev->info.gfx_level >= GFX11)
cb->cb_color_info |= S_028C70_FORMAT_GFX11(format);
else
cb->cb_color_info |= S_028C70_FORMAT_GFX6(format) | S_028C70_ENDIAN(endian);
if (radv_image_has_fmask(iview->image)) {
cb->cb_color_info |= S_028C70_COMPRESSION(1);
if (device->physical_device->info.gfx_level == GFX6) {
if (pdev->info.gfx_level == GFX6) {
unsigned fmask_bankh = util_logbase2(surf->u.legacy.color.fmask.bankh);
cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
}
@ -1514,7 +1520,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
/* Allow the texture block to read FMASK directly without decompressing it. */
cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
if (device->physical_device->info.gfx_level == GFX8) {
if (pdev->info.gfx_level == GFX8) {
/* Set CMASK into a tiling format that allows
* the texture block to read it.
*/
@ -1527,25 +1533,25 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && !iview->disable_dcc_mrt &&
device->physical_device->info.gfx_level < GFX11)
pdev->info.gfx_level < GFX11)
cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
/* This must be set for fast clear to work without FMASK. */
if (!radv_image_has_fmask(iview->image) && device->physical_device->info.gfx_level == GFX6) {
if (!radv_image_has_fmask(iview->image) && pdev->info.gfx_level == GFX6) {
unsigned bankh = util_logbase2(surf->u.legacy.bankh);
cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
}
if (device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
unsigned mip0_depth = iview->image->vk.image_type == VK_IMAGE_TYPE_3D ? (iview->extent.depth - 1)
: (iview->image->vk.array_layers - 1);
unsigned width = vk_format_get_plane_width(iview->image->vk.format, iview->plane_id, iview->extent.width);
unsigned height = vk_format_get_plane_height(iview->image->vk.format, iview->plane_id, iview->extent.height);
unsigned max_mip = iview->image->vk.mip_levels - 1;
if (device->physical_device->info.gfx_level >= GFX10) {
if (pdev->info.gfx_level >= GFX10) {
unsigned base_level = iview->vk.base_mip_level;
if (iview->nbc_view.valid) {
@ -1556,7 +1562,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(base_level);
cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) | S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
S_028EE0_RESOURCE_LEVEL(device->physical_device->info.gfx_level >= GFX11 ? 0 : 1);
S_028EE0_RESOURCE_LEVEL(pdev->info.gfx_level >= GFX11 ? 0 : 1);
} else {
cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->vk.base_mip_level);
cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
@ -1567,7 +1573,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
*
* We set the pitch in MIP0_WIDTH.
*/
if (device->physical_device->info.gfx_level && iview->image->vk.image_type == VK_IMAGE_TYPE_2D &&
if (pdev->info.gfx_level && iview->image->vk.image_type == VK_IMAGE_TYPE_2D &&
iview->image->vk.array_layers == 1 && plane->surface.is_linear) {
assert((plane->surface.u.gfx9.surf_pitch * plane->surface.bpe) % 256 == 0);
@ -1586,11 +1592,12 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
static unsigned
radv_calc_decompress_on_z_planes(const struct radv_device *device, struct radv_image_view *iview)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned max_zplanes = 0;
assert(radv_image_is_tc_compat_htile(iview->image));
if (device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
/* Default value for 32-bit depth surfaces. */
max_zplanes = 4;
@ -1598,9 +1605,8 @@ radv_calc_decompress_on_z_planes(const struct radv_device *device, struct radv_i
max_zplanes = 2;
/* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */
if (device->physical_device->info.has_two_planes_iterate256_bug &&
radv_image_get_iterate256(device, iview->image) && !radv_image_tile_stencil_disabled(device, iview->image) &&
iview->image->vk.samples == 4) {
if (pdev->info.has_two_planes_iterate256_bug && radv_image_get_iterate256(device, iview->image) &&
!radv_image_tile_stencil_disabled(device, iview->image) && iview->image->vk.samples == 4) {
max_zplanes = 1;
}
@ -1650,6 +1656,7 @@ void
radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buffer_info *ds,
struct radv_image_view *iview, VkImageAspectFlags ds_aspects)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned level = iview->vk.base_mip_level;
unsigned format, stencil_format;
uint64_t va, s_offs, z_offs;
@ -1668,7 +1675,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
ds->db_depth_view = S_028008_SLICE_START(iview->vk.base_array_layer) | S_028008_SLICE_MAX(max_slice) |
S_028008_Z_READ_ONLY(!(ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) |
S_028008_STENCIL_READ_ONLY(!(ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT));
if (device->physical_device->info.gfx_level >= GFX10) {
if (pdev->info.gfx_level >= GFX10) {
ds->db_depth_view |=
S_028008_SLICE_START_HI(iview->vk.base_array_layer >> 11) | S_028008_SLICE_MAX_HI(max_slice >> 11);
}
@ -1681,20 +1688,19 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
/* Recommended value for better performance with 4x and 8x. */
ds->db_render_override2 = S_028010_DECOMPRESS_Z_ON_FLUSH(iview->image->vk.samples >= 4) |
S_028010_CENTROID_COMPUTATION_MODE(device->physical_device->info.gfx_level >= GFX10_3);
S_028010_CENTROID_COMPUTATION_MODE(pdev->info.gfx_level >= GFX10_3);
if (device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
assert(surf->u.gfx9.surf_offset == 0);
s_offs += surf->u.gfx9.zs.stencil_offset;
ds->db_z_info = S_028038_FORMAT(format) | S_028038_NUM_SAMPLES(util_logbase2(iview->image->vk.samples)) |
S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) | S_028038_MAXMIP(iview->image->vk.mip_levels - 1) |
S_028038_ZRANGE_PRECISION(1) |
S_028040_ITERATE_256(device->physical_device->info.gfx_level >= GFX11);
S_028038_ZRANGE_PRECISION(1) | S_028040_ITERATE_256(pdev->info.gfx_level >= GFX11);
ds->db_stencil_info = S_02803C_FORMAT(stencil_format) | S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode) |
S_028044_ITERATE_256(device->physical_device->info.gfx_level >= GFX11);
S_028044_ITERATE_256(pdev->info.gfx_level >= GFX11);
if (device->physical_device->info.gfx_level == GFX9) {
if (pdev->info.gfx_level == GFX9) {
ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.epitch);
ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.zs.stencil_epitch);
}
@ -1711,7 +1717,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
if (device->physical_device->info.gfx_level >= GFX10) {
if (pdev->info.gfx_level >= GFX10) {
bool iterate256 = radv_image_get_iterate256(device, iview->image);
ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
@ -1732,7 +1738,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
ds->db_htile_data_base = va >> 8;
ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1);
if (device->physical_device->info.gfx_level == GFX9) {
if (pdev->info.gfx_level == GFX9) {
ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
}
@ -1741,7 +1747,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
}
}
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
radv_gfx11_set_db_render_control(device, iview->image->vk.samples, &ds->db_render_control);
}
} else {
@ -1760,8 +1766,8 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
if (iview->image->vk.samples > 1)
ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->vk.samples));
if (device->physical_device->info.gfx_level >= GFX7) {
const struct radeon_info *gpu_info = &device->physical_device->info;
if (pdev->info.gfx_level >= GFX7) {
const struct radeon_info *gpu_info = &pdev->info;
unsigned tiling_index = surf->u.legacy.tiling_index[level];
unsigned stencil_index = surf->u.legacy.zs.stencil_tiling_index[level];
unsigned macro_index = surf->u.legacy.macro_tile_index;
@ -1820,7 +1826,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
void
radv_gfx11_set_db_render_control(const struct radv_device *device, unsigned num_samples, unsigned *db_render_control)
{
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned max_allowed_tiles_in_wave = 0;
if (pdev->info.has_dedicated_vram) {
@ -1911,6 +1917,7 @@ radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBi
VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_physical_device *pdev = radv_device_physical(device);
switch (handleType) {
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: {
@ -1919,7 +1926,7 @@ radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBi
if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))
return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
pMemoryFdProperties->memoryTypeBits = radv_compute_valid_memory_types(device->physical_device, domains, flags);
pMemoryFdProperties->memoryTypeBits = radv_compute_valid_memory_types(pdev, domains, flags);
return VK_SUCCESS;
}
default:
@ -1941,7 +1948,8 @@ radv_GetCalibratedTimestampsKHR(VkDevice _device, uint32_t timestampCount,
{
#ifndef _WIN32
RADV_FROM_HANDLE(radv_device, device, _device);
uint32_t clock_crystal_freq = device->physical_device->info.clock_crystal_freq;
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t clock_crystal_freq = pdev->info.clock_crystal_freq;
int d;
uint64_t begin, end;
uint64_t max_clock_period = 0;
@ -1992,10 +2000,11 @@ radv_GetCalibratedTimestampsKHR(VkDevice _device, uint32_t timestampCount,
bool
radv_device_set_pstate(struct radv_device *device, bool enable)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_winsys *ws = device->ws;
enum radeon_ctx_pstate pstate = enable ? RADEON_CTX_PSTATE_PEAK : RADEON_CTX_PSTATE_NONE;
if (device->physical_device->info.has_stable_pstate) {
if (pdev->info.has_stable_pstate) {
/* pstate is per-device; setting it for one ctx is sufficient.
* We pick the first initialized one below. */
for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++)

View file

@ -35,6 +35,7 @@ radv_get_sequence_size_compute(const struct radv_indirect_command_layout *layout
const struct radv_compute_pipeline *pipeline, uint32_t *cmd_size)
{
const struct radv_device *device = container_of(layout->base.device, struct radv_device, vk);
const struct radv_physical_device *pdev = radv_device_physical(device);
/* dispatch */
*cmd_size += 5 * 4;
@ -55,7 +56,7 @@ radv_get_sequence_size_compute(const struct radv_indirect_command_layout *layout
/* COMPUTE_PGM_{LO,RSRC1,RSRC2} */
*cmd_size += 7 * 4;
if (device->physical_device->info.gfx_level >= GFX10) {
if (pdev->info.gfx_level >= GFX10) {
/* COMPUTE_PGM_RSRC3 */
*cmd_size += 3 * 4;
}
@ -87,6 +88,7 @@ radv_get_sequence_size_graphics(const struct radv_indirect_command_layout *layou
uint32_t *upload_size)
{
const struct radv_device *device = container_of(layout->base.device, struct radv_device, vk);
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader *vs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX);
if (layout->bind_vbo_mask) {
@ -115,7 +117,7 @@ radv_get_sequence_size_graphics(const struct radv_indirect_command_layout *layou
} else {
if (layout->draw_mesh_tasks) {
/* userdata writes + instance count + non-indexed draw */
*cmd_size += (6 + 2 + (device->physical_device->mesh_fast_launch_2 ? 5 : 3)) * 4;
*cmd_size += (6 + 2 + (pdev->mesh_fast_launch_2 ? 5 : 3)) * 4;
} else {
/* userdata writes + instance count + non-indexed draw */
*cmd_size += (5 + 2 + 3) * 4;
@ -189,7 +191,8 @@ radv_get_sequence_size(const struct radv_indirect_command_layout *layout, struct
static uint32_t
radv_align_cmdbuf_size(const struct radv_device *device, uint32_t size, enum amd_ip_type ip_type)
{
const uint32_t ib_alignment = device->physical_device->info.ip[ip_type].ib_alignment;
const struct radv_physical_device *pdev = radv_device_physical(device);
const uint32_t ib_alignment = pdev->info.ip[ip_type].ib_alignment;
return align(size, ib_alignment);
}
@ -365,7 +368,9 @@ nir_pkt3(nir_builder *b, unsigned op, nir_def *len)
static nir_def *
dgc_get_nop_packet(nir_builder *b, const struct radv_device *device)
{
if (device->physical_device->info.gfx_ib_pad_with_type2) {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_ib_pad_with_type2) {
return nir_imm_int(b, PKT2_NOP_PAD);
} else {
return nir_imm_int(b, PKT3_NOP_PAD);
@ -691,6 +696,8 @@ dgc_main_cmd_buf_offset(nir_builder *b, const struct radv_device *device)
static void
build_dgc_buffer_tail(nir_builder *b, nir_def *sequence_count, const struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_def *global_id = get_global_ids(b, 1);
nir_def *cmd_buf_stride = load_param32(b, cmd_buf_stride);
@ -718,7 +725,7 @@ build_dgc_buffer_tail(nir_builder *b, nir_def *sequence_count, const struct radv
nir_def *packet, *packet_size;
if (device->physical_device->info.gfx_ib_pad_with_type2) {
if (pdev->info.gfx_ib_pad_with_type2) {
packet_size = nir_imm_int(b, 4);
packet = nir_imm_int(b, PKT2_NOP_PAD);
} else {
@ -741,6 +748,8 @@ build_dgc_buffer_tail(nir_builder *b, nir_def *sequence_count, const struct radv
static void
build_dgc_buffer_preamble(nir_builder *b, nir_def *sequence_count, const struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_def *global_id = get_global_ids(b, 1);
nir_def *use_preamble = nir_ine_imm(b, load_param8(b, use_preamble), 0);
@ -778,7 +787,7 @@ build_dgc_buffer_preamble(nir_builder *b, nir_def *sequence_count, const struct
nir_def *chain_packets[] = {
nir_imm_int(b, PKT3(PKT3_INDIRECT_BUFFER, 2, 0)),
addr,
nir_imm_int(b, device->physical_device->info.address32_hi),
nir_imm_int(b, pdev->info.address32_hi),
nir_ior_imm(b, words, S_3F2_CHAIN(1) | S_3F2_VALID(1) | S_3F2_PRE_ENA(false)),
};
@ -861,6 +870,8 @@ dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf
nir_def *index_buffer_offset, nir_def *ibo_type_32, nir_def *ibo_type_8,
nir_variable *max_index_count_var, const struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_def *index_stream_offset = nir_iadd(b, index_buffer_offset, stream_base);
nir_def *data = nir_load_ssbo(b, 4, 32, stream_buf, index_stream_offset);
@ -876,10 +887,9 @@ dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf
nir_def *cmd_values[3 + 2 + 3];
if (device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
if (device->physical_device->info.gfx_level < GFX9 ||
(device->physical_device->info.gfx_level == GFX9 && device->physical_device->info.me_fw_version < 26))
if (pdev->info.gfx_level < GFX9 || (pdev->info.gfx_level == GFX9 && pdev->info.me_fw_version < 26))
opcode = PKT3_SET_UCONFIG_REG;
cmd_values[0] = nir_imm_int(b, PKT3(opcode, 1, 0));
cmd_values[1] = nir_imm_int(b, (R_03090C_VGT_INDEX_TYPE - CIK_UCONFIG_REG_OFFSET) >> 2 | (2u << 28));
@ -1186,6 +1196,8 @@ static void
dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
nir_def *vbo_bind_mask, nir_variable *upload_offset, const struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_def *vbo_cnt = load_param8(b, vbo_cnt);
nir_variable *vbo_idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "vbo_idx");
nir_store_var(b, vbo_idx, nir_imm_int(b, 0), 0x1);
@ -1252,9 +1264,9 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
nir_pop_if(b, NULL);
nir_def *convert_cond = nir_ine_imm(b, nir_load_var(b, num_records), 0);
if (device->physical_device->info.gfx_level == GFX9)
if (pdev->info.gfx_level == GFX9)
convert_cond = nir_imm_false(b);
else if (device->physical_device->info.gfx_level != GFX8)
else if (pdev->info.gfx_level != GFX8)
convert_cond = nir_iand(b, convert_cond, nir_ieq_imm(b, stride, 0));
nir_def *new_records =
@ -1264,7 +1276,7 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
}
nir_push_else(b, NULL);
{
if (device->physical_device->info.gfx_level != GFX8) {
if (pdev->info.gfx_level != GFX8) {
nir_push_if(b, nir_ine_imm(b, stride, 0));
{
nir_def *r = nir_iadd(b, nir_load_var(b, num_records), nir_iadd_imm(b, stride, -1));
@ -1276,7 +1288,7 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
nir_pop_if(b, NULL);
nir_def *rsrc_word3 = nir_channel(b, nir_load_var(b, vbo_data), 3);
if (device->physical_device->info.gfx_level >= GFX10) {
if (pdev->info.gfx_level >= GFX10) {
nir_def *oob_select = nir_bcsel(b, nir_ieq_imm(b, stride, 0), nir_imm_int(b, V_008F0C_OOB_SELECT_RAW),
nir_imm_int(b, V_008F0C_OOB_SELECT_STRUCTURED));
rsrc_word3 = nir_iand_imm(b, rsrc_word3, C_008F0C_OOB_SELECT);
@ -1408,6 +1420,8 @@ static void
dgc_emit_draw_mesh_tasks(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
nir_def *draw_params_offset, nir_def *sequence_id, const struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
nir_def *stream_offset = nir_iadd(b, draw_params_offset, stream_base);
@ -1424,7 +1438,7 @@ dgc_emit_draw_mesh_tasks(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_
dgc_emit_userdata_mesh(b, cs, vtx_base_sgpr, x, y, z, sequence_id, device);
dgc_emit_instance_count(b, cs, nir_imm_int(b, 1));
if (device->physical_device->mesh_fast_launch_2) {
if (pdev->mesh_fast_launch_2) {
dgc_emit_dispatch_mesh_direct(b, cs, x, y, z);
} else {
nir_def *vertex_count = nir_imul(b, x, nir_imul(b, y, z));
@ -1454,6 +1468,8 @@ static void
dgc_emit_bind_pipeline(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
nir_def *pipeline_params_offset, const struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);
nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);
@ -1465,7 +1481,7 @@ dgc_emit_bind_pipeline(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
dgc_emit1(b, cs, load_metadata32(b, rsrc1));
dgc_emit1(b, cs, load_metadata32(b, rsrc2));
if (device->physical_device->info.gfx_level >= GFX10) {
if (pdev->info.gfx_level >= GFX10) {
dgc_emit_set_sh_reg_seq(b, cs, R_00B8A0_COMPUTE_PGM_RSRC3, 1);
dgc_emit1(b, cs, load_metadata32(b, rsrc3));
}
@ -1504,6 +1520,7 @@ dgc_is_cond_render_enabled(nir_builder *b)
static nir_shader *
build_dgc_prepare_shader(struct radv_device *dev)
{
const struct radv_physical_device *pdev = radv_device_physical(dev);
nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_dgc_prepare");
b.shader->info.workgroup_size[0] = 64;
@ -1554,7 +1571,7 @@ build_dgc_prepare_shader(struct radv_device *dev)
struct dgc_cmdbuf cmd_buf = {
.descriptor = radv_meta_load_descriptor(&b, 0, DGC_DESC_PREPARE),
.offset = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "cmd_buf_offset"),
.gfx_level = dev->physical_device->info.gfx_level,
.gfx_level = pdev->info.gfx_level,
.sqtt_enabled = !!dev->sqtt.bo,
};
nir_store_var(&b, cmd_buf.offset, nir_iadd(&b, nir_imul(&b, global_id, cmd_buf_stride), cmd_buf_base_offset), 1);
@ -1647,7 +1664,7 @@ build_dgc_prepare_shader(struct radv_device *dev)
/* Pad the cmdbuffer if we did not use the whole stride */
nir_push_if(&b, nir_ine(&b, nir_load_var(&b, cmd_buf.offset), cmd_buf_end));
{
if (dev->physical_device->info.gfx_ib_pad_with_type2) {
if (pdev->info.gfx_ib_pad_with_type2) {
nir_push_loop(&b);
{
nir_def *curr_offset = nir_load_var(&b, cmd_buf.offset);
@ -1872,6 +1889,7 @@ radv_GetGeneratedCommandsMemoryRequirementsNV(VkDevice _device,
VkMemoryRequirements2 *pMemoryRequirements)
{
RADV_FROM_HANDLE(radv_device, device, _device);
const struct radv_physical_device *pdev = radv_device_physical(device);
VK_FROM_HANDLE(radv_indirect_command_layout, layout, pInfo->indirectCommandsLayout);
VK_FROM_HANDLE(radv_pipeline, pipeline, pInfo->pipeline);
@ -1882,10 +1900,9 @@ radv_GetGeneratedCommandsMemoryRequirementsNV(VkDevice _device,
radv_dgc_preamble_cmdbuf_size(device);
VkDeviceSize upload_buf_size = upload_stride * pInfo->maxSequencesCount;
pMemoryRequirements->memoryRequirements.memoryTypeBits = device->physical_device->memory_types_32bit;
pMemoryRequirements->memoryRequirements.memoryTypeBits = pdev->memory_types_32bit;
pMemoryRequirements->memoryRequirements.alignment =
MAX2(device->physical_device->info.ip[AMD_IP_GFX].ib_alignment,
device->physical_device->info.ip[AMD_IP_COMPUTE].ib_alignment);
MAX2(pdev->info.ip[AMD_IP_GFX].ib_alignment, pdev->info.ip[AMD_IP_COMPUTE].ib_alignment);
pMemoryRequirements->memoryRequirements.size =
align(cmd_buf_size + upload_buf_size, pMemoryRequirements->memoryRequirements.alignment);
}
@ -2051,6 +2068,7 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
{
VK_FROM_HANDLE(radv_indirect_command_layout, layout, pGeneratedCommandsInfo->indirectCommandsLayout);
VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
*upload_size = MAX2(*upload_size, 16);
@ -2074,7 +2092,7 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
struct radv_shader *cs = radv_get_shader(compute_pipeline->base.shaders, MESA_SHADER_COMPUTE);
if (cs->info.wave_size == 32) {
assert(cmd_buffer->device->physical_device->info.gfx_level >= GFX10);
assert(pdev->info.gfx_level >= GFX10);
params->dispatch_initiator |= S_00B800_CS_W32_EN(1);
}
@ -2276,9 +2294,9 @@ radv_GetPipelineIndirectMemoryRequirementsNV(VkDevice _device, const VkComputePi
VkMemoryRequirements *reqs = &pMemoryRequirements->memoryRequirements;
const uint32_t size = sizeof(struct radv_compute_pipeline_metadata);
RADV_FROM_HANDLE(radv_device, device, _device);
const struct radv_physical_device *pdev = radv_device_physical(device);
reqs->memoryTypeBits = ((1u << device->physical_device->memory_properties.memoryTypeCount) - 1u) &
~device->physical_device->memory_types_32bit;
reqs->memoryTypeBits = ((1u << pdev->memory_properties.memoryTypeCount) - 1u) & ~pdev->memory_types_32bit;
reqs->alignment = 4;
reqs->size = align(size, reqs->alignment);
}

View file

@ -195,12 +195,13 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc
mem->user_ptr = host_ptr_info->pHostPointer;
}
} else {
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
uint32_t heap_index;
heap_index = device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex].heapIndex;
domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex];
flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex];
heap_index = pdev->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex].heapIndex;
domain = pdev->memory_domains[pAllocateInfo->memoryTypeIndex];
flags |= pdev->memory_flags[pAllocateInfo->memoryTypeIndex];
if (export_info && export_info->handleTypes) {
/* Setting RADEON_FLAG_GTT_WC in case the bo is spilled to GTT. This is important when the
@ -226,7 +227,7 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc
flags |= RADEON_FLAG_ZERO_VRAM;
if (device->overallocation_disallowed) {
uint64_t total_size = device->physical_device->memory_properties.memoryHeaps[heap_index].size;
uint64_t total_size = pdev->memory_properties.memoryHeaps[heap_index].size;
mtx_lock(&device->overallocation_mutex);
if (device->allocated_memory_size[heap_index] + alloc_size > total_size) {
@ -238,8 +239,8 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc
mtx_unlock(&device->overallocation_mutex);
}
result = radv_bo_create(device, alloc_size, device->physical_device->info.max_alignment, domain, flags, priority,
replay_address, is_internal, &mem->bo);
result = radv_bo_create(device, alloc_size, pdev->info.max_alignment, domain, flags, priority, replay_address,
is_internal, &mem->bo);
if (result != VK_SUCCESS) {
if (device->overallocation_disallowed) {

View file

@ -1881,6 +1881,7 @@ radv_GetImageSparseMemoryRequirements2(VkDevice _device, const VkImageSparseMemo
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_image, image, pInfo->image);
struct radv_physical_device *pdev = radv_device_physical(device);
if (!(image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)) {
*pSparseMemoryRequirementCount = 0;
@ -1892,12 +1893,12 @@ radv_GetImageSparseMemoryRequirements2(VkDevice _device, const VkImageSparseMemo
vk_outarray_append_typed(VkSparseImageMemoryRequirements2, &out, req)
{
fill_sparse_image_format_properties(device->physical_device, image->vk.image_type, image->vk.format,
fill_sparse_image_format_properties(pdev, image->vk.image_type, image->vk.format,
&req->memoryRequirements.formatProperties);
req->memoryRequirements.imageMipTailFirstLod = image->planes[0].surface.first_mip_tail_level;
if (req->memoryRequirements.imageMipTailFirstLod < image->vk.mip_levels) {
if (device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
/* The tail is always a single tile per layer. */
req->memoryRequirements.imageMipTailSize = 65536;
req->memoryRequirements.imageMipTailOffset =

View file

@ -42,6 +42,8 @@
static unsigned
radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
assert(pCreateInfo->samples <= 1);
return RADEON_SURF_MODE_LINEAR_ALIGNED;
@ -54,8 +56,7 @@ radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateI
if (pCreateInfo->samples > 1)
return RADEON_SURF_MODE_2D;
if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) &&
device->physical_device->info.gfx_level <= GFX8) {
if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) && pdev->info.gfx_level <= GFX8) {
/* this causes hangs in some VK CTS tests on GFX9. */
/* Textures with a very small height are recommended to be linear. */
if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
@ -71,14 +72,16 @@ radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateI
static bool
radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
/* TC-compat HTILE is only available for GFX8+. */
if (device->physical_device->info.gfx_level < GFX8)
if (pdev->info.gfx_level < GFX8)
return false;
/* TC-compat HTILE looks broken on Tonga (and Iceland is the same design) and the documented bug
* workarounds don't help.
*/
if (device->physical_device->info.family == CHIP_TONGA || device->physical_device->info.family == CHIP_ICELAND)
if (pdev->info.family == CHIP_TONGA || pdev->info.family == CHIP_ICELAND)
return false;
if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
@ -91,7 +94,7 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCrea
(VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
return false;
if (device->physical_device->info.gfx_level < GFX9) {
if (pdev->info.gfx_level < GFX9) {
/* TC-compat HTILE for MSAA depth/stencil images is broken
* on GFX8 because the tiling doesn't match.
*/
@ -114,7 +117,7 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCrea
}
/* GFX9 has issues when the sample count is 4 and the format is D16 */
if (device->physical_device->info.gfx_level == GFX9 && pCreateInfo->samples == 4 && format == VK_FORMAT_D16_UNORM)
if (pdev->info.gfx_level == GFX9 && pCreateInfo->samples == 4 && format == VK_FORMAT_D16_UNORM)
return false;
return true;
@ -123,8 +126,10 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCrea
static bool
radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (info->bo_metadata) {
if (device->physical_device->info.gfx_level >= GFX9)
if (pdev->info.gfx_level >= GFX9)
return info->bo_metadata->u.gfx9.scanout;
else
return info->bo_metadata->u.legacy.scanout;
@ -237,8 +242,10 @@ static bool
radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image, const VkImageCreateInfo *pCreateInfo,
VkFormat format, bool *sign_reinterpret)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
/* DCC (Delta Color Compression) is only available for GFX8+. */
if (device->physical_device->info.gfx_level < GFX8)
if (pdev->info.gfx_level < GFX8)
return false;
const VkImageCompressionControlEXT *compression =
@ -260,7 +267,7 @@ radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *imag
* decompressing a lot anyway we might as well not have DCC.
*/
if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
(device->physical_device->info.gfx_level < GFX10 ||
(pdev->info.gfx_level < GFX10 ||
radv_formats_is_atomic_allowed(device, pCreateInfo->pNext, format, pCreateInfo->flags)))
return false;
@ -278,24 +285,22 @@ radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *imag
if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
return false;
if (device->physical_device->info.gfx_level < GFX10) {
if (pdev->info.gfx_level < GFX10) {
/* TODO: Add support for DCC MSAA on GFX8-9. */
if (pCreateInfo->samples > 1 && !device->physical_device->dcc_msaa_allowed)
if (pCreateInfo->samples > 1 && !pdev->dcc_msaa_allowed)
return false;
/* TODO: Add support for DCC layers/mipmaps on GFX9. */
if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
device->physical_device->info.gfx_level == GFX9)
if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) && pdev->info.gfx_level == GFX9)
return false;
}
/* DCC MSAA can't work on GFX10.3 and earlier without FMASK. */
if (pCreateInfo->samples > 1 && device->physical_device->info.gfx_level < GFX11 &&
if (pCreateInfo->samples > 1 && pdev->info.gfx_level < GFX11 &&
(device->instance->debug_flags & RADV_DEBUG_NO_FMASK))
return false;
return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format, pCreateInfo->flags,
sign_reinterpret);
return radv_are_formats_dcc_compatible(pdev, pCreateInfo->pNext, format, pCreateInfo->flags, sign_reinterpret);
}
static bool
@ -331,7 +336,9 @@ radv_use_dcc_for_image_late(struct radv_device *device, struct radv_image *image
bool
radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image)
{
return ac_surface_supports_dcc_image_stores(device->physical_device->info.gfx_level, &image->planes[0].surface);
const struct radv_physical_device *pdev = radv_device_physical(device);
return ac_surface_supports_dcc_image_stores(pdev->info.gfx_level, &image->planes[0].surface);
}
/*
@ -347,12 +354,14 @@ radv_image_use_dcc_predication(const struct radv_device *device, const struct ra
static inline bool
radv_use_fmask_for_image(const struct radv_device *device, const struct radv_image *image)
{
if (device->physical_device->info.gfx_level == GFX9 && image->vk.array_layers > 1) {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level == GFX9 && image->vk.array_layers > 1) {
/* On GFX9, FMASK can be interleaved with layers and this isn't properly supported. */
return false;
}
return device->physical_device->use_fmask && image->vk.samples > 1 &&
return pdev->use_fmask && image->vk.samples > 1 &&
((image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
(device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
}
@ -361,7 +370,8 @@ static inline bool
radv_use_htile_for_image(const struct radv_device *device, const struct radv_image *image,
const VkImageCreateInfo *pCreateInfo)
{
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
const VkImageCompressionControlEXT *compression =
vk_find_struct_const(pCreateInfo->pNext, IMAGE_COMPRESSION_CONTROL_EXT);
@ -374,11 +384,10 @@ radv_use_htile_for_image(const struct radv_device *device, const struct radv_ima
* - Investigate about mips+layers.
* - Enable on other gens.
*/
bool use_htile_for_mips = image->vk.array_layers == 1 && device->physical_device->info.gfx_level >= GFX10;
bool use_htile_for_mips = image->vk.array_layers == 1 && pdev->info.gfx_level >= GFX10;
/* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */
if (device->physical_device->info.gfx_level == GFX10 && image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT &&
image->vk.mip_levels > 1)
if (pdev->info.gfx_level == GFX10 && image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT && image->vk.mip_levels > 1)
return false;
/* Do not enable HTILE for very small images because it seems less performant but make sure it's
@ -395,19 +404,21 @@ radv_use_htile_for_image(const struct radv_device *device, const struct radv_ima
static bool
radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
/* TC-compat CMASK is only available for GFX8+. */
if (device->physical_device->info.gfx_level < GFX8)
if (pdev->info.gfx_level < GFX8)
return false;
/* GFX9 has issues when sample count is greater than 2 */
if (device->physical_device->info.gfx_level == GFX9 && image->vk.samples > 2)
if (pdev->info.gfx_level == GFX9 && image->vk.samples > 2)
return false;
if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
return false;
/* TC-compat CMASK with storage images is supported on GFX10+. */
if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && device->physical_device->info.gfx_level < GFX10)
if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && pdev->info.gfx_level < GFX10)
return false;
/* Do not enable TC-compatible if the image isn't readable by a shader
@ -427,7 +438,9 @@ radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image
static uint32_t
radv_get_bo_metadata_word1(const struct radv_device *device)
{
return (ATI_VENDOR_ID << 16) | device->physical_device->info.pci_id;
const struct radv_physical_device *pdev = radv_device_physical(device);
return (ATI_VENDOR_ID << 16) | pdev->info.pci_id;
}
static bool
@ -446,9 +459,11 @@ static void
radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf *surface,
const struct radeon_bo_metadata *md)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
surface->flags = RADEON_SURF_CLR(surface->flags, MODE);
if (device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
if (md->u.gfx9.swizzle_mode > 0)
surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
else
@ -476,6 +491,7 @@ static VkResult
radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image,
const struct radv_image_create_info *create_info, struct ac_surf_info *image_info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned width = image->vk.extent.width;
unsigned height = image->vk.extent.height;
@ -489,7 +505,7 @@ radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image
if (create_info->bo_metadata && radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
const struct radeon_bo_metadata *md = create_info->bo_metadata;
if (device->physical_device->info.gfx_level >= GFX10) {
if (pdev->info.gfx_level >= GFX10) {
width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
height = G_00A008_HEIGHT(md->metadata[4]) + 1;
} else {
@ -509,7 +525,7 @@ radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image
"(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
image->vk.extent.width, image->vk.extent.height, width, height);
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
} else if (device->physical_device->info.gfx_level >= GFX10) {
} else if (pdev->info.gfx_level >= GFX10) {
fprintf(stderr,
"Tried to import an image with inconsistent width on GFX10.\n"
"As GFX10 has no separate stride fields we cannot cope with\n"
@ -535,6 +551,8 @@ static VkResult
radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image,
const struct radv_image_create_info *create_info, struct ac_surf_info *image_info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
if (result != VK_SUCCESS)
return result;
@ -552,7 +570,7 @@ radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *
image_info->surf_index = NULL;
}
if (create_info->prime_blit_src && !device->physical_device->info.sdma_supports_compression) {
if (create_info->prime_blit_src && !pdev->info.sdma_supports_compression) {
/* Older SDMA hw can't handle DCC */
image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
}
@ -579,9 +597,10 @@ static uint64_t
radv_get_surface_flags(struct radv_device *device, struct radv_image *image, unsigned plane_id,
const VkImageCreateInfo *pCreateInfo, VkFormat image_format)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t flags;
unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
VkFormat format = radv_image_get_plane_format(device->physical_device, image, plane_id);
VkFormat format = radv_image_get_plane_format(pdev, image, plane_id);
const struct util_format_description *desc = vk_format_description(format);
bool is_depth, is_stencil;
@ -616,7 +635,7 @@ radv_get_surface_flags(struct radv_device *device, struct radv_image *image, uns
if (is_depth) {
flags |= RADEON_SURF_ZBUFFER;
if (is_depth && is_stencil && device->physical_device->info.gfx_level <= GFX8) {
if (is_depth && is_stencil && pdev->info.gfx_level <= GFX8) {
if (!(pCreateInfo->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT))
flags |= RADEON_SURF_NO_RENDER_TARGET;
@ -641,7 +660,7 @@ radv_get_surface_flags(struct radv_device *device, struct radv_image *image, uns
if (is_stencil)
flags |= RADEON_SURF_SBUFFER;
if (device->physical_device->info.gfx_level >= GFX9 && pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
if (pdev->info.gfx_level >= GFX9 && pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format))
flags |= RADEON_SURF_NO_RENDER_TARGET;
@ -656,7 +675,7 @@ radv_get_surface_flags(struct radv_device *device, struct radv_image *image, uns
}
if (image->queue_family_mask & BITFIELD_BIT(RADV_QUEUE_TRANSFER)) {
if (!device->physical_device->info.sdma_supports_compression)
if (!pdev->info.sdma_supports_compression)
flags |= RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_HTILE;
}
@ -720,12 +739,14 @@ radv_compose_swizzle(const struct util_format_description *desc, const VkCompone
bool
vi_alpha_is_on_msb(const struct radv_device *device, const VkFormat format)
{
if (device->physical_device->info.gfx_level >= GFX11)
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level >= GFX11)
return false;
const struct util_format_description *desc = vk_format_description(format);
if (device->physical_device->info.gfx_level >= GFX10 && desc->nr_channels == 1)
if (pdev->info.gfx_level >= GFX10 && desc->nr_channels == 1)
return desc->swizzle[3] == PIPE_SWIZZLE_X;
return radv_translate_colorswap(format, false) <= 1;
@ -735,13 +756,13 @@ static void
radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image, unsigned plane_id,
struct radeon_bo_metadata *md)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
static const VkComponentMapping fixedmapping;
const VkFormat plane_format = radv_image_get_plane_format(device->physical_device, image, plane_id);
const VkFormat plane_format = radv_image_get_plane_format(pdev, image, plane_id);
const unsigned plane_width = vk_format_get_plane_width(image->vk.format, plane_id, image->vk.extent.width);
const unsigned plane_height = vk_format_get_plane_height(image->vk.format, plane_id, image->vk.extent.height);
struct radeon_surf *surface = &image->planes[plane_id].surface;
const struct legacy_surf_level *base_level_info =
device->physical_device->info.gfx_level <= GFX8 ? &surface->u.legacy.level[0] : NULL;
const struct legacy_surf_level *base_level_info = pdev->info.gfx_level <= GFX8 ? &surface->u.legacy.level[0] : NULL;
uint32_t desc[8];
radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->vk.image_type, plane_format,
@ -751,21 +772,22 @@ radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image,
radv_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, 0, 0, surface->blk_w, false, false, false,
false, desc, NULL);
ac_surface_compute_umd_metadata(&device->physical_device->info, surface, image->vk.mip_levels, desc,
&md->size_metadata, md->metadata,
ac_surface_compute_umd_metadata(&pdev->info, surface, image->vk.mip_levels, desc, &md->size_metadata, md->metadata,
device->instance->debug_flags & RADV_DEBUG_EXTRA_MD);
}
void
radv_init_metadata(struct radv_device *device, struct radv_image *image, struct radeon_bo_metadata *metadata)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
/* use plane 0, even when there are multiple planes, to follow radeonsi */
const unsigned plane_id = 0;
struct radeon_surf *surface = &image->planes[plane_id].surface;
memset(metadata, 0, sizeof(*metadata));
if (device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
uint64_t dcc_offset =
image->bindings[0].offset + (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset);
metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode;
@ -796,7 +818,8 @@ void
radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image, uint64_t offset,
uint32_t stride)
{
ac_surface_override_offset_stride(&device->physical_device->info, &image->planes[0].surface, image->vk.array_layers,
const struct radv_physical_device *pdev = radv_device_physical(device);
ac_surface_override_offset_stride(&pdev->info, &image->planes[0].surface, image->vk.array_layers,
image->vk.mip_levels, offset, stride);
}
@ -819,6 +842,8 @@ radv_image_alloc_single_sample_cmask(const struct radv_device *device, const str
static void
radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
/* images with modifiers can be potentially imported */
if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
return;
@ -839,7 +864,7 @@ radv_image_alloc_values(const struct radv_device *device, struct radv_image *ima
image->size += 8 * image->vk.mip_levels;
}
if (radv_image_is_tc_compat_htile(image) && device->physical_device->info.has_tc_compat_zrange_bug) {
if (radv_image_is_tc_compat_htile(image) && pdev->info.has_tc_compat_zrange_bug) {
/* Metadata for the TC-compatible HTILE hardware bug which
* have to be fixed by updating ZRANGE_PRECISION when doing
* fast depth clears to 0.0f.
@ -855,13 +880,14 @@ radv_image_alloc_values(const struct radv_device *device, struct radv_image *ima
static bool
radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
{
const struct radeon_info *gpu_info = &device->physical_device->info;
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
int log2_samples = util_logbase2(image->vk.samples);
assert(gpu_info->gfx_level >= GFX10);
for (unsigned i = 0; i < image->plane_count; ++i) {
VkFormat fmt = radv_image_get_plane_format(device->physical_device, image, i);
VkFormat fmt = radv_image_get_plane_format(pdev, image, i);
int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
int log2_bpp_and_samples;
@ -903,9 +929,11 @@ radv_image_is_pipe_misaligned(const struct radv_device *device, const struct rad
static bool
radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
{
if (device->physical_device->info.gfx_level >= GFX10) {
return !device->physical_device->info.tcc_rb_non_coherent && !radv_image_is_pipe_misaligned(device, image);
} else if (device->physical_device->info.gfx_level == GFX9) {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level >= GFX10) {
return !pdev->info.tcc_rb_non_coherent && !radv_image_is_pipe_misaligned(device, image);
} else if (pdev->info.gfx_level == GFX9) {
if (image->vk.samples == 1 &&
(image->vk.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
!vk_format_has_stencil(image->vk.format)) {
@ -926,6 +954,8 @@ radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_im
bool
radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
return false;
@ -934,7 +964,7 @@ radv_image_can_fast_clear(const struct radv_device *device, const struct radv_im
return false;
/* RB+ doesn't work with CMASK fast clear on Stoney. */
if (!radv_image_has_dcc(image) && device->physical_device->info.family == CHIP_STONEY)
if (!radv_image_has_dcc(image) && pdev->info.family == CHIP_STONEY)
return false;
/* Fast-clears with CMASK aren't supported for 128-bit formats. */
@ -958,8 +988,10 @@ radv_image_can_fast_clear(const struct radv_device *device, const struct radv_im
static bool
radv_image_use_comp_to_single(const struct radv_device *device, const struct radv_image *image)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
/* comp-to-single is only available for GFX10+. */
if (device->physical_device->info.gfx_level < GFX10)
if (pdev->info.gfx_level < GFX10)
return false;
/* If the image can't be fast cleared, comp-to-single can't be used. */
@ -972,7 +1004,7 @@ radv_image_use_comp_to_single(const struct radv_device *device, const struct rad
/* It seems 8bpp and 16bpp require RB+ to work. */
unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk.format);
if (bytes_per_pixel <= 2 && !device->physical_device->info.rbplus_allowed)
if (bytes_per_pixel <= 2 && !pdev->info.rbplus_allowed)
return false;
return true;
@ -1049,6 +1081,8 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
const struct VkVideoProfileListInfoKHR *profile_list, struct radv_image *image)
{
struct radv_physical_device *pdev = radv_device_physical(device);
/* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
* common internal case. */
create_info.vk_info = NULL;
@ -1060,7 +1094,7 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count);
radv_image_reset_layout(device->physical_device, image);
radv_image_reset_layout(pdev, image);
/*
* Due to how the decoder works, the user can't supply an oversized image, because if it attempts
@ -1070,17 +1104,17 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
if (image->vk.usage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR)) {
assert(profile_list);
uint32_t width_align, height_align;
radv_video_get_profile_alignments(device->physical_device, profile_list, &width_align, &height_align);
radv_video_get_profile_alignments(pdev, profile_list, &width_align, &height_align);
image_info.width = align(image_info.width, width_align);
image_info.height = align(image_info.height, height_align);
if (radv_has_uvd(device->physical_device) && image->vk.usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) {
if (radv_has_uvd(pdev) && image->vk.usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) {
/* UVD and kernel demand a full DPB allocation. */
image_info.array_size = MIN2(16, image_info.array_size);
}
}
unsigned plane_count = radv_get_internal_plane_count(device->physical_device, image->vk.format);
unsigned plane_count = radv_get_internal_plane_count(pdev, image->vk.format);
for (unsigned plane = 0; plane < plane_count; ++plane) {
struct ac_surf_info info = image_info;
uint64_t offset;
@ -1101,9 +1135,9 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
}
if (create_info.bo_metadata && !mod_info &&
!ac_surface_apply_umd_metadata(&device->physical_device->info, &image->planes[plane].surface,
image->vk.samples, image->vk.mip_levels,
create_info.bo_metadata->size_metadata, create_info.bo_metadata->metadata))
!ac_surface_apply_umd_metadata(&pdev->info, &image->planes[plane].surface, image->vk.samples,
image->vk.mip_levels, create_info.bo_metadata->size_metadata,
create_info.bo_metadata->metadata))
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
if (!create_info.no_metadata_planes && !create_info.bo_metadata && plane_count == 1 && !mod_info)
@ -1121,8 +1155,8 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
stride = 0; /* 0 means no override */
}
if (!ac_surface_override_offset_stride(&device->physical_device->info, &image->planes[plane].surface,
image->vk.array_layers, image->vk.mip_levels, offset, stride))
if (!ac_surface_override_offset_stride(&pdev->info, &image->planes[plane].surface, image->vk.array_layers,
image->vk.mip_levels, offset, stride))
return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
/* Validate DCC offsets in modifier layout. */
@ -1132,8 +1166,8 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
for (unsigned i = 1; i < mem_planes; ++i) {
if (ac_surface_get_plane_offset(device->physical_device->info.gfx_level, &image->planes[plane].surface, i,
0) != mod_info->pPlaneLayouts[i].offset)
if (ac_surface_get_plane_offset(pdev->info.gfx_level, &image->planes[plane].surface, i, 0) !=
mod_info->pPlaneLayouts[i].offset)
return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
}
}
@ -1141,7 +1175,7 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
image->alignment = MAX2(image->alignment, 1 << image->planes[plane].surface.alignment_log2);
image->planes[plane].format = radv_image_get_plane_format(device->physical_device, image, plane);
image->planes[plane].format = radv_image_get_plane_format(pdev, image, plane);
}
image->tc_compatible_cmask = radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image);
@ -1177,6 +1211,8 @@ radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAll
static void
radv_image_print_info(struct radv_device *device, struct radv_image *image)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
fprintf(stderr, "Image:\n");
fprintf(stderr,
" Info: size=%" PRIu64 ", alignment=%" PRIu32 ", "
@ -1188,11 +1224,11 @@ radv_image_print_info(struct radv_device *device, struct radv_image *image)
const struct radv_image_plane *plane = &image->planes[i];
const struct radeon_surf *surf = &plane->surface;
const struct util_format_description *desc = vk_format_description(plane->format);
uint64_t offset = ac_surface_get_plane_offset(device->physical_device->info.gfx_level, &plane->surface, 0, 0);
uint64_t offset = ac_surface_get_plane_offset(pdev->info.gfx_level, &plane->surface, 0, 0);
fprintf(stderr, " Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset);
ac_surface_print_info(stderr, &device->physical_device->info, surf);
ac_surface_print_info(stderr, &pdev->info, surf);
}
}
@ -1200,7 +1236,7 @@ static uint64_t
radv_select_modifier(const struct radv_device *dev, VkFormat format,
const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list)
{
const struct radv_physical_device *pdev = dev->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(dev);
unsigned mod_count;
assert(mod_list->drmFormatModifierCount);
@ -1238,6 +1274,7 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_
const VkAllocationCallbacks *alloc, VkImage *pImage, bool is_internal)
{
RADV_FROM_HANDLE(radv_device, device, _device);
const struct radv_physical_device *pdev = radv_device_physical(device);
const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
uint64_t modifier = DRM_FORMAT_MOD_INVALID;
struct radv_image *image = NULL;
@ -1250,7 +1287,7 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_
const struct VkVideoProfileListInfoKHR *profile_list =
vk_find_struct_const(pCreateInfo->pNext, VIDEO_PROFILE_LIST_INFO_KHR);
unsigned plane_count = radv_get_internal_plane_count(device->physical_device, format);
unsigned plane_count = radv_get_internal_plane_count(pdev, format);
const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;
@ -1270,8 +1307,7 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_
pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
else
image->queue_family_mask |=
1u << vk_queue_to_radv(device->physical_device, pCreateInfo->pQueueFamilyIndices[i]);
image->queue_family_mask |= 1u << vk_queue_to_radv(pdev, pCreateInfo->pQueueFamilyIndices[i]);
/* This queue never really accesses the image. */
image->queue_family_mask &= ~(1u << RADV_QUEUE_SPARSE);
@ -1375,10 +1411,12 @@ bool
radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image, VkImageLayout layout,
unsigned queue_mask)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
/* Don't compress exclusive images used on transfer queues when SDMA doesn't support HTILE.
* Note that HTILE is already disabled on concurrent images when not supported.
*/
if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !device->physical_device->info.sdma_supports_compression)
if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !pdev->info.sdma_supports_compression)
return false;
switch (layout) {
@ -1452,6 +1490,8 @@ bool
radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image, unsigned level,
VkImageLayout layout, unsigned queue_mask)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!radv_dcc_enabled(image, level))
return false;
@ -1470,7 +1510,7 @@ radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_i
/* Don't compress exclusive images used on transfer queues when SDMA doesn't support DCC.
* Note that DCC is already disabled on concurrent images when not supported.
*/
if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !device->physical_device->info.sdma_supports_compression)
if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !pdev->info.sdma_supports_compression)
return false;
if (layout == VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT) {
@ -1480,7 +1520,7 @@ radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_i
return false;
}
return device->physical_device->info.gfx_level >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL;
return pdev->info.gfx_level >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL;
}
enum radv_fmask_compression
@ -1533,11 +1573,13 @@ radv_image_queue_family_mask(const struct radv_image *image, enum radv_queue_fam
bool
radv_image_is_renderable(const struct radv_device *device, const struct radv_image *image)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT ||
image->vk.format == VK_FORMAT_R32G32B32_SFLOAT)
return false;
if (device->physical_device->info.gfx_level >= GFX9 && image->vk.image_type == VK_IMAGE_TYPE_3D &&
if (pdev->info.gfx_level >= GFX9 && image->vk.image_type == VK_IMAGE_TYPE_3D &&
vk_format_get_blocksizebits(image->vk.format) == 128 && vk_format_is_compressed(image->vk.format))
return false;
@ -1572,11 +1614,11 @@ radv_CreateImage(VkDevice _device, const VkImageCreateInfo *pCreateInfo, const V
* we're guaranteed to access an Android object incorrectly.
*/
RADV_FROM_HANDLE(radv_device, device, _device);
const struct radv_physical_device *pdev = radv_device_physical(device);
const VkImageSwapchainCreateInfoKHR *swapchain_info =
vk_find_struct_const(pCreateInfo->pNext, IMAGE_SWAPCHAIN_CREATE_INFO_KHR);
if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) {
return wsi_common_create_swapchain_image(device->physical_device->vk.wsi_device, pCreateInfo,
swapchain_info->swapchain, pImage);
return wsi_common_create_swapchain_image(pdev->vk.wsi_device, pCreateInfo, swapchain_info->swapchain, pImage);
}
#endif
@ -1686,6 +1728,7 @@ radv_GetImageSubresourceLayout2KHR(VkDevice _device, VkImage _image, const VkIma
{
RADV_FROM_HANDLE(radv_image, image, _image);
RADV_FROM_HANDLE(radv_device, device, _device);
const struct radv_physical_device *pdev = radv_device_physical(device);
int level = pSubresource->imageSubresource.mipLevel;
int layer = pSubresource->imageSubresource.arrayLayer;
@ -1703,18 +1746,17 @@ radv_GetImageSubresourceLayout2KHR(VkDevice _device, VkImage _image, const VkIma
assert(level == 0);
assert(layer == 0);
pLayout->subresourceLayout.offset =
ac_surface_get_plane_offset(device->physical_device->info.gfx_level, surface, mem_plane_id, 0);
pLayout->subresourceLayout.offset = ac_surface_get_plane_offset(pdev->info.gfx_level, surface, mem_plane_id, 0);
pLayout->subresourceLayout.rowPitch =
ac_surface_get_plane_stride(device->physical_device->info.gfx_level, surface, mem_plane_id, level);
ac_surface_get_plane_stride(pdev->info.gfx_level, surface, mem_plane_id, level);
pLayout->subresourceLayout.arrayPitch = 0;
pLayout->subresourceLayout.depthPitch = 0;
pLayout->subresourceLayout.size = ac_surface_get_plane_size(surface, mem_plane_id);
} else if (device->physical_device->info.gfx_level >= GFX9) {
} else if (pdev->info.gfx_level >= GFX9) {
uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;
pLayout->subresourceLayout.offset =
ac_surface_get_plane_offset(device->physical_device->info.gfx_level, &plane->surface, 0, layer) + level_offset;
ac_surface_get_plane_offset(pdev->info.gfx_level, &plane->surface, 0, layer) + level_offset;
if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT ||
image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) {
/* Adjust the number of bytes between each row because

View file

@ -106,7 +106,8 @@ radv_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *
uint64_t gpu_address = binding->bo ? radv_buffer_get_va(binding->bo) + binding->offset : 0;
uint64_t va = gpu_address;
uint8_t swizzle = plane->surface.tile_swizzle;
enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(device);
enum amd_gfx_level gfx_level = pdev->info.gfx_level;
uint64_t meta_va = 0;
if (gfx_level >= GFX9) {
if (is_stencil)
@ -154,7 +155,7 @@ radv_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *
* If an imported image is used with VK_IMAGE_VIEW_TYPE_2D_ARRAY, it may hang due to VM faults
* because DEPTH means pitch with 2D, but it means depth with 2D array.
*/
if (device->physical_device->info.gfx_level >= GFX10_3 && plane->surface.u.gfx9.uses_custom_pitch) {
if (pdev->info.gfx_level >= GFX10_3 && plane->surface.u.gfx9.uses_custom_pitch) {
assert((plane->surface.u.gfx9.surf_pitch * plane->surface.bpe) % 256 == 0);
assert(image->vk.image_type == VK_IMAGE_TYPE_2D);
assert(plane->surface.is_linear);
@ -245,6 +246,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima
uint32_t *fmask_state, VkImageCreateFlags img_create_flags,
const struct ac_surf_nbc_view *nbc_view, const VkImageViewSlicedCreateInfoEXT *sliced_3d)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct util_format_description *desc;
enum pipe_swizzle swizzle[4];
unsigned img_format;
@ -261,8 +263,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima
desc = util_format_description(PIPE_FORMAT_R8G8B8X8_SRGB);
}
img_format =
ac_get_gfx10_format_table(&device->physical_device->info)[vk_format_to_pipe_format(vk_format)].img_format;
img_format = ac_get_gfx10_format_table(&pdev->info)[vk_format_to_pipe_format(vk_format)].img_format;
radv_compose_swizzle(desc, mapping, swizzle);
@ -271,7 +272,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima
type = V_008F1C_SQ_RSRC_IMG_3D;
} else {
type = radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, image->vk.samples, is_storage_image,
device->physical_device->info.gfx_level == GFX9);
pdev->info.gfx_level == GFX9);
}
if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
@ -286,7 +287,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima
state[0] = 0;
state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1);
state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
S_00A008_RESOURCE_LEVEL(device->physical_device->info.gfx_level < GFX11);
S_00A008_RESOURCE_LEVEL(pdev->info.gfx_level < GFX11);
state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
S_00A00C_BASE_LEVEL(image->vk.samples > 1 ? 0 : first_level) |
@ -332,7 +333,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima
max_mip = nbc_view->num_levels - 1;
unsigned min_lod_clamped = radv_float_to_ufixed(CLAMP(min_lod, 0, 15), 8);
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
state[1] |= S_00A004_MAX_MIP(max_mip);
state[5] |= S_00A014_MIN_LOD_LO(min_lod_clamped);
state[6] |= S_00A018_MIN_LOD_HI(min_lod_clamped >> 5);
@ -413,6 +414,7 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
unsigned width, unsigned height, unsigned depth, float min_lod, uint32_t *state,
uint32_t *fmask_state, VkImageCreateFlags img_create_flags)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct util_format_description *desc;
enum pipe_swizzle swizzle[4];
int first_non_void;
@ -444,21 +446,19 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
}
/* S8 with either Z16 or Z32 HTILE need a special format. */
if (device->physical_device->info.gfx_level == GFX9 && vk_format == VK_FORMAT_S8_UINT &&
radv_image_is_tc_compat_htile(image)) {
if (pdev->info.gfx_level == GFX9 && vk_format == VK_FORMAT_S8_UINT && radv_image_is_tc_compat_htile(image)) {
if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT)
data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
else if (image->vk.format == VK_FORMAT_D16_UNORM_S8_UINT)
data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
}
if (device->physical_device->info.gfx_level == GFX9 &&
img_create_flags & VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT) {
if (pdev->info.gfx_level == GFX9 && img_create_flags & VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT) {
assert(image->vk.image_type == VK_IMAGE_TYPE_3D);
type = V_008F1C_SQ_RSRC_IMG_3D;
} else {
type = radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, image->vk.samples, is_storage_image,
device->physical_device->info.gfx_level == GFX9);
pdev->info.gfx_level == GFX9);
}
if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
@ -484,7 +484,7 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
state[6] = 0;
state[7] = 0;
if (device->physical_device->info.gfx_level == GFX9) {
if (pdev->info.gfx_level == GFX9) {
unsigned bc_swizzle = gfx9_border_color_swizzle(desc);
/* Depth is the last accessible layer on Gfx9.
@ -509,7 +509,7 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
/* The last dword is unused by hw. The shader uses it to clear
* bits in the first dword of sampler state.
*/
if (device->physical_device->info.gfx_level <= GFX7 && image->vk.samples <= 1) {
if (pdev->info.gfx_level <= GFX7 && image->vk.samples <= 1) {
if (first_level == last_level)
state[7] = C_008F30_MAX_ANISO_RATIO;
else
@ -529,7 +529,7 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
va = gpu_address + image->bindings[0].offset + image->planes[0].surface.fmask_offset;
if (device->physical_device->info.gfx_level == GFX9) {
if (pdev->info.gfx_level == GFX9) {
fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
switch (image->vk.samples) {
case 2:
@ -576,7 +576,7 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
fmask_state[6] = 0;
fmask_state[7] = 0;
if (device->physical_device->info.gfx_level == GFX9) {
if (pdev->info.gfx_level == GFX9) {
fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode);
fmask_state[4] |=
S_008F20_DEPTH(last_layer) | S_008F20_PITCH(image->planes[0].surface.u.gfx9.color.fmask_epitch);
@ -615,7 +615,9 @@ radv_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
uint32_t *fmask_state, VkImageCreateFlags img_create_flags,
const struct ac_surf_nbc_view *nbc_view, const VkImageViewSlicedCreateInfoEXT *sliced_3d)
{
if (device->physical_device->info.gfx_level >= GFX10) {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level >= GFX10) {
gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping, first_level,
last_level, first_layer, last_layer, width, height, depth, min_lod, state,
fmask_state, img_create_flags, nbc_view, sliced_3d);
@ -630,12 +632,13 @@ static inline void
compute_non_block_compressed_view(struct radv_device *device, const struct radv_image_view *iview,
struct ac_surf_nbc_view *nbc_view)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_image *image = iview->image;
const struct radeon_surf *surf = &image->planes[0].surface;
struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws);
struct ac_surf_info surf_info = radv_get_ac_surf_info(device, image);
ac_surface_compute_nbc_view(addrlib, &device->physical_device->info, surf, &surf_info, iview->vk.base_mip_level,
ac_surface_compute_nbc_view(addrlib, &pdev->info, surf, &surf_info, iview->vk.base_mip_level,
iview->vk.base_array_layer, nbc_view);
}
@ -647,6 +650,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic
const struct ac_surf_nbc_view *nbc_view,
const VkImageViewSlicedCreateInfoEXT *sliced_3d, bool force_zero_base_mip)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_image *image = iview->image;
struct radv_image_plane *plane = &image->planes[plane_id];
bool is_stencil = iview->vk.aspects == VK_IMAGE_ASPECT_STENCIL_BIT;
@ -665,7 +669,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic
assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);
if (device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
if (nbc_view->valid) {
hw_level = nbc_view->level;
iview->extent.width = nbc_view->width;
@ -689,7 +693,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic
img_create_flags, nbc_view, sliced_3d);
const struct legacy_surf_level *base_level_info = NULL;
if (device->physical_device->info.gfx_level <= GFX8) {
if (pdev->info.gfx_level <= GFX8) {
if (is_stencil)
base_level_info = &plane->surface.u.legacy.zs.stencil_level[iview->vk.base_mip_level];
else
@ -738,6 +742,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
const struct radv_image_view_extra_create_info *extra_create_info)
{
RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
const struct radv_physical_device *pdev = radv_device_physical(device);
const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
uint32_t plane_count = 1;
float min_lod = 0.0f;
@ -755,7 +760,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
vk_image_view_init(&device->vk, &iview->vk, !from_client, pCreateInfo);
bool force_zero_base_mip = true;
if (device->physical_device->info.gfx_level <= GFX8 && min_lod) {
if (pdev->info.gfx_level <= GFX8 && min_lod) {
/* Do not force the base level to zero to workaround a spurious bug with mipmaps and min LOD. */
force_zero_base_mip = false;
}
@ -800,15 +805,15 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
}
/* when the view format is emulated, redirect the view to the hidden plane 1 */
if (radv_is_format_emulated(device->physical_device, iview->vk.format)) {
assert(radv_is_format_emulated(device->physical_device, image->vk.format));
if (radv_is_format_emulated(pdev, iview->vk.format)) {
assert(radv_is_format_emulated(pdev, image->vk.format));
iview->plane_id = 1;
iview->vk.view_format = image->planes[iview->plane_id].format;
iview->vk.format = image->planes[iview->plane_id].format;
plane_count = 1;
}
if (!force_zero_base_mip || device->physical_device->info.gfx_level >= GFX9) {
if (!force_zero_base_mip || pdev->info.gfx_level >= GFX9) {
iview->extent = (VkExtent3D){
.width = image->vk.extent.width,
.height = image->vk.extent.height,
@ -854,7 +859,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
* block compatible format and the compressed format, so even if we take
* the plain converted dimensions the physical layout is correct.
*/
if (device->physical_device->info.gfx_level >= GFX9 && vk_format_is_block_compressed(plane->format) &&
if (pdev->info.gfx_level >= GFX9 && vk_format_is_block_compressed(plane->format) &&
!vk_format_is_block_compressed(iview->vk.format)) {
/* If we have multiple levels in the view we should ideally take the last level,
* but the mip calculation has a max(..., 1) so walking back to the base mip in an
@ -879,7 +884,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
* changes the descriptor's base level, and adjusts the address and
* extents accordingly.
*/
if (device->physical_device->info.gfx_level >= GFX10 &&
if (pdev->info.gfx_level >= GFX10 &&
(radv_minify(iview->extent.width, range->baseMipLevel) < lvl_width ||
radv_minify(iview->extent.height, range->baseMipLevel) < lvl_height) &&
iview->vk.layer_count == 1) {

View file

@ -32,7 +32,9 @@
void
radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf *cs, unsigned shaders)
{
if (device->physical_device->info.gfx_level >= GFX11) {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level >= GFX11) {
radeon_set_uconfig_reg(cs, R_036760_SQG_PERFCOUNTER_CTRL, shaders & 0x7f);
} else {
radeon_set_uconfig_reg_seq(cs, R_036780_SQ_PERFCOUNTER_CTRL, 2);
@ -74,12 +76,14 @@ radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf
void
radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
radv_emit_windowed_counters(device, cs, family, false);
/* Stop SPM counters. */
radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
S_036020_SPM_PERFMON_STATE(device->physical_device->info.never_stop_sq_perf_counters
S_036020_SPM_PERFMON_STATE(pdev->info.never_stop_sq_perf_counters
? V_036020_STRM_PERFMON_STATE_START_COUNTING
: V_036020_STRM_PERFMON_STATE_STOP_COUNTING));
}
@ -466,7 +470,8 @@ radv_emit_instance(struct radv_cmd_buffer *cmd_buffer, int se, int instance)
static void
radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, unsigned *selectors)
{
const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
const enum radv_queue_family qf = cmd_buffer->qf;
struct ac_pc_block_base *regs = block->b->b;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
@ -492,6 +497,7 @@ static void
radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count,
uint64_t va)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct ac_pc_block_base *regs = block->b->b;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
unsigned reg = regs->counter0_lo;
@ -510,7 +516,7 @@ radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_p
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
va += sizeof(uint64_t) * 2 * radv_pc_get_num_instances(cmd_buffer->device->physical_device, block);
va += sizeof(uint64_t) * 2 * radv_pc_get_num_instances(pdev, block);
reg += reg_delta;
}
}
@ -518,9 +524,10 @@ radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_p
static void
radv_pc_sample_block(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, uint64_t va)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
unsigned se_end = 1;
if (block->b->b->flags & AC_PC_BLOCK_SE)
se_end = cmd_buffer->device->physical_device->info.max_se;
se_end = pdev->info.max_se;
for (unsigned se = 0; se < se_end; ++se) {
for (unsigned instance = 0; instance < block->num_instances; ++instance) {
@ -555,8 +562,8 @@ radv_pc_wait_idle(struct radv_cmd_buffer *cmd_buffer)
static void
radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va, bool end)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
@ -621,7 +628,7 @@ void
radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va)
{
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
ASSERTED unsigned cdw_max;
cmd_buffer->state.uses_perf_counters = true;
@ -698,6 +705,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
void
radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
ASSERTED unsigned cdw_max;
@ -710,9 +718,8 @@ radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->device->perf_counter_bo);
uint64_t perf_ctr_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET;
radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->info.gfx_level, cmd_buffer->qf,
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, perf_ctr_va,
1, cmd_buffer->gfx9_fence_va);
radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0,
EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, perf_ctr_va, 1, cmd_buffer->gfx9_fence_va);
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, perf_ctr_va, 1, 0xffffffff);
radv_pc_wait_idle(cmd_buffer);

View file

@ -365,7 +365,8 @@ static unsigned
lower_bit_size_callback(const nir_instr *instr, void *_)
{
struct radv_device *device = _;
enum amd_gfx_level chip = device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(device);
enum amd_gfx_level chip = pdev->info.gfx_level;
if (instr->type != nir_instr_type_alu)
return 0;
@ -437,7 +438,8 @@ opt_vectorize_callback(const nir_instr *instr, const void *_)
return 0;
const struct radv_device *device = _;
enum amd_gfx_level chip = device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(device);
enum amd_gfx_level chip = pdev->info.gfx_level;
if (chip < GFX9)
return 1;
@ -461,7 +463,8 @@ void
radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
struct radv_shader_stage *stage)
{
enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(device);
enum amd_gfx_level gfx_level = pdev->info.gfx_level;
bool progress;
/* Wave and workgroup size should already be filled. */
@ -548,8 +551,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
NIR_PASS(_, stage->nir, ac_nir_lower_tex,
&(ac_nir_lower_tex_options){
.gfx_level = gfx_level,
.lower_array_layer_round_even =
!device->physical_device->info.conformant_trunc_coord || device->disable_trunc_coord,
.lower_array_layer_round_even = !pdev->info.conformant_trunc_coord || device->disable_trunc_coord,
.fix_derivs_in_divergent_cf = fix_derivs_in_divergent_cf,
.max_wqm_vgprs = 64, // TODO: improve spiller and RA support for linear VGPRs
});
@ -570,7 +572,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
nir_move_options sink_opts = nir_move_const_undef | nir_move_copies;
if (!stage->key.optimisations_disabled) {
if (stage->stage != MESA_SHADER_FRAGMENT || !device->physical_device->cache_key.disable_sinking_load_input_fs)
if (stage->stage != MESA_SHADER_FRAGMENT || !pdev->cache_key.disable_sinking_load_input_fs)
sink_opts |= nir_move_load_input;
NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);
@ -581,7 +583,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
* load_input can be reordered, but buffer loads can't.
*/
if (stage->stage == MESA_SHADER_VERTEX) {
NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, stage, gfx_state, &device->physical_device->info);
NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, stage, gfx_state, &pdev->info);
}
/* Lower I/O intrinsics to memory instructions. */
@ -598,7 +600,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
stage->info.outinfo.export_prim_id, false, false, false, stage->info.force_vrs_per_vertex);
} else {
bool emulate_ngg_gs_query_pipeline_stat = device->physical_device->emulate_ngg_gs_query_pipeline_stat;
bool emulate_ngg_gs_query_pipeline_stat = pdev->emulate_ngg_gs_query_pipeline_stat;
ac_nir_gs_output_info gs_out_info = {
.streams = stage->info.gs.output_streams,
@ -609,7 +611,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
} else if (stage->stage == MESA_SHADER_FRAGMENT) {
ac_nir_lower_ps_options options = {
.gfx_level = gfx_level,
.family = device->physical_device->info.family,
.family = pdev->info.family,
.use_aco = !radv_use_llvm_for_stage(device, stage->stage),
.uses_discard = true,
.alpha_func = COMPARE_FUNC_ALWAYS,
@ -666,7 +668,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
NIR_PASS(_, stage->nir, ac_nir_lower_global_access);
NIR_PASS_V(stage->nir, ac_nir_lower_intrinsics_to_args, gfx_level, radv_select_hw_stage(&stage->info, gfx_level),
&stage->args.ac);
NIR_PASS_V(stage->nir, radv_nir_lower_abi, gfx_level, stage, gfx_state, device->physical_device->info.address32_hi);
NIR_PASS_V(stage->nir, radv_nir_lower_abi, gfx_level, stage, gfx_state, pdev->info.address32_hi);
radv_optimize_nir_algebraic(
stage->nir, io_to_mem || lowered_ngg || stage->stage == MESA_SHADER_COMPUTE || stage->stage == MESA_SHADER_TASK);
@ -926,7 +928,7 @@ radv_GetPipelineExecutableStatisticsKHR(VkDevice _device, const VkPipelineExecut
struct radv_shader *shader =
radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned lds_increment =
pdev->info.gfx_level >= GFX11 && stage == MESA_SHADER_FRAGMENT ? 1024 : pdev->info.lds_encode_granularity;

View file

@ -38,6 +38,8 @@
static bool
radv_is_cache_disabled(struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
/* The buffer address used for debug printf is hardcoded. */
if (device->printf.buffer_addr)
return true;
@ -45,8 +47,7 @@ radv_is_cache_disabled(struct radv_device *device)
/* Pipeline caches can be disabled with RADV_DEBUG=nocache, with MESA_GLSL_CACHE_DISABLE=1 and
* when ACO_DEBUG is used. MESA_GLSL_CACHE_DISABLE is done elsewhere.
*/
return (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) ||
(device->physical_device->use_llvm ? 0 : aco_get_codegen_flags());
return (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) || (pdev->use_llvm ? 0 : aco_get_codegen_flags());
}
void
@ -532,14 +533,15 @@ nir_shader *
radv_pipeline_cache_lookup_nir(struct radv_device *device, struct vk_pipeline_cache *cache, gl_shader_stage stage,
const blake3_hash key)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (radv_is_cache_disabled(device))
return NULL;
if (!cache)
cache = device->mem_cache;
return vk_pipeline_cache_lookup_nir(cache, key, sizeof(blake3_hash), &device->physical_device->nir_options[stage],
NULL, NULL);
return vk_pipeline_cache_lookup_nir(cache, key, sizeof(blake3_hash), &pdev->nir_options[stage], NULL, NULL);
}
void
@ -570,6 +572,7 @@ radv_pipeline_cache_lookup_nir_handle(struct radv_device *device, struct vk_pipe
struct nir_shader *
radv_pipeline_cache_handle_to_nir(struct radv_device *device, struct vk_pipeline_cache_object *object)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct blob_reader blob;
struct vk_raw_data_cache_object *nir_object = container_of(object, struct vk_raw_data_cache_object, base);
blob_reader_init(&blob, nir_object->data, nir_object->data_size);
@ -579,7 +582,7 @@ radv_pipeline_cache_handle_to_nir(struct radv_device *device, struct vk_pipeline
ralloc_free(nir);
return NULL;
}
nir->options = &device->physical_device->nir_options[nir->info.stage];
nir->options = &pdev->nir_options[nir->info.stage];
return nir;
}

View file

@ -75,6 +75,7 @@ void
radv_get_compute_pipeline_metadata(const struct radv_device *device, const struct radv_compute_pipeline *pipeline,
struct radv_compute_pipeline_metadata *metadata)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader *cs = pipeline->base.shaders[MESA_SHADER_COMPUTE];
uint32_t upload_sgpr = 0, inline_sgpr = 0;
@ -84,7 +85,7 @@ radv_get_compute_pipeline_metadata(const struct radv_device *device, const struc
metadata->rsrc1 = cs->config.rsrc1;
metadata->rsrc2 = cs->config.rsrc2;
metadata->rsrc3 = cs->config.rsrc3;
metadata->compute_resource_limits = radv_get_compute_resource_limits(device->physical_device, cs);
metadata->compute_resource_limits = radv_get_compute_resource_limits(pdev, cs);
metadata->block_size_x = cs->info.cs.block_size[0];
metadata->block_size_y = cs->info.cs.block_size[1];
metadata->block_size_z = cs->info.cs.block_size[2];
@ -136,7 +137,7 @@ static void
radv_compute_generate_pm4(const struct radv_device *device, struct radv_compute_pipeline *pipeline,
struct radv_shader *shader)
{
struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = &pipeline->base.cs;
cs->reserved_dw = cs->max_dw = pdev->info.gfx_level >= GFX10 ? 19 : 16;

View file

@ -132,12 +132,13 @@ static unsigned
radv_choose_spi_color_format(const struct radv_device *device, VkFormat vk_format, bool blend_enable,
bool blend_need_alpha)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct util_format_description *desc = vk_format_description(vk_format);
bool use_rbplus = device->physical_device->info.rbplus_allowed;
bool use_rbplus = pdev->info.rbplus_allowed;
struct ac_spi_color_formats formats = {0};
unsigned format, ntype, swap;
format = ac_get_cb_format(device->physical_device->info.gfx_level, desc->format);
format = ac_get_cb_format(pdev->info.gfx_level, desc->format);
ntype = ac_get_cb_number_type(desc->format);
swap = radv_translate_colorswap(vk_format, false);
@ -508,12 +509,13 @@ static uint64_t
radv_pipeline_needed_dynamic_state(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline,
const struct vk_graphics_pipeline_state *state)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
bool has_color_att = radv_pipeline_has_color_attachments(state->rp);
bool raster_enabled =
!state->rs->rasterizer_discard_enable || (pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
uint64_t states = RADV_DYNAMIC_ALL;
if (device->physical_device->info.gfx_level < GFX10_3)
if (pdev->info.gfx_level < GFX10_3)
states &= ~RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
/* Disable dynamic states that are useless to mesh shading. */
@ -568,7 +570,7 @@ radv_pipeline_needed_dynamic_state(const struct radv_device *device, const struc
struct radv_ia_multi_vgt_param_helpers
radv_compute_ia_multi_vgt_param(const struct radv_device *device, struct radv_shader *const *shaders)
{
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0};
ia_multi_vgt_param.ia_switch_on_eoi = false;
@ -1295,7 +1297,8 @@ static void
radv_link_shaders(const struct radv_device *device, struct radv_shader_stage *producer_stage,
struct radv_shader_stage *consumer_stage, const struct radv_graphics_state_key *gfx_state)
{
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
nir_shader *producer = producer_stage->nir;
nir_shader *consumer = consumer_stage->nir;
bool progress;
@ -1686,6 +1689,7 @@ radv_graphics_shaders_link(const struct radv_device *device, const struct radv_g
struct radv_ps_epilog_key
radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ps_epilog_state *state)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0, z_format = 0;
struct radv_ps_epilog_key key;
@ -1731,8 +1735,8 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
state->alpha_to_coverage_via_mrtz);
key.spi_shader_col_format = col_format;
key.color_is_int8 = device->physical_device->info.gfx_level < GFX8 ? is_int8 : 0;
key.color_is_int10 = device->physical_device->info.gfx_level < GFX8 ? is_int10 : 0;
key.color_is_int8 = pdev->info.gfx_level < GFX8 ? is_int8 : 0;
key.color_is_int10 = pdev->info.gfx_level < GFX8 ? is_int10 : 0;
key.enable_mrt_output_nan_fixup = device->instance->drirc.enable_mrt_output_nan_fixup ? is_float32 : 0;
key.colors_written = state->colors_written;
key.mrt0_is_dual_src = state->mrt0_is_dual_src;
@ -1811,7 +1815,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
const struct vk_graphics_pipeline_state *state,
VkGraphicsPipelineLibraryFlagBitsEXT lib_flags)
{
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_graphics_state_key key;
memset(&key, 0, sizeof(key));
@ -1884,7 +1888,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
}
}
if (device->physical_device->info.gfx_level >= GFX11 && state->ms) {
if (pdev->info.gfx_level >= GFX11 && state->ms) {
key.ms.alpha_to_coverage_via_mrtz = state->ms->alpha_to_coverage_enable;
}
@ -1898,15 +1902,14 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
key.unknown_rast_prim = true;
}
if (device->physical_device->info.gfx_level >= GFX10 && state->rs) {
if (pdev->info.gfx_level >= GFX10 && state->rs) {
key.rs.provoking_vtx_last = state->rs->provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;
}
key.ps.force_vrs_enabled = device->force_vrs_enabled && !radv_is_static_vrs_enabled(pipeline, state);
if ((radv_is_vrs_enabled(pipeline, state) || key.ps.force_vrs_enabled) &&
(device->physical_device->info.family == CHIP_NAVI21 || device->physical_device->info.family == CHIP_NAVI22 ||
device->physical_device->info.family == CHIP_VANGOGH))
(pdev->info.family == CHIP_NAVI21 || pdev->info.family == CHIP_NAVI22 || pdev->info.family == CHIP_VANGOGH))
key.adjust_frag_coord_z = true;
if (radv_pipeline_needs_ps_epilog(pipeline, lib_flags))
@ -1914,7 +1917,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
key.ps.epilog = radv_pipeline_generate_ps_epilog_key(device, state);
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
/* On GFX11, alpha to coverage is exported via MRTZ when depth/stencil/samplemask are also
* exported. Though, when a PS epilog is needed and the MS state is NULL (with dynamic
* rendering), it's not possible to know the info at compile time and MRTZ needs to be
@ -1927,7 +1930,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
key.dynamic_rasterization_samples = !!(pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZATION_SAMPLES) ||
(!!(pipeline->active_stages & VK_SHADER_STAGE_FRAGMENT_BIT) && !state->ms);
if (device->physical_device->use_ngg) {
if (pdev->use_ngg) {
VkShaderStageFlags ngg_stage;
if (pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT) {
@ -1995,7 +1998,9 @@ static void
radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage *stages,
VkShaderStageFlagBits active_nir_stages)
{
if (!device->physical_device->cache_key.use_ngg)
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!pdev->cache_key.use_ngg)
return;
if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_VERTEX].info.next_stage != MESA_SHADER_TESS_CTRL) {
@ -2006,7 +2011,7 @@ radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage *
stages[MESA_SHADER_MESH].info.is_ngg = true;
}
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
if (stages[MESA_SHADER_GEOMETRY].nir)
stages[MESA_SHADER_GEOMETRY].info.is_ngg = true;
} else {
@ -2164,7 +2169,8 @@ static void
radv_declare_pipeline_args(struct radv_device *device, struct radv_shader_stage *stages,
const struct radv_graphics_state_key *gfx_state, VkShaderStageFlagBits active_nir_stages)
{
enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(device);
enum amd_gfx_level gfx_level = pdev->info.gfx_level;
if (gfx_level >= GFX9 && stages[MESA_SHADER_TESS_CTRL].nir) {
radv_declare_shader_args(device, gfx_state, &stages[MESA_SHADER_TESS_CTRL].info, MESA_SHADER_TESS_CTRL,
@ -2210,15 +2216,16 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache
bool keep_executable_info, bool keep_statistic_info,
struct radv_shader_binary **gs_copy_binary)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader_info *gs_info = &gs_stage->info;
ac_nir_gs_output_info output_info = {
.streams = gs_info->gs.output_streams,
.usage_mask = gs_info->gs.output_usage_mask,
};
nir_shader *nir = ac_nir_create_gs_copy_shader(
gs_stage->nir, device->physical_device->info.gfx_level,
gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask, gs_info->outinfo.vs_output_param_offset,
gs_info->outinfo.param_exports, false, false, false, gs_info->force_vrs_per_vertex, &output_info);
gs_stage->nir, pdev->info.gfx_level, gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask,
gs_info->outinfo.vs_output_param_offset, gs_info->outinfo.param_exports, false, false, false,
gs_info->force_vrs_per_vertex, &output_info);
nir_validate_shader(nir, "after ac_nir_create_gs_copy_shader");
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
@ -2246,10 +2253,8 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache
gs_copy_stage.info.user_sgprs_locs = gs_copy_stage.args.user_sgprs_locs;
gs_copy_stage.info.inline_push_constant_mask = gs_copy_stage.args.ac.inline_push_const_mask;
NIR_PASS_V(nir, ac_nir_lower_intrinsics_to_args, device->physical_device->info.gfx_level, AC_HW_VERTEX_SHADER,
&gs_copy_stage.args.ac);
NIR_PASS_V(nir, radv_nir_lower_abi, device->physical_device->info.gfx_level, &gs_copy_stage, gfx_state,
device->physical_device->info.address32_hi);
NIR_PASS_V(nir, ac_nir_lower_intrinsics_to_args, pdev->info.gfx_level, AC_HW_VERTEX_SHADER, &gs_copy_stage.args.ac);
NIR_PASS_V(nir, radv_nir_lower_abi, pdev->info.gfx_level, &gs_copy_stage, gfx_state, pdev->info.address32_hi);
struct radv_graphics_pipeline_key key = {0};
bool dump_shader = radv_can_dump_shader(device, nir, true);
@ -2272,6 +2277,8 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_
struct radv_shader_binary **binaries, struct radv_shader **gs_copy_shader,
struct radv_shader_binary **gs_copy_binary)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
for (int s = MESA_VULKAN_SHADER_STAGES - 1; s >= 0; s--) {
if (!(active_nir_stages & (1 << s)))
continue;
@ -2280,7 +2287,7 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_
unsigned shader_count = 1;
/* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. */
if (device->physical_device->info.gfx_level >= GFX9 &&
if (pdev->info.gfx_level >= GFX9 &&
((s == MESA_SHADER_GEOMETRY &&
(active_nir_stages & (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))) ||
(s == MESA_SHADER_TESS_CTRL && (active_nir_stages & VK_SHADER_STAGE_VERTEX_BIT)))) {
@ -2348,6 +2355,7 @@ static void
radv_pipeline_import_retained_shaders(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
struct radv_graphics_lib_pipeline *lib, struct radv_shader_stage *stages)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_retained_shaders *retained_shaders = &lib->retained_shaders;
/* Import the stages (SPIR-V only in case of cache hits). */
@ -2370,7 +2378,7 @@ radv_pipeline_import_retained_shaders(const struct radv_device *device, struct r
int64_t stage_start = os_time_get_nano();
/* Deserialize the NIR shader. */
const struct nir_shader_compiler_options *options = &device->physical_device->nir_options[s];
const struct nir_shader_compiler_options *options = &pdev->nir_options[s];
struct blob_reader blob_reader;
blob_reader_init(&blob_reader, retained_shaders->stages[s].serialized_nir,
retained_shaders->stages[s].serialized_nir_size);
@ -2442,6 +2450,7 @@ static bool
radv_skip_graphics_pipeline_compile(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline,
VkGraphicsPipelineLibraryFlagBitsEXT lib_flags, bool fast_linking_enabled)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
VkShaderStageFlagBits binary_stages = 0;
/* Do not skip when fast-linking isn't enabled. */
@ -2462,7 +2471,7 @@ radv_skip_graphics_pipeline_compile(const struct radv_device *device, const stru
binary_stages |= mesa_to_vk_shader_stage(i);
}
if (device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
/* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. */
if (binary_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) {
binary_stages |= VK_SHADER_STAGE_VERTEX_BIT;
@ -2490,6 +2499,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
struct radv_shader **shaders, struct radv_shader_binary **binaries,
struct radv_shader **gs_copy_shader, struct radv_shader_binary **gs_copy_binary)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const bool nir_cache = device->instance->perftest_flags & RADV_PERFTEST_NIR_CACHE;
for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
if (!stages[s].entrypoint)
@ -2530,7 +2540,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
active_nir_stages |= mesa_to_vk_shader_stage(i);
}
if (!device->physical_device->mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir &&
if (!pdev->mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir &&
BITSET_TEST(stages[MESA_SHADER_MESH].nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID)) {
nir_shader *mesh = stages[MESA_SHADER_MESH].nir;
nir_shader *task = stages[MESA_SHADER_TASK].nir;
@ -2848,7 +2858,7 @@ void
radv_emit_vgt_gs_mode(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
const struct radv_shader *last_vgt_api_shader)
{
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader_info *info = &last_vgt_api_shader->info;
unsigned vgt_primitiveid_en = 0;
uint32_t vgt_gs_mode = 0;
@ -2871,7 +2881,7 @@ static void
radv_emit_hw_vs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
const struct radv_shader *shader)
{
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t va = radv_shader_get_va(shader);
radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4);
@ -2971,7 +2981,7 @@ static void
radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
const struct radv_shader *es, const struct radv_shader *shader)
{
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t va = radv_shader_get_va(shader);
gl_shader_stage es_type;
const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info;
@ -3128,7 +3138,7 @@ radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
static void
radv_emit_hw_hs(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_shader *shader)
{
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t va = radv_shader_get_va(shader);
if (pdev->info.gfx_level >= GFX9) {
@ -3152,6 +3162,8 @@ void
radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
const struct radv_shader *vs, const struct radv_shader *next_stage)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (vs->info.merged_shader_compiled_separately) {
const struct radv_userdata_info *loc = &vs->info.user_sgprs_locs.shader_data[AC_UD_NEXT_STAGE_PC];
const uint32_t base_reg = vs->info.user_data_0;
@ -3164,7 +3176,7 @@ radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *
if (vs->info.next_stage == MESA_SHADER_TESS_CTRL) {
radv_shader_combine_cfg_vs_tcs(vs, next_stage, &rsrc1, NULL);
if (device->physical_device->info.gfx_level >= GFX10) {
if (pdev->info.gfx_level >= GFX10) {
radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, vs->va >> 8);
} else {
radeon_set_sh_reg(cs, R_00B410_SPI_SHADER_PGM_LO_LS, vs->va >> 8);
@ -3174,7 +3186,7 @@ radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *
} else {
radv_shader_combine_cfg_vs_gs(vs, next_stage, &rsrc1, &rsrc2);
if (device->physical_device->info.gfx_level >= GFX10) {
if (pdev->info.gfx_level >= GFX10) {
radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, vs->va >> 8);
} else {
radeon_set_sh_reg(cs, R_00B210_SPI_SHADER_PGM_LO_ES, vs->va >> 8);
@ -3182,8 +3194,7 @@ radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *
unsigned lds_size;
if (next_stage->info.is_ngg) {
lds_size = DIV_ROUND_UP(next_stage->info.ngg_info.lds_size,
device->physical_device->info.lds_encode_granularity);
lds_size = DIV_ROUND_UP(next_stage->info.ngg_info.lds_size, pdev->info.lds_encode_granularity);
} else {
lds_size = next_stage->info.gs_ring_info.lds_size;
}
@ -3225,6 +3236,8 @@ void
radv_emit_tess_eval_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
const struct radv_shader *tes, const struct radv_shader *gs)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (tes->info.merged_shader_compiled_separately) {
const struct radv_userdata_info *loc = &tes->info.user_sgprs_locs.shader_data[AC_UD_NEXT_STAGE_PC];
const uint32_t base_reg = tes->info.user_data_0;
@ -3238,7 +3251,7 @@ radv_emit_tess_eval_shader(const struct radv_device *device, struct radeon_cmdbu
unsigned lds_size;
if (gs->info.is_ngg) {
lds_size = DIV_ROUND_UP(gs->info.ngg_info.lds_size, device->physical_device->info.lds_encode_granularity);
lds_size = DIV_ROUND_UP(gs->info.ngg_info.lds_size, pdev->info.lds_encode_granularity);
} else {
lds_size = gs->info.gs_ring_info.lds_size;
}
@ -3264,7 +3277,7 @@ static void
radv_emit_hw_gs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
const struct radv_shader *gs)
{
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_legacy_gs_info *gs_state = &gs->info.gs_ring_info;
unsigned gs_max_out_vertices;
const uint8_t *num_components;
@ -3382,16 +3395,15 @@ void
radv_emit_mesh_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
const struct radv_shader *ms)
{
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
const uint32_t gs_out = radv_conv_gl_prim_to_gs_out(ms->info.ms.output_prim);
radv_emit_hw_ngg(device, ctx_cs, cs, NULL, ms);
radeon_set_context_reg(
ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT,
device->physical_device->mesh_fast_launch_2 ? ms->info.ngg_info.max_out_verts : ms->info.workgroup_size);
radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT,
pdev->mesh_fast_launch_2 ? ms->info.ngg_info.max_out_verts : ms->info.workgroup_size);
radeon_set_uconfig_reg_idx(pdev, ctx_cs, R_030908_VGT_PRIMITIVE_TYPE, 1, V_008958_DI_PT_POINTLIST);
if (device->physical_device->mesh_fast_launch_2) {
if (pdev->mesh_fast_launch_2) {
radeon_set_sh_reg_seq(cs, R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, 2);
radeon_emit(cs, S_00B2B0_MESHLET_NUM_THREAD_X(ms->info.cs.block_size[0] - 1) |
S_00B2B0_MESHLET_NUM_THREAD_Y(ms->info.cs.block_size[1] - 1) |
@ -3476,9 +3488,10 @@ void
radv_emit_ps_inputs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
const struct radv_shader *last_vgt_shader, const struct radv_shader *ps)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_vs_output_info *outinfo = &last_vgt_shader->info.outinfo;
bool mesh = last_vgt_shader->info.stage == MESA_SHADER_MESH;
bool gfx11plus = device->physical_device->info.gfx_level >= GFX11;
bool gfx11plus = pdev->info.gfx_level >= GFX11;
uint32_t ps_input_cntl[32];
unsigned ps_offset = 0;
@ -3530,7 +3543,7 @@ void
radv_emit_fragment_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
const struct radv_shader *ps)
{
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
bool param_gen;
uint64_t va;
@ -3566,7 +3579,7 @@ void
radv_emit_vgt_reuse(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, const struct radv_shader *tes,
const struct radv_vgt_shader_key *key)
{
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level == GFX10_3) {
/* Legacy Tess+GS should disable reuse to prevent hangs on GFX10.3. */
@ -3635,7 +3648,7 @@ void
radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
const struct radv_vgt_shader_key *key)
{
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t stages = 0;
if (key->tess) {
@ -3651,7 +3664,7 @@ radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdb
stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1);
} else if (key->mesh) {
assert(!key->ngg_passthrough);
unsigned gs_fast_launch = device->physical_device->mesh_fast_launch_2 ? 2 : 1;
unsigned gs_fast_launch = pdev->mesh_fast_launch_2 ? 2 : 1;
stages |=
S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(gs_fast_launch) | S_028B54_NGG_WAVE_ID_EN(key->mesh_scratch_ring);
} else if (key->ngg) {
@ -3682,7 +3695,7 @@ radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdb
void
radv_emit_vgt_gs_out(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, uint32_t vgt_gs_out_prim_type)
{
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level >= GFX11) {
radeon_set_uconfig_reg(ctx_cs, R_030998_VGT_GS_OUT_PRIM_TYPE, vgt_gs_out_prim_type);
@ -3713,9 +3726,10 @@ gfx103_emit_vgt_draw_payload_cntl(struct radeon_cmdbuf *ctx_cs, const struct rad
static bool
gfx103_pipeline_vrs_coarse_shading(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
if (device->physical_device->info.gfx_level != GFX10_3)
if (pdev->info.gfx_level != GFX10_3)
return false;
if (device->instance->debug_flags & RADV_DEBUG_NO_VRS_FLAT_SHADING)
@ -3731,7 +3745,7 @@ void
gfx103_emit_vrs_state(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, const struct radv_shader *ps,
bool enable_vrs, bool enable_vrs_coarse_shading, bool force_vrs_per_vertex)
{
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t mode = V_028064_SC_VRS_COMB_MODE_PASSTHRU;
uint8_t rate_x = 0, rate_y = 0;
@ -3769,7 +3783,7 @@ radv_pipeline_emit_pm4(const struct radv_device *device, struct radv_graphics_pi
const struct vk_graphics_pipeline_state *state)
{
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader *last_vgt_shader = radv_get_last_vgt_shader(pipeline);
const struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
struct radeon_cmdbuf *ctx_cs = &pipeline->base.ctx_cs;
@ -3835,7 +3849,7 @@ static void
radv_pipeline_init_vertex_input_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
const struct vk_graphics_pipeline_state *state)
{
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader_info *vs_info = &radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX)->info;
if (state->vi) {
@ -4022,7 +4036,8 @@ bool
radv_needs_null_export_workaround(const struct radv_device *device, const struct radv_shader *ps,
unsigned custom_blend_mode)
{
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
if (!ps)
return false;

View file

@ -370,6 +370,7 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
const struct radv_ray_tracing_stage_info *traversal_stage_info,
struct radv_serialized_shader_arena_block *replay_block, struct radv_shader **out_shader)
{
struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_shader_binary *binary;
bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.base.create_flags);
bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.base.create_flags);
@ -405,7 +406,7 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
.stack_alignment = 16,
.localized_loads = true,
.vectorizer_callback = radv_mem_vectorize_callback,
.vectorizer_data = &device->physical_device->info.gfx_level,
.vectorizer_data = &pdev->info.gfx_level,
};
nir_lower_shader_calls(stage->nir, &opts, &resume_shaders, &num_resume_shaders, stage->nir);
}
@ -828,6 +829,8 @@ postprocess_rt_config(struct ac_shader_config *config, enum amd_gfx_level gfx_le
static void
compile_rt_prolog(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
pipeline->prolog = radv_create_rt_prolog(device);
/* create combined config */
@ -839,7 +842,7 @@ compile_rt_prolog(struct radv_device *device, struct radv_ray_tracing_pipeline *
if (pipeline->base.base.shaders[MESA_SHADER_INTERSECTION])
combine_config(config, &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]->config);
postprocess_rt_config(config, device->physical_device->info.gfx_level, device->physical_device->rt_wave_size);
postprocess_rt_config(config, pdev->info.gfx_level, pdev->rt_wave_size);
pipeline->prolog->max_waves = radv_get_max_waves(device, config, &pipeline->prolog->info);
}

View file

@ -18,6 +18,8 @@ static struct hash_table *device_ht = NULL;
VkResult
radv_printf_data_init(struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
util_dynarray_init(&device->printf.formats, NULL);
device->printf.buffer_size = debug_get_num_option("RADV_PRINTF_BUFFER_SIZE", 0);
@ -45,9 +47,9 @@ radv_printf_data_init(struct radv_device *device)
VkMemoryAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = requirements.size,
.memoryTypeIndex = radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
.memoryTypeIndex =
radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
};
result = device->vk.dispatch_table.AllocateMemory(_device, &alloc_info, NULL, &device->printf.memory);

View file

@ -1134,8 +1134,6 @@ struct radv_device {
/* Whether to keep shader debug info, for debugging. */
bool keep_shader_info;
struct radv_physical_device *physical_device;
/* Backup in-memory cache to be used if the app doesn't provide one */
struct vk_pipeline_cache *mem_cache;
@ -1275,6 +1273,12 @@ struct radv_device {
uint32_t compute_scratch_waves;
};
static inline struct radv_physical_device *
radv_device_physical(const struct radv_device *dev)
{
return (struct radv_physical_device *)dev->vk.physical;
}
bool radv_device_set_pstate(struct radv_device *device, bool enable);
bool radv_device_acquire_performance_counters(struct radv_device *device);
void radv_device_release_performance_counters(struct radv_device *device);
@ -2155,10 +2159,12 @@ static inline void
radv_emit_shader_pointer_body(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va,
bool use_32bit_pointers)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
radeon_emit(cs, va);
if (use_32bit_pointers) {
assert(va == 0 || (va >> 32) == device->physical_device->info.address32_hi);
assert(va == 0 || (va >> 32) == pdev->info.address32_hi);
} else {
radeon_emit(cs, va >> 32);
}
@ -2798,7 +2804,8 @@ radv_image_has_htile(const struct radv_image *image)
static inline bool
radv_image_has_vrs_htile(const struct radv_device *device, const struct radv_image *image)
{
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
/* Any depth buffer can potentially use VRS on GFX10.3. */
return gfx_level == GFX10_3 && device->vk.enabled_features.attachmentFragmentShadingRate &&
@ -2830,7 +2837,9 @@ radv_image_is_tc_compat_htile(const struct radv_image *image)
static inline bool
radv_image_tile_stencil_disabled(const struct radv_device *device, const struct radv_image *image)
{
if (device->physical_device->info.gfx_level >= GFX9) {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level >= GFX9) {
return !vk_format_has_stencil(image->vk.format) && !radv_image_has_vrs_htile(device, image);
} else {
/* Due to a hw bug, TILE_STENCIL_DISABLE must be set to 0 for
@ -2938,8 +2947,10 @@ radv_get_htile_initial_value(const struct radv_device *device, const struct radv
static inline bool
radv_image_get_iterate256(const struct radv_device *device, struct radv_image *image)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
/* ITERATE_256 is required for depth or stencil MSAA images that are TC-compatible HTILE. */
return device->physical_device->info.gfx_level >= GFX10 &&
return pdev->info.gfx_level >= GFX10 &&
(image->vk.usage & (VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&
radv_image_is_tc_compat_htile(image) && image->vk.samples > 1;
}
@ -3744,7 +3755,8 @@ radv_is_streamout_enabled(struct radv_cmd_buffer *cmd_buffer)
static inline enum amd_ip_type
radv_queue_ring(const struct radv_queue *queue)
{
return radv_queue_family_to_ring(queue->device->physical_device, queue->state.qf);
const struct radv_physical_device *pdev = radv_device_physical(queue->device);
return radv_queue_family_to_ring(pdev, queue->state.qf);
}
/* radv_video */
@ -3758,7 +3770,8 @@ void radv_video_get_profile_alignments(struct radv_physical_device *pdev, const
static inline bool
radv_use_llvm_for_stage(const struct radv_device *device, UNUSED gl_shader_stage stage)
{
return device->physical_device->use_llvm;
const struct radv_physical_device *pdev = radv_device_physical(device);
return pdev->use_llvm;
}
static inline bool

View file

@ -56,7 +56,8 @@ radv_get_pipelinestat_query_size(struct radv_device *device)
/* GFX10_3 only has 11 valid pipeline statistics queries but in order to emulate mesh/task shader
* invocations, it's easier to use the same size as GFX11.
*/
unsigned num_results = device->physical_device->info.gfx_level >= GFX10_3 ? 14 : 11;
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned num_results = pdev->info.gfx_level >= GFX10_3 ? 14 : 11;
return num_results * 8;
}
@ -120,6 +121,7 @@ build_occlusion_query_shader(struct radv_device *device)
* }
* }
*/
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "occlusion_query");
b.shader->info.workgroup_size[0] = 64;
@ -128,8 +130,8 @@ build_occlusion_query_shader(struct radv_device *device)
nir_variable *start = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "start");
nir_variable *end = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "end");
nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available");
uint64_t enabled_rb_mask = device->physical_device->info.enabled_rb_mask;
unsigned db_count = device->physical_device->info.max_render_backends;
uint64_t enabled_rb_mask = pdev->info.enabled_rb_mask;
unsigned db_count = pdev->info.max_render_backends;
nir_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
@ -275,6 +277,7 @@ build_pipeline_statistics_query_shader(struct radv_device *device)
* }
* }
*/
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "pipeline_statistics_query");
b.shader->info.workgroup_size[0] = 64;
@ -301,7 +304,7 @@ build_pipeline_statistics_query_shader(struct radv_device *device)
nir_def *available32 = nir_load_ssbo(&b, 1, 32, src_buf, avail_offset);
nir_store_var(&b, available, nir_i2b(&b, available32), 0x1);
if (device->physical_device->emulate_mesh_shader_queries) {
if (pdev->emulate_mesh_shader_queries) {
nir_push_if(&b, nir_test_mask(&b, stats_mask, VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT));
{
const uint32_t idx = ffs(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT) - 1;
@ -867,6 +870,7 @@ build_ms_prim_gen_query_shader(struct radv_device *device)
static VkResult
radv_device_init_meta_query_state_internal(struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
VkResult result;
nir_shader *occlusion_cs = NULL;
nir_shader *pipeline_statistics_cs = NULL;
@ -886,7 +890,7 @@ radv_device_init_meta_query_state_internal(struct radv_device *device)
timestamp_cs = build_timestamp_query_shader(device);
pg_cs = build_pg_query_shader(device);
if (device->physical_device->emulate_mesh_shader_queries)
if (pdev->emulate_mesh_shader_queries)
ms_prim_gen_cs = build_ms_prim_gen_query_shader(device);
VkDescriptorSetLayoutCreateInfo occlusion_ds_create_info = {
@ -1025,7 +1029,7 @@ radv_device_init_meta_query_state_internal(struct radv_device *device)
result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &pg_pipeline_info,
NULL, &device->meta_state.query.pg_query_pipeline);
if (device->physical_device->emulate_mesh_shader_queries) {
if (pdev->emulate_mesh_shader_queries) {
VkPipelineShaderStageCreateInfo ms_prim_gen_pipeline_shader_stage = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
@ -1211,6 +1215,7 @@ static VkResult
radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator, VkQueryPool *pQueryPool)
{
struct radv_physical_device *pdev = radv_device_physical(device);
VkResult result;
size_t pool_struct_size = pCreateInfo->queryType == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR
? sizeof(struct radv_pc_query_pool)
@ -1232,21 +1237,20 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *
* and the legacy GS path but it increments for NGG VS/TES because they are merged with GS. To
* avoid this counter to increment, it's also emulated.
*/
pool->uses_gds =
(device->physical_device->emulate_ngg_gs_query_pipeline_stat &&
(pool->vk.pipeline_statistics & (VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT))) ||
(device->physical_device->use_ngg && pCreateInfo->queryType == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) ||
(device->physical_device->emulate_mesh_shader_queries &&
(pCreateInfo->queryType == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT ||
pool->vk.pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_MESH_SHADER_INVOCATIONS_BIT_EXT));
pool->uses_gds = (pdev->emulate_ngg_gs_query_pipeline_stat &&
(pool->vk.pipeline_statistics & (VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT))) ||
(pdev->use_ngg && pCreateInfo->queryType == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) ||
(pdev->emulate_mesh_shader_queries &&
(pCreateInfo->queryType == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT ||
pool->vk.pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_MESH_SHADER_INVOCATIONS_BIT_EXT));
/* The number of task shader invocations needs to be queried on ACE. */
pool->uses_ace = (pool->vk.pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);
switch (pCreateInfo->queryType) {
case VK_QUERY_TYPE_OCCLUSION:
pool->stride = 16 * device->physical_device->info.max_render_backends;
pool->stride = 16 * pdev->info.max_render_backends;
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
pool->stride = radv_get_pipelinestat_query_size(device) * 2;
@ -1262,7 +1266,7 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *
pool->stride = 32;
break;
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
if (pool->uses_gds && device->physical_device->info.gfx_level < GFX11) {
if (pool->uses_gds && pdev->info.gfx_level < GFX11) {
/* When the hardware can use both the legacy and the NGG paths in the same begin/end pair,
* allocate 2x64-bit values for the GDS counters.
*/
@ -1272,7 +1276,7 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *
}
break;
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
result = radv_pc_init_query_pool(device->physical_device, pCreateInfo, (struct radv_pc_query_pool *)pool);
result = radv_pc_init_query_pool(pdev, pCreateInfo, (struct radv_pc_query_pool *)pool);
if (result != VK_SUCCESS) {
radv_destroy_query_pool(device, pAllocator, pool);
@ -1281,11 +1285,11 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *
break;
}
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
/* GFX11 natively supports mesh generated primitives with pipeline statistics. */
pool->stride = radv_get_pipelinestat_query_size(device) * 2;
} else {
assert(device->physical_device->emulate_mesh_shader_queries);
assert(pdev->emulate_mesh_shader_queries);
pool->stride = 16;
}
break;
@ -1296,8 +1300,7 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *
pool->availability_offset = pool->stride * pCreateInfo->queryCount;
pool->size = pool->availability_offset;
if (pCreateInfo->queryType == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
(pCreateInfo->queryType == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT &&
device->physical_device->info.gfx_level >= GFX11))
(pCreateInfo->queryType == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT && pdev->info.gfx_level >= GFX11))
pool->size += 4 * pCreateInfo->queryCount;
result = radv_bo_create(device, pool->size, 64, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING,
@ -1365,6 +1368,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
const struct radv_physical_device *pdev = radv_device_physical(device);
char *data = pData;
VkResult result = VK_SUCCESS;
@ -1413,8 +1417,8 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
}
case VK_QUERY_TYPE_OCCLUSION: {
p_atomic_uint64_t const *src64 = (p_atomic_uint64_t const *)src;
uint32_t db_count = device->physical_device->info.max_render_backends;
uint64_t enabled_rb_mask = device->physical_device->info.enabled_rb_mask;
uint32_t db_count = pdev->info.max_render_backends;
uint64_t enabled_rb_mask = pdev->info.enabled_rb_mask;
uint64_t sample_count = 0;
available = 1;
@ -1460,7 +1464,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
do {
available = p_atomic_read(avail_ptr);
if (pool->uses_ace && device->physical_device->emulate_mesh_shader_queries) {
if (pool->uses_ace && pdev->emulate_mesh_shader_queries) {
const uint32_t task_invoc_offset =
radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);
const uint32_t *avail_ptr_start = (const uint32_t *)(src + task_invoc_offset + 4);
@ -1550,7 +1554,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
break;
}
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
const bool uses_gds_query = pool->uses_gds && device->physical_device->info.gfx_level < GFX11;
const bool uses_gds_query = pool->uses_gds && pdev->info.gfx_level < GFX11;
p_atomic_uint64_t const *src64 = (p_atomic_uint64_t const *)src;
uint64_t primitive_storage_needed;
@ -1615,7 +1619,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: {
uint64_t ms_prim_gen;
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(device);
const uint32_t *avail_ptr = (const uint32_t *)(pool->ptr + pool->availability_offset + 4 * query);
@ -1730,6 +1734,7 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
uint64_t va = radv_buffer_get_va(pool->bo);
uint64_t dest_va = radv_buffer_get_va(dst_buffer->bo);
@ -1780,7 +1785,7 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
/* This waits on the ME. All copies below are done on the ME */
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, avail_va, 1, 0xffffffff);
if (pool->uses_ace && cmd_buffer->device->physical_device->emulate_mesh_shader_queries) {
if (pool->uses_ace && pdev->emulate_mesh_shader_queries) {
const uint64_t src_va = va + query * pool->stride;
const uint64_t start_va = src_va + task_invoc_offset + 4;
const uint64_t stop_va = start_va + pipelinestat_block_size;
@ -1842,7 +1847,7 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
break;
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
const bool uses_gds_query = pool->uses_gds && cmd_buffer->device->physical_device->info.gfx_level < GFX11;
const bool uses_gds_query = pool->uses_gds && pdev->info.gfx_level < GFX11;
for (unsigned i = 0; i < queryCount; i++) {
unsigned query = firstQuery + i;
@ -1863,11 +1868,10 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pg_query_pipeline, pool->bo, dst_buffer->bo,
firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride, dst_size,
queryCount, flags, 0, 0,
pool->uses_gds && cmd_buffer->device->physical_device->info.gfx_level < GFX11);
queryCount, flags, 0, 0, pool->uses_gds && pdev->info.gfx_level < GFX11);
break;
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
for (unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
unsigned query = firstQuery + i;
@ -1928,6 +1932,7 @@ radv_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uin
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
uint32_t value = query_clear_value(pool->vk.query_type);
uint32_t flush_bits = 0;
@ -1941,8 +1946,7 @@ radv_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uin
queryCount * pool->stride, value);
if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
(pool->vk.query_type == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT &&
cmd_buffer->device->physical_device->info.gfx_level >= GFX11)) {
(pool->vk.query_type == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT && pdev->info.gfx_level >= GFX11)) {
flush_bits |=
radv_fill_buffer(cmd_buffer, NULL, pool->bo,
radv_buffer_get_va(pool->bo) + pool->availability_offset + firstQuery * 4, queryCount * 4, 0);
@ -1960,6 +1964,7 @@ radv_ResetQueryPool(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t value = query_clear_value(pool->vk.query_type);
uint32_t *data = (uint32_t *)(pool->ptr + firstQuery * pool->stride);
@ -1969,8 +1974,7 @@ radv_ResetQueryPool(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery
*p = value;
if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
(pool->vk.query_type == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT &&
device->physical_device->info.gfx_level >= GFX11)) {
(pool->vk.query_type == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT && pdev->info.gfx_level >= GFX11)) {
memset(pool->ptr + pool->availability_offset + firstQuery * 4, 0, queryCount * 4);
}
}
@ -2055,6 +2059,7 @@ static void
emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va, VkQueryType query_type,
VkQueryControlFlags flags, uint32_t index)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
switch (query_type) {
case VK_QUERY_TYPE_OCCLUSION:
@ -2082,12 +2087,11 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
}
}
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11 &&
cmd_buffer->device->physical_device->info.pfp_fw_version >= EVENT_WRITE_ZPASS_PFP_VERSION) {
if (pdev->info.gfx_level >= GFX11 && pdev->info.pfp_fw_version >= EVENT_WRITE_ZPASS_PFP_VERSION) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_ZPASS, 1, 0));
} else {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
radeon_emit(cs, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_DUMP) | EVENT_INDEX(1));
} else {
radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
@ -2149,7 +2153,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
uint32_t task_invoc_offset =
radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
va += task_invoc_offset;
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->gang.cs, 4);
@ -2176,7 +2180,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
break;
}
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
if (pdev->use_ngg_streamout) {
/* generated prim counter */
gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
radv_cs_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);
@ -2201,7 +2205,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
}
break;
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
/* On GFX11+, primitives generated query always use GDS. */
gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
radv_cs_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);
@ -2251,7 +2255,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
break;
}
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: {
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
radeon_check_space(cmd_buffer->device->ws, cs, 4);
++cmd_buffer->state.active_pipeline_queries;
@ -2285,6 +2289,7 @@ static void
emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va, uint64_t avail_va,
VkQueryType query_type, uint32_t index)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
switch (query_type) {
case VK_QUERY_TYPE_OCCLUSION:
@ -2300,12 +2305,11 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_OCCLUSION_QUERY;
}
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11 &&
cmd_buffer->device->physical_device->info.pfp_fw_version >= EVENT_WRITE_ZPASS_PFP_VERSION) {
if (pdev->info.gfx_level >= GFX11 && pdev->info.pfp_fw_version >= EVENT_WRITE_ZPASS_PFP_VERSION) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_ZPASS, 1, 0));
} else {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
radeon_emit(cs, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_DUMP) | EVENT_INDEX(1));
} else {
radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
@ -2369,7 +2373,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
uint32_t task_invoc_offset =
radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
va += task_invoc_offset;
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->gang.cs, 4);
@ -2391,13 +2395,12 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
}
}
radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->info.gfx_level, cmd_buffer->qf,
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, avail_va,
1, cmd_buffer->gfx9_eop_bug_va);
radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0,
EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, avail_va, 1, cmd_buffer->gfx9_eop_bug_va);
break;
}
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
if (pdev->use_ngg_streamout) {
/* generated prim counter */
gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
radv_cs_write_data_imm(cs, V_370_ME, va + 20, 0x80000000);
@ -2419,7 +2422,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
}
break;
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
/* On GFX11+, primitives generated query always use GDS. */
gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
radv_cs_write_data_imm(cs, V_370_ME, va + 20, 0x80000000);
@ -2463,7 +2466,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
break;
}
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: {
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(cmd_buffer->device);
radeon_check_space(cmd_buffer->device->ws, cs, 16);
@ -2479,9 +2482,9 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->info.gfx_level, cmd_buffer->qf,
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT,
avail_va, 1, cmd_buffer->gfx9_eop_bug_va);
radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0,
EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, avail_va, 1,
cmd_buffer->gfx9_eop_bug_va);
} else {
gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET, va + 8);
radv_cs_write_data_imm(cs, V_370_ME, va + 12, 0x80000000);
@ -2499,7 +2502,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
cmd_buffer->active_query_flush_bits |=
RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE;
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
}
}
@ -2564,6 +2567,7 @@ radv_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
void
radv_write_timestamp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkPipelineStageFlags2 stage)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT) {
@ -2575,9 +2579,8 @@ radv_write_timestamp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkPipeline
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
} else {
radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->info.gfx_level, cmd_buffer->qf,
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_TIMESTAMP, va, 0,
cmd_buffer->gfx9_eop_bug_va);
radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0,
EOP_DST_SEL_MEM, EOP_DATA_SEL_TIMESTAMP, va, 0, cmd_buffer->gfx9_eop_bug_va);
}
}
@ -2587,6 +2590,7 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
const unsigned num_queries = MAX2(util_bitcount(cmd_buffer->state.render.view_mask), 1);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
const uint64_t va = radv_buffer_get_va(pool->bo);
@ -2625,7 +2629,7 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta
cmd_buffer->active_query_flush_bits |=
RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE;
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
}

View file

@ -126,6 +126,7 @@ static VkResult
radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMemoryBindInfo *bind)
{
RADV_FROM_HANDLE(radv_image, image, bind->image);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_surf *surface = &image->planes[0].surface;
uint32_t bs = vk_format_get_blocksize(image->vk.format);
VkResult result;
@ -149,7 +150,7 @@ radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMem
if (bind->pBinds[i].memory != VK_NULL_HANDLE)
mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
if (device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
offset = surface->u.gfx9.surf_slice_size * layer + surface->u.gfx9.prt_level_offset[level];
pitch = surface->u.gfx9.prt_level_pitch[level];
depth_pitch = surface->u.gfx9.surf_slice_size;
@ -243,11 +244,13 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
struct radeon_winsys_bo *task_rings_bo, struct radeon_winsys_bo *mesh_scratch_ring_bo,
uint32_t attr_ring_size, struct radeon_winsys_bo *attr_ring_bo)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (scratch_bo) {
uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
if (device->physical_device->info.gfx_level >= GFX11)
if (pdev->info.gfx_level >= GFX11)
rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
else
rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX6(1);
@ -270,17 +273,17 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
S_008F0C_INDEX_STRIDE(3) | S_008F0C_ADD_TID_ENABLE(1);
if (device->physical_device->info.gfx_level >= GFX11)
if (pdev->info.gfx_level >= GFX11)
desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
else
desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX6(1);
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else if (device->physical_device->info.gfx_level >= GFX10) {
} else if (pdev->info.gfx_level >= GFX10) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
} else if (device->physical_device->info.gfx_level >= GFX8) {
} else if (pdev->info.gfx_level >= GFX8) {
/* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */
desc[3] |=
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1);
@ -298,9 +301,9 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else if (device->physical_device->info.gfx_level >= GFX10) {
} else if (pdev->info.gfx_level >= GFX10) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
} else {
@ -323,9 +326,9 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else if (device->physical_device->info.gfx_level >= GFX10) {
} else if (pdev->info.gfx_level >= GFX10) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
} else {
@ -343,17 +346,17 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
S_008F0C_INDEX_STRIDE(1) | S_008F0C_ADD_TID_ENABLE(true);
if (device->physical_device->info.gfx_level >= GFX11)
if (pdev->info.gfx_level >= GFX11)
desc[5] |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
else
desc[5] |= S_008F04_SWIZZLE_ENABLE_GFX6(1);
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else if (device->physical_device->info.gfx_level >= GFX10) {
} else if (pdev->info.gfx_level >= GFX10) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
} else if (device->physical_device->info.gfx_level >= GFX8) {
} else if (pdev->info.gfx_level >= GFX8) {
/* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */
desc[7] |=
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1);
@ -367,17 +370,17 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
if (tess_rings_bo) {
uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
uint64_t tess_offchip_va = tess_va + device->physical_device->hs.tess_offchip_ring_offset;
uint64_t tess_offchip_va = tess_va + pdev->hs.tess_offchip_ring_offset;
desc[0] = tess_va;
desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
desc[2] = device->physical_device->hs.tess_factor_ring_size;
desc[2] = pdev->hs.tess_factor_ring_size;
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (device->physical_device->info.gfx_level >= GFX10) {
} else if (pdev->info.gfx_level >= GFX10) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
S_008F0C_RESOURCE_LEVEL(1);
} else {
@ -387,13 +390,13 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
desc[4] = tess_offchip_va;
desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
desc[6] = device->physical_device->hs.tess_offchip_ring_size;
desc[6] = pdev->hs.tess_offchip_ring_size;
desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (device->physical_device->info.gfx_level >= GFX10) {
} else if (pdev->info.gfx_level >= GFX10) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
S_008F0C_RESOURCE_LEVEL(1);
} else {
@ -406,33 +409,33 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
if (task_rings_bo) {
uint64_t task_va = radv_buffer_get_va(task_rings_bo);
uint64_t task_draw_ring_va = task_va + device->physical_device->task_info.draw_ring_offset;
uint64_t task_payload_ring_va = task_va + device->physical_device->task_info.payload_ring_offset;
uint64_t task_draw_ring_va = task_va + pdev->task_info.draw_ring_offset;
uint64_t task_payload_ring_va = task_va + pdev->task_info.payload_ring_offset;
desc[0] = task_draw_ring_va;
desc[1] = S_008F04_BASE_ADDRESS_HI(task_draw_ring_va >> 32);
desc[2] = device->physical_device->task_info.num_entries * AC_TASK_DRAW_ENTRY_BYTES;
desc[2] = pdev->task_info.num_entries * AC_TASK_DRAW_ENTRY_BYTES;
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else {
assert(device->physical_device->info.gfx_level >= GFX10_3);
assert(pdev->info.gfx_level >= GFX10_3);
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
S_008F0C_RESOURCE_LEVEL(1);
}
desc[4] = task_payload_ring_va;
desc[5] = S_008F04_BASE_ADDRESS_HI(task_payload_ring_va >> 32);
desc[6] = device->physical_device->task_info.num_entries * AC_TASK_PAYLOAD_ENTRY_BYTES;
desc[6] = pdev->task_info.num_entries * AC_TASK_PAYLOAD_ENTRY_BYTES;
desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else {
assert(device->physical_device->info.gfx_level >= GFX10_3);
assert(pdev->info.gfx_level >= GFX10_3);
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
S_008F0C_RESOURCE_LEVEL(1);
}
@ -449,10 +452,10 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else {
assert(device->physical_device->info.gfx_level >= GFX10_3);
assert(pdev->info.gfx_level >= GFX10_3);
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
S_008F0C_RESOURCE_LEVEL(1);
}
@ -461,7 +464,7 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
desc += 4;
if (attr_ring_bo) {
assert(device->physical_device->info.gfx_level >= GFX11);
assert(pdev->info.gfx_level >= GFX11);
uint64_t va = radv_buffer_get_va(attr_ring_bo);
@ -489,6 +492,8 @@ static void
radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *esgs_ring_bo,
uint32_t esgs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!esgs_ring_bo && !gsvs_ring_bo)
return;
@ -498,7 +503,7 @@ radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, st
if (gsvs_ring_bo)
radv_cs_add_buffer(device->ws, cs, gsvs_ring_bo);
if (device->physical_device->info.gfx_level >= GFX7) {
if (pdev->info.gfx_level >= GFX7) {
radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
radeon_emit(cs, esgs_ring_size >> 8);
radeon_emit(cs, gsvs_ring_size >> 8);
@ -512,49 +517,51 @@ radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, st
static void
radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *tess_rings_bo)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t tf_va;
uint32_t tf_ring_size;
if (!tess_rings_bo)
return;
tf_ring_size = device->physical_device->hs.tess_factor_ring_size / 4;
tf_ring_size = pdev->hs.tess_factor_ring_size / 4;
tf_va = radv_buffer_get_va(tess_rings_bo);
radv_cs_add_buffer(device->ws, cs, tess_rings_bo);
if (device->physical_device->info.gfx_level >= GFX7) {
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX7) {
if (pdev->info.gfx_level >= GFX11) {
/* TF_RING_SIZE is per SE on GFX11. */
tf_ring_size /= device->physical_device->info.max_se;
tf_ring_size /= pdev->info.max_se;
}
radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size));
radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, tf_va >> 8);
if (device->physical_device->info.gfx_level >= GFX10) {
if (pdev->info.gfx_level >= GFX10) {
radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI, S_030984_BASE_HI(tf_va >> 40));
} else if (device->physical_device->info.gfx_level == GFX9) {
} else if (pdev->info.gfx_level == GFX9) {
radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(tf_va >> 40));
}
radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, device->physical_device->hs.hs_offchip_param);
radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, pdev->hs.hs_offchip_param);
} else {
radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size));
radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8);
radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, device->physical_device->hs.hs_offchip_param);
radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, pdev->hs.hs_offchip_param);
}
}
static VkResult
radv_initialise_task_control_buffer(struct radv_device *device, struct radeon_winsys_bo *task_rings_bo)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t *ptr = (uint32_t *)radv_buffer_map(device->ws, task_rings_bo);
if (!ptr)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
const uint32_t num_entries = device->physical_device->task_info.num_entries;
const uint32_t num_entries = pdev->task_info.num_entries;
const uint64_t task_va = radv_buffer_get_va(task_rings_bo);
const uint64_t task_draw_ring_va = task_va + device->physical_device->task_info.draw_ring_offset;
const uint64_t task_draw_ring_va = task_va + pdev->task_info.draw_ring_offset;
assert((task_draw_ring_va & 0xFFFFFF00) == (task_draw_ring_va & 0xFFFFFFFF));
/* 64-bit write_ptr */
@ -599,7 +606,8 @@ static void
radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves,
struct radeon_winsys_bo *scratch_bo)
{
const struct radeon_info *gpu_info = &device->physical_device->info;
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
if (!scratch_bo)
return;
@ -626,7 +634,8 @@ static void
radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves,
struct radeon_winsys_bo *compute_scratch_bo)
{
const struct radeon_info *gpu_info = &device->physical_device->info;
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
uint64_t scratch_va;
uint32_t rsrc1;
@ -680,6 +689,7 @@ static void
radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmdbuf *cs,
struct radeon_winsys_bo *descriptor_bo)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t va;
if (!descriptor_bo)
@ -689,21 +699,21 @@ radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmd
radv_cs_add_buffer(device->ws, cs, descriptor_bo);
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B420_SPI_SHADER_PGM_LO_HS,
R_00B220_SPI_SHADER_PGM_LO_GS};
for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
radv_emit_shader_pointer(device, cs, regs[i], va, true);
}
} else if (device->physical_device->info.gfx_level >= GFX10) {
} else if (pdev->info.gfx_level >= GFX10) {
uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
radv_emit_shader_pointer(device, cs, regs[i], va, true);
}
} else if (device->physical_device->info.gfx_level == GFX9) {
} else if (pdev->info.gfx_level == GFX9) {
uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
@ -725,7 +735,7 @@ static void
radv_emit_attribute_ring(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *attr_ring_bo,
uint32_t attr_ring_size)
{
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t va;
if (!attr_ring_bo)
@ -792,6 +802,7 @@ static VkResult
radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *device,
const struct radv_queue_ring_info *needs)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_winsys *ws = device->ws;
struct radeon_winsys_bo *scratch_bo = queue->scratch_bo;
struct radeon_winsys_bo *descriptor_bo = queue->descriptor_bo;
@ -848,8 +859,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
}
if (!queue->ring_info.tess_rings && needs->tess_rings) {
uint64_t tess_rings_size =
device->physical_device->hs.tess_offchip_ring_offset + device->physical_device->hs.tess_offchip_ring_size;
uint64_t tess_rings_size = pdev->hs.tess_offchip_ring_offset + pdev->hs.tess_offchip_ring_size;
result = radv_bo_create(device, tess_rings_size, 256, RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH,
0, true, &tess_rings_bo);
if (result != VK_SUCCESS)
@ -858,7 +868,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
}
if (!queue->ring_info.task_rings && needs->task_rings) {
assert(device->physical_device->info.gfx_level >= GFX10_3);
assert(pdev->info.gfx_level >= GFX10_3);
/* We write the control buffer from the CPU, so need to grant CPU access to the BO.
* The draw ring needs to be zero-initialized otherwise the ready bits will be incorrect.
@ -866,12 +876,11 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
uint32_t task_rings_bo_flags =
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM;
result = radv_bo_create(device, device->physical_device->task_info.bo_size_bytes, 256, RADEON_DOMAIN_VRAM,
task_rings_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, true, &task_rings_bo);
result = radv_bo_create(device, pdev->task_info.bo_size_bytes, 256, RADEON_DOMAIN_VRAM, task_rings_bo_flags,
RADV_BO_PRIORITY_SCRATCH, 0, true, &task_rings_bo);
if (result != VK_SUCCESS)
goto fail;
radv_rmv_log_command_buffer_bo_create(device, task_rings_bo, 0, 0,
device->physical_device->task_info.bo_size_bytes);
radv_rmv_log_command_buffer_bo_create(device, task_rings_bo, 0, 0, pdev->task_info.bo_size_bytes);
result = radv_initialise_task_control_buffer(device, task_rings_bo);
if (result != VK_SUCCESS)
@ -879,7 +888,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
}
if (!queue->ring_info.mesh_scratch_ring && needs->mesh_scratch_ring) {
assert(device->physical_device->info.gfx_level >= GFX10_3);
assert(pdev->info.gfx_level >= GFX10_3);
result = radv_bo_create(device, RADV_MESH_SCRATCH_NUM_ENTRIES * RADV_MESH_SCRATCH_ENTRY_BYTES, 256,
RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, true,
&mesh_scratch_ring_bo);
@ -891,7 +900,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
}
if (needs->attr_ring_size > queue->ring_info.attr_ring_size) {
assert(device->physical_device->info.gfx_level >= GFX11);
assert(pdev->info.gfx_level >= GFX11);
result = radv_bo_create(device, needs->attr_ring_size, 2 * 1024 * 1024 /* 2MiB */, RADEON_DOMAIN_VRAM,
RADEON_FLAG_32BIT | RADEON_FLAG_DISCARDABLE | ring_bo_flags, RADV_BO_PRIORITY_SCRATCH,
0, true, &attr_ring_bo);
@ -901,7 +910,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
}
if (!queue->ring_info.gds && needs->gds) {
assert(device->physical_device->info.gfx_level >= GFX10);
assert(pdev->info.gfx_level >= GFX10);
/* 4 streamout GDS counters.
* We need 256B (64 dw) of GDS, otherwise streamout hangs.
@ -920,7 +929,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
}
if (!queue->ring_info.gds_oa && needs->gds_oa) {
assert(device->physical_device->info.gfx_level >= GFX10);
assert(pdev->info.gfx_level >= GFX10);
result =
radv_bo_create(device, 1, 1, RADEON_DOMAIN_OA, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, true, &gds_oa_bo);
@ -972,7 +981,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
for (int i = 0; i < 3; ++i) {
enum rgp_flush_bits sqtt_flush_bits = 0;
struct radeon_cmdbuf *cs = NULL;
cs = ws->cs_create(ws, radv_queue_family_to_ring(device->physical_device, queue->qf), false);
cs = ws->cs_create(ws, radv_queue_family_to_ring(pdev, queue->qf), false);
if (!cs) {
result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
goto fail;
@ -1027,7 +1036,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
if (i < 2) {
/* The two initial preambles have a cache flush at the beginning. */
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
enum radv_cmd_flush_bits flush_bits = RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE |
RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2 |
RADV_CMD_FLAG_START_PIPELINE_STATS;
@ -1143,6 +1152,7 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device
struct vk_command_buffer *const *cmd_buffers, uint32_t cmd_buffer_count, bool *use_perf_counters,
bool *has_follower)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
bool has_indirect_pipeline_binds = false;
if (queue->qf != RADV_QUEUE_GENERAL && queue->qf != RADV_QUEUE_COMPUTE) {
@ -1203,9 +1213,8 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device
? MIN2(needs.compute_scratch_waves, UINT32_MAX / needs.compute_scratch_size_per_wave)
: 0;
if (device->physical_device->info.gfx_level >= GFX11 && queue->qf == RADV_QUEUE_GENERAL) {
needs.attr_ring_size =
device->physical_device->info.attribute_ring_size_per_se * device->physical_device->info.max_se;
if (pdev->info.gfx_level >= GFX11 && queue->qf == RADV_QUEUE_GENERAL) {
needs.attr_ring_size = pdev->info.attribute_ring_size_per_se * pdev->info.max_se;
}
/* Return early if we already match these needs.
@ -1230,13 +1239,15 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device
static VkResult
radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
{
const struct radv_physical_device *pdev = radv_device_physical(queue->device);
if (queue->gang_sem_bo)
return VK_SUCCESS;
VkResult r = VK_SUCCESS;
struct radv_device *device = queue->device;
struct radeon_winsys *ws = device->ws;
const enum amd_ip_type leader_ip = radv_queue_family_to_ring(device->physical_device, queue->state.qf);
const enum amd_ip_type leader_ip = radv_queue_family_to_ring(pdev, queue->state.qf);
struct radeon_winsys_bo *gang_sem_bo = NULL;
/* Gang semaphores BO.
@ -1291,9 +1302,8 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
*/
radv_cp_wait_mem(leader_post_cs, queue->state.qf, WAIT_REG_MEM_GREATER_OR_EQUAL, leader_wait_va, 1, 0xffffffff);
radv_cs_write_data(device, leader_post_cs, queue->state.qf, V_370_ME, leader_wait_va, 1, &zero, false);
radv_cs_emit_write_event_eop(ace_post_cs, device->physical_device->info.gfx_level, RADV_QUEUE_COMPUTE,
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT,
leader_wait_va, 1, 0);
radv_cs_emit_write_event_eop(ace_post_cs, pdev->info.gfx_level, RADV_QUEUE_COMPUTE, V_028A90_BOTTOM_OF_PIPE_TS, 0,
EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, leader_wait_va, 1, 0);
r = ws->cs_finalize(leader_pre_cs);
if (r != VK_SUCCESS)
@ -1681,13 +1691,14 @@ fail:
/* Print a diagnostic to stderr if a GPUVM page fault has been recorded
 * for this device; silent no-op otherwise.
 *
 * NOTE(review): reconstructed from a diff view that carried both the old
 * and new call forms; the post-refactor (pdev-based) call is kept.
 */
static void
radv_report_gpuvm_fault(struct radv_device *device)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   struct radv_winsys_gpuvm_fault_info fault_info = {0};

   if (!radv_vm_fault_occurred(device, &fault_info))
      return;

   fprintf(stderr, "radv: GPUVM fault detected at address 0x%08" PRIx64 ".\n", fault_info.addr);
   /* Decoding of the fault status bits is GFX-generation specific. */
   ac_print_gpuvm_fault_status(stderr, pdev->info.gfx_level, fault_info.status);
}
static VkResult
@ -1735,9 +1746,10 @@ static VkResult
radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission)
{
struct radv_queue *queue = (struct radv_queue *)vqueue;
const struct radv_physical_device *pdev = radv_device_physical(queue->device);
VkResult result;
if (!radv_sparse_queue_enabled(queue->device->physical_device)) {
if (!radv_sparse_queue_enabled(pdev)) {
result = radv_queue_submit_bind_sparse_memory(queue->device, submission);
if (result != VK_SUCCESS)
goto fail;
@ -1792,10 +1804,12 @@ radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
const VkDeviceQueueCreateInfo *create_info,
const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
queue->device = device;
queue->priority = radv_get_queue_global_priority(global_priority);
queue->hw_ctx = device->hw_ctx[queue->priority];
queue->state.qf = vk_queue_to_radv(device->physical_device, create_info->queueFamilyIndex);
queue->state.qf = vk_queue_to_radv(pdev, create_info->queueFamilyIndex);
queue->gang_sem_bo = NULL;
VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);

View file

@ -173,6 +173,8 @@ static void
evaluate_trace_event(struct radv_device *device, uint64_t timestamp, struct util_dynarray *tokens,
struct trace_event_amdgpu_vm_update_ptes *event)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (event->common.pid != getpid() && event->pid != getpid()) {
return;
}
@ -180,8 +182,8 @@ evaluate_trace_event(struct radv_device *device, uint64_t timestamp, struct util
struct trace_event_address_array *array = (struct trace_event_address_array *)(event + 1);
for (uint32_t i = 0; i < event->num_ptes; ++i)
emit_page_table_update_event(&device->vk.memory_trace_data, !device->physical_device->info.has_dedicated_vram,
timestamp, event, (uint64_t *)array->data, i);
emit_page_table_update_event(&device->vk.memory_trace_data, !pdev->info.has_dedicated_vram, timestamp, event,
(uint64_t *)array->data, i);
}
static void
@ -480,6 +482,8 @@ void
radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool is_internal,
VkMemoryAllocateFlags alloc_flags)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!device->vk.memory_trace_data.is_enabled)
return;
@ -495,7 +499,7 @@ radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool i
token.is_driver_internal = is_internal;
token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)heap);
token.type = VK_RMV_RESOURCE_TYPE_HEAP;
token.heap.alignment = device->physical_device->info.max_alignment;
token.heap.alignment = pdev->info.max_alignment;
token.heap.size = memory->alloc_size;
token.heap.heap_index = memory->heap_index;
token.heap.alloc_flags = alloc_flags;
@ -508,6 +512,8 @@ radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool i
void
radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo, bool is_internal)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!device->vk.memory_trace_data.is_enabled)
return;
@ -518,7 +524,7 @@ radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo
struct vk_rmv_virtual_allocate_token token = {0};
token.address = bo->va;
/* If all VRAM is visible, no bo will be in invisible memory. */
token.is_in_invisible_vram = bo->vram_no_cpu_access && !device->physical_device->info.all_vram_visible;
token.is_in_invisible_vram = bo->vram_no_cpu_access && !pdev->info.all_vram_visible;
token.preferred_domains = (enum vk_rmv_kernel_memory_domain)bo->initial_domain;
token.is_driver_internal = is_internal;
token.page_count = DIV_ROUND_UP(bo->size, 4096);

View file

@ -898,15 +898,17 @@ exit:
VkResult
radv_rra_trace_init(struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
device->rra_trace.validate_as = debug_get_bool_option("RADV_RRA_TRACE_VALIDATE", false);
device->rra_trace.copy_after_build = debug_get_bool_option("RADV_RRA_TRACE_COPY_AFTER_BUILD", false);
device->rra_trace.accel_structs = _mesa_pointer_hash_table_create(NULL);
device->rra_trace.accel_struct_vas = _mesa_hash_table_u64_create(NULL);
simple_mtx_init(&device->rra_trace.data_mtx, mtx_plain);
device->rra_trace.copy_memory_index = radv_find_memory_index(
device->physical_device,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
device->rra_trace.copy_memory_index =
radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
util_dynarray_init(&device->rra_trace.ray_history, NULL);
@ -939,9 +941,9 @@ radv_rra_trace_init(struct radv_device *device)
VkMemoryAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = requirements.size,
.memoryTypeIndex = radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
.memoryTypeIndex =
radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
};
result = radv_AllocateMemory(_device, &alloc_info, NULL, &device->rra_trace.ray_history_memory);
@ -1316,6 +1318,7 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename)
{
RADV_FROM_HANDLE(radv_queue, queue, vk_queue);
struct radv_device *device = queue->device;
const struct radv_physical_device *pdev = radv_device_physical(device);
VkDevice vk_device = radv_device_to_handle(device);
VkResult result = vk_common_DeviceWaitIdle(vk_device);
@ -1365,7 +1368,7 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename)
fwrite(&api, sizeof(uint64_t), 1, file);
uint64_t asic_info_offset = (uint64_t)ftell(file);
rra_dump_asic_info(&device->physical_device->info, file);
rra_dump_asic_info(&pdev->info, file);
uint64_t written_accel_struct_count = 0;

View file

@ -196,14 +196,14 @@ radv_unregister_border_color(struct radv_device *device, uint32_t slot)
static void
radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler, const VkSamplerCreateInfo *pCreateInfo)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
bool compat_mode =
device->physical_device->info.gfx_level == GFX8 || device->physical_device->info.gfx_level == GFX9;
bool compat_mode = pdev->info.gfx_level == GFX8 || pdev->info.gfx_level == GFX9;
unsigned filter_mode = radv_tex_filter_mode(sampler->vk.reduction_mode);
unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
bool trunc_coord = ((pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST) ||
device->physical_device->info.conformant_trunc_coord) &&
pdev->info.conformant_trunc_coord) &&
!device->disable_trunc_coord;
bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
@ -246,18 +246,17 @@ radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler, cons
S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)));
sampler->state[3] = S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color));
if (device->physical_device->info.gfx_level >= GFX10) {
if (pdev->info.gfx_level >= GFX10) {
sampler->state[2] |= S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -32, 31), 8)) |
S_008F38_ANISO_OVERRIDE_GFX10(device->instance->drirc.disable_aniso_single_level);
} else {
sampler->state[2] |= S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
S_008F38_DISABLE_LSB_CEIL(device->physical_device->info.gfx_level <= GFX8) |
S_008F38_FILTER_PREC_FIX(1) |
S_008F38_DISABLE_LSB_CEIL(pdev->info.gfx_level <= GFX8) | S_008F38_FILTER_PREC_FIX(1) |
S_008F38_ANISO_OVERRIDE_GFX8(device->instance->drirc.disable_aniso_single_level &&
device->physical_device->info.gfx_level >= GFX8);
pdev->info.gfx_level >= GFX8);
}
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
sampler->state[3] |= S_008F3C_BORDER_COLOR_PTR_GFX11(border_color_ptr);
} else {
sampler->state[3] |= S_008F3C_BORDER_COLOR_PTR_GFX6(border_color_ptr);

View file

@ -57,7 +57,9 @@ static const VkExtent3D radv_sdma_t2t_alignment_3d[] = {
ALWAYS_INLINE static unsigned
radv_sdma_pitch_alignment(const struct radv_device *device, const unsigned bpp)
{
if (device->physical_device->info.sdma_ip_version >= SDMA_5_0)
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.sdma_ip_version >= SDMA_5_0)
return MAX2(1, 4 / bpp);
return 4;
@ -82,7 +84,9 @@ radv_sdma_check_pitches(const unsigned pitch, const unsigned slice_pitch, const
ALWAYS_INLINE static enum gfx9_resource_type
radv_sdma_surface_resource_type(const struct radv_device *const device, const struct radeon_surf *const surf)
{
if (device->physical_device->info.sdma_ip_version >= SDMA_5_0) {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.sdma_ip_version >= SDMA_5_0) {
/* Use the 2D resource type for rotated or Z swizzles. */
if ((surf->u.gfx9.resource_type == RADEON_RESOURCE_1D || surf->u.gfx9.resource_type == RADEON_RESOURCE_3D) &&
(surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER || surf->micro_tile_mode == RADEON_MICRO_MODE_DEPTH))
@ -195,7 +199,9 @@ radv_sdma_get_metadata_config(const struct radv_device *const device, const stru
const struct radeon_surf *const surf, const VkImageSubresourceLayers subresource,
const VkImageAspectFlags aspect_mask)
{
if (!device->physical_device->info.sdma_supports_compression ||
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!pdev->info.sdma_supports_compression ||
!(radv_dcc_enabled(image, subresource.mipLevel) || radv_image_has_htile(image))) {
return 0;
}
@ -203,8 +209,7 @@ radv_sdma_get_metadata_config(const struct radv_device *const device, const stru
const VkFormat format = vk_format_get_aspect_format(image->vk.format, aspect_mask);
const struct util_format_description *desc = vk_format_description(format);
const uint32_t data_format =
ac_get_cb_format(device->physical_device->info.gfx_level, vk_format_to_pipe_format(format));
const uint32_t data_format = ac_get_cb_format(pdev->info.gfx_level, vk_format_to_pipe_format(format));
const uint32_t alpha_is_on_msb = vi_alpha_is_on_msb(device, format);
const uint32_t number_type = radv_translate_buffer_numformat(desc, vk_format_get_first_non_void_channel(format));
const uint32_t surface_type = radv_sdma_surface_type_from_aspect_mask(aspect_mask);
@ -220,11 +225,12 @@ static uint32_t
radv_sdma_get_tiled_info_dword(const struct radv_device *const device, const struct radv_image *const image,
const struct radeon_surf *const surf, const VkImageSubresourceLayers subresource)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const uint32_t element_size = util_logbase2(surf->bpe);
const uint32_t swizzle_mode = surf->has_stencil ? surf->u.gfx9.zs.stencil_swizzle_mode : surf->u.gfx9.swizzle_mode;
const enum gfx9_resource_type dimension = radv_sdma_surface_resource_type(device, surf);
const uint32_t info = element_size | swizzle_mode << 3 | dimension << 9;
const enum sdma_version ver = device->physical_device->info.sdma_ip_version;
const enum sdma_version ver = pdev->info.sdma_ip_version;
if (ver >= SDMA_5_0) {
const uint32_t mip_max = MAX2(image->vk.mip_levels, 1);
@ -242,7 +248,8 @@ static uint32_t
radv_sdma_get_tiled_header_dword(const struct radv_device *const device, const struct radv_image *const image,
const VkImageSubresourceLayers subresource)
{
const enum sdma_version ver = device->physical_device->info.sdma_ip_version;
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum sdma_version ver = pdev->info.sdma_ip_version;
if (ver >= SDMA_5_0) {
return 0;
@ -262,6 +269,7 @@ radv_sdma_get_surf(const struct radv_device *const device, const struct radv_ima
{
assert(util_bitcount(aspect_mask) == 1);
const struct radv_physical_device *pdev = radv_device_physical(device);
const unsigned plane_idx = radv_plane_from_aspect(aspect_mask);
const unsigned binding_idx = image->disjoint ? plane_idx : 0;
const struct radv_image_binding *binding = &image->bindings[binding_idx];
@ -301,7 +309,7 @@ radv_sdma_get_surf(const struct radv_device *const device, const struct radv_ima
info.info_dword = radv_sdma_get_tiled_info_dword(device, image, surf, subresource);
info.header_dword = radv_sdma_get_tiled_header_dword(device, image, subresource);
if (device->physical_device->info.sdma_supports_compression &&
if (pdev->info.sdma_supports_compression &&
(radv_dcc_enabled(image, subresource.mipLevel) || radv_image_has_htile(image))) {
info.meta_va = binding->bo->va + binding->offset + surf->meta_offset;
info.meta_config = radv_sdma_get_metadata_config(device, image, surf, subresource, aspect_mask);
@ -326,7 +334,8 @@ radv_sdma_copy_buffer(const struct radv_device *device, struct radeon_cmdbuf *cs
if (size == 0)
return;
const enum sdma_version ver = device->physical_device->info.sdma_ip_version;
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum sdma_version ver = pdev->info.sdma_ip_version;
const unsigned max_size_per_packet = ver >= SDMA_5_2 ? SDMA_V5_2_COPY_MAX_BYTES : SDMA_V2_0_COPY_MAX_BYTES;
unsigned align = ~0u;
@ -367,11 +376,13 @@ void
radv_sdma_fill_buffer(const struct radv_device *device, struct radeon_cmdbuf *cs, const uint64_t va,
const uint64_t size, const uint32_t value)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const uint32_t fill_size = 2; /* This means that the count is in dwords. */
const uint32_t constant_fill_header = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0) | (fill_size & 0x3) << 30;
/* This packet is the same since SDMA v2.4, haven't bothered to check older versions. */
const enum sdma_version ver = device->physical_device->info.sdma_ip_version;
const enum sdma_version ver = pdev->info.sdma_ip_version;
assert(ver >= SDMA_2_4);
/* Maximum allowed fill size depends on the GPU.
@ -450,7 +461,9 @@ radv_sdma_emit_copy_tiled_sub_window(const struct radv_device *device, struct ra
const struct radv_sdma_surf *const linear, const VkExtent3D pix_extent,
const bool detile)
{
if (!device->physical_device->info.sdma_supports_compression) {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!pdev->info.sdma_supports_compression) {
assert(!tiled->meta_va);
}
@ -499,14 +512,15 @@ radv_sdma_emit_copy_t2t_sub_window(const struct radv_device *device, struct rade
const struct radv_sdma_surf *const src, const struct radv_sdma_surf *const dst,
const VkExtent3D px_extent)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
/* We currently only support the SDMA v4+ versions of this packet. */
assert(device->physical_device->info.sdma_ip_version >= SDMA_4_0);
assert(pdev->info.sdma_ip_version >= SDMA_4_0);
/* On GFX10+ this supports DCC, but cannot copy a compressed surface to another compressed surface. */
assert(!src->meta_va || !dst->meta_va);
if (device->physical_device->info.sdma_ip_version >= SDMA_4_0 &&
device->physical_device->info.sdma_ip_version < SDMA_5_0) {
if (pdev->info.sdma_ip_version >= SDMA_4_0 && pdev->info.sdma_ip_version < SDMA_5_0) {
/* SDMA v4 doesn't support mip_id selection in the T2T copy packet. */
assert(src->header_dword >> 24 == 0);
assert(dst->header_dword >> 24 == 0);
@ -696,7 +710,8 @@ radv_sdma_use_t2t_scanline_copy(const struct radv_device *device, const struct r
/* SDMA can't do format conversion. */
assert(src->bpp == dst->bpp);
const enum sdma_version ver = device->physical_device->info.sdma_ip_version;
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum sdma_version ver = pdev->info.sdma_ip_version;
if (ver < SDMA_5_0) {
/* SDMA v4.x and older doesn't support proper mip level selection. */
if (src->mip_levels > 1 || dst->mip_levels > 1)

View file

@ -304,6 +304,7 @@ nir_shader *
radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_stage *stage,
const struct radv_spirv_to_nir_options *options, bool is_internal)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned subgroup_size = 64, ballot_bit_size = 64;
const unsigned required_subgroup_size = stage->key.subgroup_required_size * 32;
if (required_subgroup_size) {
@ -340,7 +341,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
.device = device,
.object = stage->spirv.object,
};
const bool has_fragment_shader_interlock = radv_has_pops(device->physical_device);
const bool has_fragment_shader_interlock = radv_has_pops(pdev);
const struct spirv_to_nir_options spirv_options = {
.caps =
{
@ -359,7 +360,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
.device_group = true,
.draw_parameters = true,
.float_controls = true,
.float16 = device->physical_device->info.has_packed_math_16bit,
.float16 = pdev->info.has_packed_math_16bit,
.float32_atomic_add = true,
.float32_atomic_min_max = true,
.float64 = true,
@ -411,7 +412,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
.variable_pointers = true,
.vk_memory_model = true,
.vk_memory_model_device_scope = true,
.fragment_shading_rate = device->physical_device->info.gfx_level >= GFX10_3,
.fragment_shading_rate = pdev->info.gfx_level >= GFX10_3,
.workgroup_memory_explicit_layout = true,
.cooperative_matrix = true,
},
@ -426,11 +427,11 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
.func = radv_spirv_nir_debug,
.private_data = &spirv_debug_data,
},
.force_tex_non_uniform = device->physical_device->cache_key.tex_non_uniform,
.force_ssbo_non_uniform = device->physical_device->cache_key.ssbo_non_uniform,
.force_tex_non_uniform = pdev->cache_key.tex_non_uniform,
.force_ssbo_non_uniform = pdev->cache_key.ssbo_non_uniform,
};
nir = spirv_to_nir(spirv, stage->spirv.size / 4, spec_entries, num_spec_entries, stage->stage, stage->entrypoint,
&spirv_options, &device->physical_device->nir_options[stage->stage]);
&spirv_options, &pdev->nir_options[stage->stage]);
nir->info.internal |= is_internal;
assert(nir->info.stage == stage->stage);
nir_validate_shader(nir, "after spirv_to_nir");
@ -507,7 +508,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
NIR_PASS(_, nir, nir_lower_vars_to_ssa);
NIR_PASS(_, nir, nir_propagate_invariant, device->physical_device->cache_key.invariant_geom);
NIR_PASS(_, nir, nir_propagate_invariant, pdev->cache_key.invariant_geom);
NIR_PASS(_, nir, nir_lower_clip_cull_distance_arrays);
@ -515,11 +516,11 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
nir->info.stage == MESA_SHADER_GEOMETRY)
NIR_PASS_V(nir, nir_shader_gather_xfb_info);
NIR_PASS(_, nir, nir_lower_discard_or_demote, device->physical_device->cache_key.lower_discard_to_demote);
NIR_PASS(_, nir, nir_lower_discard_or_demote, pdev->cache_key.lower_discard_to_demote);
nir_lower_doubles_options lower_doubles = nir->options->lower_doubles_options;
if (device->physical_device->info.gfx_level == GFX6) {
if (pdev->info.gfx_level == GFX6) {
/* GFX6 doesn't support v_floor_f64 and the precision
* of v_fract_f64 which is used to implement 64-bit
* floor is less than what Vulkan requires.
@ -537,7 +538,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
/* Mesh shaders run as NGG which can implement local_invocation_index from
* the wave ID in merged_wave_info, but they don't have local_invocation_ids on GFX10.3.
*/
.lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH && !device->physical_device->mesh_fast_launch_2,
.lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH && !pdev->mesh_fast_launch_2,
.lower_local_invocation_index = nir->info.stage == MESA_SHADER_COMPUTE &&
((nir->info.workgroup_size[0] == 1) + (nir->info.workgroup_size[1] == 1) +
(nir->info.workgroup_size[2] == 1)) == 2,
@ -569,10 +570,10 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
.lower_txf_offset = true,
.lower_tg4_offsets = true,
.lower_txs_cube_array = true,
.lower_to_fragment_fetch_amd = device->physical_device->use_fmask,
.lower_to_fragment_fetch_amd = pdev->use_fmask,
.lower_lod_zero_width = true,
.lower_invalid_implicit_lod = true,
.lower_1d = device->physical_device->info.gfx_level == GFX9,
.lower_1d = pdev->info.gfx_level == GFX9,
};
NIR_PASS(_, nir, nir_lower_tex, &tex_options);
@ -597,7 +598,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
NIR_PASS(_, nir, nir_lower_global_vars_to_local);
NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
bool gfx7minus = device->physical_device->info.gfx_level <= GFX7;
bool gfx7minus = pdev->info.gfx_level <= GFX7;
bool has_inverse_ballot = true;
#if LLVM_AVAILABLE
has_inverse_ballot = !radv_use_llvm_for_stage(device, nir->info.stage) || LLVM_VERSION_MAJOR >= 17;
@ -690,7 +691,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
nir->info.stage == MESA_SHADER_MESH) &&
nir->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE)) {
/* Lower primitive shading rate to match HW requirements. */
NIR_PASS(_, nir, radv_nir_lower_primitive_shading_rate, device->physical_device->info.gfx_level);
NIR_PASS(_, nir, radv_nir_lower_primitive_shading_rate, pdev->info.gfx_level);
}
/* Indirect lowering must be called after the radv_optimize_nir() loop
@ -698,8 +699,8 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
* bloat the instruction count of the loop and cause it to be
* considered too large for unrolling.
*/
if (ac_nir_lower_indirect_derefs(nir, device->physical_device->info.gfx_level) &&
!stage->key.optimisations_disabled && nir->info.stage != MESA_SHADER_COMPUTE) {
if (ac_nir_lower_indirect_derefs(nir, pdev->info.gfx_level) && !stage->key.optimisations_disabled &&
nir->info.stage != MESA_SHADER_COMPUTE) {
/* Optimize the lowered code before the linking optimizations. */
radv_optimize_nir(nir, false);
}
@ -775,6 +776,7 @@ void
radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
const struct radv_graphics_state_key *gfx_state)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader_info *info = &ngg_stage->info;
nir_shader *nir = ngg_stage->nir;
@ -818,19 +820,19 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
nir->info.shared_size = info->ngg_info.lds_size;
ac_nir_lower_ngg_options options = {0};
options.family = device->physical_device->info.family;
options.gfx_level = device->physical_device->info.gfx_level;
options.family = pdev->info.family;
options.gfx_level = pdev->info.gfx_level;
options.max_workgroup_size = info->workgroup_size;
options.wave_size = info->wave_size;
options.clip_cull_dist_mask = info->outinfo.clip_dist_mask | info->outinfo.cull_dist_mask;
options.vs_output_param_offset = info->outinfo.vs_output_param_offset;
options.has_param_exports = info->outinfo.param_exports || info->outinfo.prim_param_exports;
options.can_cull = nir->info.stage != MESA_SHADER_GEOMETRY && info->has_ngg_culling;
options.disable_streamout = !device->physical_device->use_ngg_streamout;
options.disable_streamout = !pdev->use_ngg_streamout;
options.has_gen_prim_query = info->has_prim_query;
options.has_xfb_prim_query = info->has_xfb_query;
options.has_gs_invocations_query = device->physical_device->info.gfx_level < GFX11;
options.has_gs_primitives_query = device->physical_device->info.gfx_level < GFX11;
options.has_gs_invocations_query = pdev->info.gfx_level < GFX11;
options.has_gs_primitives_query = pdev->info.gfx_level < GFX11;
options.force_vrs = info->force_vrs_per_vertex;
if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) {
@ -862,8 +864,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
bool scratch_ring = false;
NIR_PASS_V(nir, ac_nir_lower_ngg_ms, options.gfx_level, options.clip_cull_dist_mask,
options.vs_output_param_offset, options.has_param_exports, &scratch_ring, info->wave_size,
hw_workgroup_size, gfx_state->has_multiview_view_index, info->ms.has_query,
device->physical_device->mesh_fast_launch_2);
hw_workgroup_size, gfx_state->has_multiview_view_index, info->ms.has_query, pdev->mesh_fast_launch_2);
ngg_stage->info.ms.needs_ms_scratch_ring = scratch_ring;
} else {
unreachable("invalid SW stage passed to radv_lower_ngg");
@ -933,6 +934,7 @@ static struct radv_shader_arena *
radv_create_shader_arena(struct radv_device *device, struct radv_shader_free_list *free_list, unsigned min_size,
unsigned arena_size, bool replayable, uint64_t replay_va)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
union radv_shader_arena_block *alloc = NULL;
struct radv_shader_arena *arena = calloc(1, sizeof(struct radv_shader_arena));
if (!arena)
@ -948,7 +950,7 @@ radv_create_shader_arena(struct radv_device *device, struct radv_shader_free_lis
if (device->shader_use_invisible_vram)
flags |= RADEON_FLAG_NO_CPU_ACCESS;
else
flags |= (device->physical_device->info.cpdma_prefetch_writes_memory ? 0 : RADEON_FLAG_READ_ONLY);
flags |= (pdev->info.cpdma_prefetch_writes_memory ? 0 : RADEON_FLAG_READ_ONLY);
if (replayable)
flags |= RADEON_FLAG_REPLAYABLE;
@ -1079,7 +1081,9 @@ insert_block(struct radv_device *device, union radv_shader_arena_block *hole, ui
union radv_shader_arena_block *
radv_alloc_shader_memory(struct radv_device *device, uint32_t size, bool replayable, void *ptr)
{
size = ac_align_shader_binary_for_prefetch(&device->physical_device->info, size);
const struct radv_physical_device *pdev = radv_device_physical(device);
size = ac_align_shader_binary_for_prefetch(&pdev->info, size);
size = align(size, RADV_SHADER_ALLOC_ALIGNMENT);
mtx_lock(&device->shader_arena_mutex);
@ -1402,7 +1406,8 @@ radv_destroy_shader_upload_queue(struct radv_device *device)
static bool
radv_should_use_wgp_mode(const struct radv_device *device, gl_shader_stage stage, const struct radv_shader_info *info)
{
enum amd_gfx_level chip = device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(device);
enum amd_gfx_level chip = pdev->info.gfx_level;
switch (stage) {
case MESA_SHADER_COMPUTE:
case MESA_SHADER_TESS_CTRL:
@ -1422,13 +1427,13 @@ static bool
radv_open_rtld_binary(struct radv_device *device, const struct radv_shader_binary *binary,
struct ac_rtld_binary *rtld_binary)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const char *elf_data = (const char *)((struct radv_shader_binary_rtld *)binary)->data;
size_t elf_size = ((struct radv_shader_binary_rtld *)binary)->elf_size;
struct ac_rtld_symbol lds_symbols[3];
unsigned num_lds_symbols = 0;
if (device->physical_device->info.gfx_level >= GFX9 &&
(binary->info.stage == MESA_SHADER_GEOMETRY || binary->info.is_ngg)) {
if (pdev->info.gfx_level >= GFX9 && (binary->info.stage == MESA_SHADER_GEOMETRY || binary->info.is_ngg)) {
struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
sym->name = "esgs_ring";
sym->size = binary->info.ngg_info.esgs_ring_size;
@ -1448,7 +1453,7 @@ radv_open_rtld_binary(struct radv_device *device, const struct radv_shader_binar
}
struct ac_rtld_open_info open_info = {
.info = &device->physical_device->info,
.info = &pdev->info,
.shader_type = binary->info.stage,
.wave_size = binary->info.wave_size,
.num_parts = 1,
@ -1466,6 +1471,7 @@ static bool
radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_binary *binary,
const struct radv_shader_args *args)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct ac_shader_config *config = &binary->config;
if (binary->type == RADV_BINARY_TYPE_RTLD) {
@ -1478,13 +1484,13 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi
return false;
}
if (!ac_rtld_read_config(&device->physical_device->info, &rtld_binary, config)) {
if (!ac_rtld_read_config(&pdev->info, &rtld_binary, config)) {
ac_rtld_close(&rtld_binary);
return false;
}
if (rtld_binary.lds_size > 0) {
unsigned encode_granularity = device->physical_device->info.lds_encode_granularity;
unsigned encode_granularity = pdev->info.lds_encode_granularity;
config->lds_size = DIV_ROUND_UP(rtld_binary.lds_size, encode_granularity);
}
if (!config->lds_size && binary->info.stage == MESA_SHADER_TESS_CTRL) {
@ -1499,7 +1505,6 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi
const struct radv_shader_info *info = &binary->info;
gl_shader_stage stage = binary->info.stage;
const struct radv_physical_device *pdev = device->physical_device;
bool scratch_enabled = config->scratch_bytes_per_wave > 0;
bool trap_enabled = !!device->trap_handler_shader;
unsigned vgpr_comp_cnt = 0;
@ -2064,7 +2069,8 @@ unsigned
radv_get_max_waves(const struct radv_device *device, const struct ac_shader_config *conf,
const struct radv_shader_info *info)
{
const struct radeon_info *gpu_info = &device->physical_device->info;
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
const enum amd_gfx_level gfx_level = gpu_info->gfx_level;
const uint8_t wave_size = info->wave_size;
gl_shader_stage stage = info->stage;
@ -2109,7 +2115,8 @@ radv_get_max_waves(const struct radv_device *device, const struct ac_shader_conf
unsigned
radv_get_max_scratch_waves(const struct radv_device *device, struct radv_shader *shader)
{
const unsigned num_cu = device->physical_device->info.num_cu;
const struct radv_physical_device *pdev = radv_device_physical(device);
const unsigned num_cu = pdev->info.num_cu;
return MIN2(device->scratch_waves, 4 * num_cu * shader->max_waves);
}
@ -2423,10 +2430,12 @@ radv_fill_nir_compiler_options(struct radv_nir_compiler_options *options, struct
bool can_dump_shader, bool is_meta_shader, bool keep_shader_info,
bool keep_statistic_info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
/* robust_buffer_access_llvm here used by LLVM only, pipeline robustness is not exposed there. */
options->robust_buffer_access_llvm = device->buffer_robustness >= RADV_BUFFER_ROBUSTNESS_1;
options->wgp_mode = should_use_wgp;
options->info = &device->physical_device->info;
options->info = &pdev->info;
options->dump_shader = can_dump_shader;
options->dump_preoptir = options->dump_shader && device->instance->debug_flags & RADV_DEBUG_PREOPTIR;
options->record_ir = keep_shader_info;
@ -2607,6 +2616,7 @@ radv_aco_build_shader_part(void **bin, uint32_t num_sgprs, uint32_t num_vgprs, c
struct radv_shader *
radv_create_rt_prolog(struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_shader *prolog;
struct radv_shader_args in_args = {0};
struct radv_shader_args out_args = {0};
@ -2618,13 +2628,13 @@ radv_create_rt_prolog(struct radv_device *device)
info.stage = MESA_SHADER_COMPUTE;
info.loads_push_constants = true;
info.desc_set_used_mask = -1; /* just to force indirection */
info.wave_size = device->physical_device->rt_wave_size;
info.wave_size = pdev->rt_wave_size;
info.workgroup_size = info.wave_size;
info.user_data_0 = R_00B900_COMPUTE_USER_DATA_0;
info.cs.is_rt_shader = true;
info.cs.uses_dynamic_rt_callable_stack = true;
info.cs.block_size[0] = 8;
info.cs.block_size[1] = device->physical_device->rt_wave_size == 64 ? 8 : 4;
info.cs.block_size[1] = pdev->rt_wave_size == 64 ? 8 : 4;
info.cs.block_size[2] = 1;
info.cs.uses_thread_id[0] = true;
info.cs.uses_thread_id[1] = true;
@ -2739,6 +2749,7 @@ struct radv_shader_part *
radv_create_ps_epilog(struct radv_device *device, const struct radv_ps_epilog_key *key,
struct radv_shader_part_binary **binary_out)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_shader_part *epilog;
struct radv_shader_args args = {0};
struct radv_nir_compiler_options options = {0};
@ -2748,7 +2759,7 @@ radv_create_ps_epilog(struct radv_device *device, const struct radv_ps_epilog_ke
struct radv_shader_info info = {0};
info.stage = MESA_SHADER_FRAGMENT;
info.wave_size = device->physical_device->ps_wave_size;
info.wave_size = pdev->ps_wave_size;
info.workgroup_size = 64;
radv_declare_ps_epilog_args(device, key, &args);

View file

@ -258,7 +258,9 @@ declare_ms_input_sgprs(const struct radv_shader_info *info, struct radv_shader_a
static void
declare_ms_input_vgprs(const struct radv_device *device, struct radv_shader_args *args)
{
if (device->physical_device->mesh_fast_launch_2) {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->mesh_fast_launch_2) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids);
} else {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
@ -510,7 +512,8 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics
const struct radv_shader_info *info, gl_shader_stage stage, gl_shader_stage previous_stage,
struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info)
{
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
bool has_shader_query = info->has_prim_query || info->has_xfb_query ||
(stage == MESA_SHADER_GEOMETRY && info->gs.has_pipeline_stat_query) ||
(stage == MESA_SHADER_MESH && info->ms.has_query) ||
@ -784,7 +787,7 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics
declare_ngg_sgprs(info, args, has_ngg_provoking_vtx);
}
if (previous_stage != MESA_SHADER_MESH || !device->physical_device->mesh_fast_launch_2) {
if (previous_stage != MESA_SHADER_MESH || !pdev->mesh_fast_launch_2) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
@ -871,7 +874,8 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_gra
if (info->loads_push_constants)
num_user_sgprs++;
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
uint32_t available_sgprs = gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16;
uint32_t remaining_sgprs = available_sgprs - num_user_sgprs;

View file

@ -342,6 +342,8 @@ static uint8_t
radv_get_wave_size(struct radv_device *device, gl_shader_stage stage, const struct radv_shader_info *info,
const struct radv_shader_stage_key *stage_key)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (stage_key->subgroup_required_size)
return stage_key->subgroup_required_size * 32;
@ -350,11 +352,11 @@ radv_get_wave_size(struct radv_device *device, gl_shader_stage stage, const stru
else if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_TASK)
return info->wave_size;
else if (stage == MESA_SHADER_FRAGMENT)
return device->physical_device->ps_wave_size;
return pdev->ps_wave_size;
else if (gl_shader_stage_is_rt(stage))
return device->physical_device->rt_wave_size;
return pdev->rt_wave_size;
else
return device->physical_device->ge_wave_size;
return pdev->ge_wave_size;
}
static uint8_t
@ -370,6 +372,7 @@ radv_get_ballot_bit_size(struct radv_device *device, gl_shader_stage stage, cons
static uint32_t
radv_compute_esgs_itemsize(const struct radv_device *device, uint32_t num_varyings)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t esgs_itemsize;
esgs_itemsize = num_varyings * 16;
@ -377,7 +380,7 @@ radv_compute_esgs_itemsize(const struct radv_device *device, uint32_t num_varyin
/* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
* conflicts, i.e. each vertex will start on a different bank.
*/
if (device->physical_device->info.gfx_level >= GFX9 && esgs_itemsize)
if (pdev->info.gfx_level >= GFX9 && esgs_itemsize)
esgs_itemsize += 4;
return esgs_itemsize;
@ -562,6 +565,8 @@ static void
gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,
const struct radv_graphics_state_key *gfx_state, struct radv_shader_info *info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
info->tcs.tes_inputs_read = ~0ULL;
info->tcs.tes_patch_inputs_read = ~0ULL;
@ -571,15 +576,14 @@ gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,
if (gfx_state->ts.patch_control_points) {
/* Number of tessellation patches per workgroup processed by the current pipeline. */
info->num_tess_patches =
get_tcs_num_patches(gfx_state->ts.patch_control_points, nir->info.tess.tcs_vertices_out,
info->tcs.num_linked_inputs, info->tcs.num_linked_outputs,
info->tcs.num_linked_patch_outputs, device->physical_device->hs.tess_offchip_block_dw_size,
device->physical_device->info.gfx_level, device->physical_device->info.family);
info->num_tess_patches = get_tcs_num_patches(
gfx_state->ts.patch_control_points, nir->info.tess.tcs_vertices_out, info->tcs.num_linked_inputs,
info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs, pdev->hs.tess_offchip_block_dw_size,
pdev->info.gfx_level, pdev->info.family);
/* LDS size used by VS+TCS for storing TCS inputs and outputs. */
info->tcs.num_lds_blocks =
calculate_tess_lds_size(device->physical_device->info.gfx_level, gfx_state->ts.patch_control_points,
calculate_tess_lds_size(pdev->info.gfx_level, gfx_state->ts.patch_control_points,
nir->info.tess.tcs_vertices_out, info->tcs.num_linked_inputs, info->num_tess_patches,
info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs);
}
@ -616,7 +620,7 @@ gather_shader_info_tes(struct radv_device *device, const nir_shader *nir, struct
static void
radv_init_legacy_gs_ring_info(const struct radv_device *device, struct radv_shader_info *gs_info)
{
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_legacy_gs_info *gs_ring_info = &gs_info->gs_ring_info;
unsigned num_se = pdev->info.max_se;
unsigned wave_size = 64;
@ -650,6 +654,7 @@ radv_init_legacy_gs_ring_info(const struct radv_device *device, struct radv_shad
static void
radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *gs_info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_legacy_gs_info *out = &gs_info->gs_ring_info;
const unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1);
const bool uses_adjacency =
@ -734,7 +739,7 @@ radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_inf
const uint32_t gs_prims_per_subgroup = gs_prims;
const uint32_t gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations;
const uint32_t max_prims_per_subgroup = gs_inst_prims_in_subgroup * gs_info->gs.vertices_out;
const uint32_t lds_granularity = device->physical_device->info.lds_encode_granularity;
const uint32_t lds_granularity = pdev->info.lds_encode_granularity;
const uint32_t total_lds_bytes = align(esgs_lds_size * 4, lds_granularity);
out->lds_size = total_lds_bytes / lds_granularity;
out->vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) |
@ -750,6 +755,7 @@ radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_inf
static void
gather_shader_info_gs(struct radv_device *device, const nir_shader *nir, struct radv_shader_info *info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned add_clip = nir->info.clip_distance_array_size + nir->info.cull_distance_array_size > 4;
info->gs.gsvs_vertex_size = (util_bitcount64(nir->info.outputs_written) + add_clip) * 16;
info->gs.max_gsvs_emit_size = info->gs.gsvs_vertex_size * nir->info.gs.vertices_out;
@ -770,7 +776,7 @@ gather_shader_info_gs(struct radv_device *device, const nir_shader *nir, struct
info->gs.num_stream_output_components[stream] += num_components;
}
info->gs.has_pipeline_stat_query = device->physical_device->emulate_ngg_gs_query_pipeline_stat;
info->gs.has_pipeline_stat_query = pdev->emulate_ngg_gs_query_pipeline_stat;
gather_info_unlinked_input(info, nir);
@ -830,9 +836,10 @@ gather_shader_info_mesh(struct radv_device *device, const nir_shader *nir,
static void
calc_mesh_workgroup_size(const struct radv_device *device, const nir_shader *nir, struct radv_shader_info *info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned api_workgroup_size = ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX);
if (device->physical_device->mesh_fast_launch_2) {
if (pdev->mesh_fast_launch_2) {
/* Use multi-row export. It is also necessary to use the API workgroup size for non-emulated queries. */
info->workgroup_size = api_workgroup_size;
} else {
@ -848,6 +855,7 @@ static void
gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir,
const struct radv_graphics_state_key *gfx_state, struct radv_shader_info *info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t per_primitive_input_mask = nir->info.inputs_read & nir->info.per_primitive_inputs;
unsigned num_per_primitive_inputs = util_bitcount64(per_primitive_input_mask);
assert(num_per_primitive_inputs <= nir->num_inputs);
@ -855,7 +863,7 @@ gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir,
info->ps.num_interp = nir->num_inputs;
info->ps.num_prim_interp = 0;
if (device->physical_device->info.gfx_level == GFX10_3) {
if (pdev->info.gfx_level == GFX10_3) {
/* GFX10.3 distinguishes NUM_INTERP and NUM_PRIM_INTERP, but
* these are counted together in NUM_INTERP on GFX11.
*/
@ -972,7 +980,7 @@ gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir,
*/
info->ps.force_sample_iter_shading_rate =
(info->ps.reads_sample_mask_in && !info->ps.needs_poly_line_smooth) ||
(device->physical_device->info.gfx_level == GFX10_3 &&
(pdev->info.gfx_level == GFX10_3 &&
(nir->info.fs.sample_interlock_ordered || nir->info.fs.sample_interlock_unordered ||
nir->info.fs.pixel_interlock_ordered || nir->info.fs.pixel_interlock_unordered));
@ -992,8 +1000,7 @@ gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir,
*/
const bool mask_export_enable = info->ps.writes_sample_mask;
const bool disable_rbplus =
device->physical_device->info.has_rbplus && !device->physical_device->info.rbplus_allowed;
const bool disable_rbplus = pdev->info.has_rbplus && !pdev->info.rbplus_allowed;
info->ps.db_shader_control =
S_02880C_Z_EXPORT_ENABLE(info->ps.writes_z) | S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(info->ps.writes_stencil) |
@ -1020,9 +1027,10 @@ static void
gather_shader_info_cs(struct radv_device *device, const nir_shader *nir, const struct radv_shader_stage_key *stage_key,
struct radv_shader_info *info)
{
unsigned default_wave_size = device->physical_device->cs_wave_size;
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned default_wave_size = pdev->cs_wave_size;
if (info->cs.uses_rt)
default_wave_size = device->physical_device->rt_wave_size;
default_wave_size = pdev->rt_wave_size;
unsigned local_size = nir->info.workgroup_size[0] * nir->info.workgroup_size[1] * nir->info.workgroup_size[2];
@ -1040,14 +1048,14 @@ gather_shader_info_cs(struct radv_device *device, const nir_shader *nir, const s
info->wave_size = required_subgroup_size;
} else if (require_full_subgroups) {
info->wave_size = RADV_SUBGROUP_SIZE;
} else if (device->physical_device->info.gfx_level >= GFX10 && local_size <= 32) {
} else if (pdev->info.gfx_level >= GFX10 && local_size <= 32) {
/* Use wave32 for small workgroups. */
info->wave_size = 32;
} else {
info->wave_size = default_wave_size;
}
if (device->physical_device->info.has_cs_regalloc_hang_bug) {
if (pdev->info.has_cs_regalloc_hang_bug) {
info->cs.regalloc_hang_bug = info->cs.block_size[0] * info->cs.block_size[1] * info->cs.block_size[2] > 256;
}
}
@ -1083,7 +1091,8 @@ gather_shader_info_task(struct radv_device *device, const nir_shader *nir,
static uint32_t
radv_get_user_data_0(const struct radv_device *device, struct radv_shader_info *info)
{
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
switch (info->stage) {
case MESA_SHADER_VERTEX:
@ -1139,7 +1148,8 @@ radv_get_user_data_0(const struct radv_device *device, struct radv_shader_info *
static bool
radv_is_merged_shader_compiled_separately(const struct radv_device *device, const struct radv_shader_info *info)
{
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
if (gfx_level >= GFX9) {
switch (info->stage) {
@ -1180,6 +1190,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
const struct radv_graphics_state_key *gfx_state, const enum radv_pipeline_type pipeline_type,
bool consider_force_vrs, struct radv_shader_info *info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct nir_function *func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
if (layout->use_dynamic_descriptors) {
@ -1257,7 +1268,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
/* The HW always assumes that there is at least 1 per-vertex param.
* so if there aren't any, we have to offset per-primitive params by 1.
*/
const unsigned extra_offset = !!(total_param_exports == 0 && device->physical_device->info.gfx_level >= GFX11);
const unsigned extra_offset = !!(total_param_exports == 0 && pdev->info.gfx_level >= GFX11);
/* Per-primitive outputs: the HW needs these to be last. */
assign_outinfo_params(outinfo, per_prim_mask, &total_param_exports, extra_offset);
@ -1274,7 +1285,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
/* Used by compute and mesh shaders. Mesh shaders must always declare this before GFX11. */
info->cs.uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS) ||
(nir->info.stage == MESA_SHADER_MESH && device->physical_device->info.gfx_level < GFX11);
(nir->info.stage == MESA_SHADER_MESH && pdev->info.gfx_level < GFX11);
info->cs.uses_local_invocation_idx = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) |
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) |
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS) |
@ -1348,9 +1359,9 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
break;
case MESA_SHADER_TESS_CTRL:
if (gfx_state->ts.patch_control_points) {
info->workgroup_size = ac_compute_lshs_workgroup_size(
device->physical_device->info.gfx_level, MESA_SHADER_TESS_CTRL, info->num_tess_patches,
gfx_state->ts.patch_control_points, info->tcs.tcs_vertices_out);
info->workgroup_size =
ac_compute_lshs_workgroup_size(pdev->info.gfx_level, MESA_SHADER_TESS_CTRL, info->num_tess_patches,
gfx_state->ts.patch_control_points, info->tcs.tcs_vertices_out);
} else {
/* Set the maximum possible value when the workgroup size can't be determined. */
info->workgroup_size = 256;
@ -1371,7 +1382,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
unsigned es_verts_per_subgroup = G_028A44_ES_VERTS_PER_SUBGRP(info->gs_ring_info.vgt_gs_onchip_cntl);
unsigned gs_inst_prims_in_subgroup = G_028A44_GS_INST_PRIMS_IN_SUBGRP(info->gs_ring_info.vgt_gs_onchip_cntl);
info->workgroup_size = ac_compute_esgs_workgroup_size(device->physical_device->info.gfx_level, info->wave_size,
info->workgroup_size = ac_compute_esgs_workgroup_size(pdev->info.gfx_level, info->wave_size,
es_verts_per_subgroup, gs_inst_prims_in_subgroup);
} else {
/* Set the maximum possible value by default, this will be optimized during linking if
@ -1441,6 +1452,7 @@ static unsigned
gfx10_get_ngg_scratch_lds_base(const struct radv_device *device, const struct radv_shader_info *es_info,
const struct radv_shader_info *gs_info, const struct gfx10_ngg_info *ngg_info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t scratch_lds_base;
if (gs_info) {
@ -1451,7 +1463,7 @@ gfx10_get_ngg_scratch_lds_base(const struct radv_device *device, const struct ra
} else {
const bool uses_instanceid = es_info->vs.needs_instance_id;
const bool uses_primitive_id = es_info->uses_prim_id;
const bool streamout_enabled = es_info->so.num_outputs && device->physical_device->use_ngg_streamout;
const bool streamout_enabled = es_info->so.num_outputs && pdev->use_ngg_streamout;
const uint32_t num_outputs =
es_info->stage == MESA_SHADER_VERTEX ? es_info->vs.num_outputs : es_info->tes.num_outputs;
unsigned pervertex_lds_bytes = ac_ngg_nogs_get_pervertex_lds_size(
@ -1471,7 +1483,8 @@ void
gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info,
struct gfx10_ngg_info *out)
{
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
const unsigned max_verts_per_prim = radv_get_num_input_vertices(es_info, gs_info);
const unsigned min_verts_per_prim = gs_info ? max_verts_per_prim : 1;
@ -1683,9 +1696,8 @@ gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es
/* Get scratch LDS usage. */
const struct radv_shader_info *info = gs_info ? gs_info : es_info;
const unsigned scratch_lds_size =
ac_ngg_get_scratch_lds_size(info->stage, info->workgroup_size, info->wave_size,
device->physical_device->use_ngg_streamout, info->has_ngg_culling);
const unsigned scratch_lds_size = ac_ngg_get_scratch_lds_size(info->stage, info->workgroup_size, info->wave_size,
pdev->use_ngg_streamout, info->has_ngg_culling);
out->lds_size = out->scratch_lds_base + scratch_lds_size;
unsigned workgroup_size =
@ -1700,6 +1712,8 @@ static void
radv_determine_ngg_settings(struct radv_device *device, struct radv_shader_stage *es_stage,
struct radv_shader_stage *fs_stage, const struct radv_graphics_state_key *gfx_state)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
assert(es_stage->stage == MESA_SHADER_VERTEX || es_stage->stage == MESA_SHADER_TESS_EVAL);
assert(!fs_stage || fs_stage->stage == MESA_SHADER_FRAGMENT);
@ -1715,8 +1729,8 @@ radv_determine_ngg_settings(struct radv_device *device, struct radv_shader_stage
: 3;
}
es_stage->info.has_ngg_culling = radv_consider_culling(device->physical_device, es_stage->nir, ps_inputs_read,
num_vertices_per_prim, &es_stage->info);
es_stage->info.has_ngg_culling =
radv_consider_culling(pdev, es_stage->nir, ps_inputs_read, num_vertices_per_prim, &es_stage->info);
nir_function_impl *impl = nir_shader_get_entrypoint(es_stage->nir);
es_stage->info.has_ngg_early_prim_export = exec_list_is_singular(&impl->body);
@ -1732,6 +1746,8 @@ static void
radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *producer,
struct radv_shader_stage *consumer, const struct radv_graphics_state_key *gfx_state)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
/* Export primitive ID and clip/cull distances if read by the FS, or export unconditionally when
* the next stage is unknown (with graphics pipeline library).
*/
@ -1782,9 +1798,9 @@ radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *pro
struct radv_shader_stage *tcs_stage = consumer;
if (gfx_state->ts.patch_control_points) {
vs_stage->info.workgroup_size = ac_compute_lshs_workgroup_size(
device->physical_device->info.gfx_level, MESA_SHADER_VERTEX, tcs_stage->info.num_tess_patches,
gfx_state->ts.patch_control_points, tcs_stage->info.tcs.tcs_vertices_out);
vs_stage->info.workgroup_size =
ac_compute_lshs_workgroup_size(pdev->info.gfx_level, MESA_SHADER_VERTEX, tcs_stage->info.num_tess_patches,
gfx_state->ts.patch_control_points, tcs_stage->info.tcs.tcs_vertices_out);
if (!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) {
/* When the number of TCS input and output vertices are the same (typically 3):
@ -1797,7 +1813,7 @@ radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *pro
* instruction dominating another with a different mode.
*/
vs_stage->info.vs.tcs_in_out_eq =
device->physical_device->info.gfx_level >= GFX9 &&
pdev->info.gfx_level >= GFX9 &&
gfx_state->ts.patch_control_points == tcs_stage->info.tcs.tcs_vertices_out &&
vs_stage->nir->info.float_controls_execution_mode == tcs_stage->nir->info.float_controls_execution_mode;
@ -1865,6 +1881,8 @@ void
radv_nir_shader_info_link(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
struct radv_shader_stage *stages)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
/* Walk backwards to link */
struct radv_shader_stage *next_stage = stages[MESA_SHADER_FRAGMENT].nir ? &stages[MESA_SHADER_FRAGMENT] : NULL;
@ -1877,7 +1895,7 @@ radv_nir_shader_info_link(struct radv_device *device, const struct radv_graphics
next_stage = &stages[s];
}
if (device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
/* Merge shader info for VS+TCS. */
if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_TESS_CTRL].nir) {
radv_nir_shader_info_merge(&stages[MESA_SHADER_VERTEX], &stages[MESA_SHADER_TESS_CTRL]);

View file

@ -128,6 +128,7 @@ static VkResult
radv_shader_object_init_graphics(struct radv_shader_object *shader_obj, struct radv_device *device,
const VkShaderCreateInfoEXT *pCreateInfo)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
gl_shader_stage stage = vk_to_mesa_shader_stage(pCreateInfo->stage);
struct radv_shader_stage stages[MESA_VULKAN_SHADER_STAGES];
@ -149,7 +150,7 @@ radv_shader_object_init_graphics(struct radv_shader_object *shader_obj, struct r
gfx_state.dynamic_provoking_vtx_mode = true;
gfx_state.dynamic_line_rast_mode = true;
if (device->physical_device->info.gfx_level >= GFX11)
if (pdev->info.gfx_level >= GFX11)
gfx_state.ps.exports_mrtz_via_epilog = true;
struct radv_shader *shader = NULL;
@ -297,6 +298,7 @@ static VkResult
radv_shader_object_init(struct radv_shader_object *shader_obj, struct radv_device *device,
const VkShaderCreateInfoEXT *pCreateInfo)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_shader_layout layout;
VkResult result;
@ -317,7 +319,7 @@ radv_shader_object_init(struct radv_shader_object *shader_obj, struct radv_devic
const uint8_t *cache_uuid = blob_read_bytes(&blob, VK_UUID_SIZE);
if (memcmp(cache_uuid, device->physical_device->cache_uuid, VK_UUID_SIZE))
if (memcmp(cache_uuid, pdev->cache_uuid, VK_UUID_SIZE))
return VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT;
const bool has_main_binary = blob_read_uint32(&blob);
@ -407,6 +409,7 @@ radv_shader_object_create_linked(VkDevice _device, uint32_t createInfoCount, con
const VkAllocationCallbacks *pAllocator, VkShaderEXT *pShaders)
{
RADV_FROM_HANDLE(radv_device, device, _device);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_shader_stage stages[MESA_VULKAN_SHADER_STAGES];
for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
@ -425,7 +428,7 @@ radv_shader_object_create_linked(VkDevice _device, uint32_t createInfoCount, con
gfx_state.dynamic_provoking_vtx_mode = true;
gfx_state.dynamic_line_rast_mode = true;
if (device->physical_device->info.gfx_level >= GFX11)
if (pdev->info.gfx_level >= GFX11)
gfx_state.ps.exports_mrtz_via_epilog = true;
for (unsigned i = 0; i < createInfoCount; i++) {
@ -621,6 +624,7 @@ radv_GetShaderBinaryDataEXT(VkDevice _device, VkShaderEXT shader, size_t *pDataS
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_shader_object, shader_obj, shader);
const struct radv_physical_device *pdev = radv_device_physical(device);
const size_t size = radv_get_shader_object_size(shader_obj);
if (!pData) {
@ -635,7 +639,7 @@ radv_GetShaderBinaryDataEXT(VkDevice _device, VkShaderEXT shader, size_t *pDataS
struct blob blob;
blob_init_fixed(&blob, pData, *pDataSize);
blob_write_bytes(&blob, device->physical_device->cache_uuid, VK_UUID_SIZE);
blob_write_bytes(&blob, pdev->cache_uuid, VK_UUID_SIZE);
radv_write_shader_binary(&blob, shader_obj->binary);

View file

@ -62,7 +62,8 @@ radv_spm_init_bo(struct radv_device *device)
static void
radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
{
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
struct ac_spm *spm = &device->spm;
if (gfx_level >= GFX11) {
@ -142,7 +143,8 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu
void
radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
{
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
struct ac_spm *spm = &device->spm;
uint64_t va = radv_buffer_get_va(spm->bo);
uint64_t ring_size = spm->buffer_size;
@ -170,7 +172,7 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum r
radeon_set_uconfig_reg(cs, R_03726C_RLC_SPM_ACCUM_MODE, 0);
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
radeon_set_uconfig_reg(cs, R_03721C_RLC_SPM_PERFMON_SEGMENT_SIZE,
S_03721C_TOTAL_NUM_SEGMENT(total_muxsel_lines) |
S_03721C_GLOBAL_NUM_SEGMENT(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]) |
@ -238,8 +240,9 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum r
bool
radv_spm_init(struct radv_device *device)
{
const struct radeon_info *gpu_info = &device->physical_device->info;
struct ac_perfcounters *pc = &device->physical_device->ac_perfcounters;
struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
struct ac_perfcounters *pc = &pdev->ac_perfcounters;
/* We failed to initialize the performance counters. */
if (!pc->blocks)

View file

@ -54,15 +54,16 @@ gfx11_get_sqtt_ctrl(const struct radv_device *device, bool enable)
static uint32_t
gfx10_get_sqtt_ctrl(const struct radv_device *device, bool enable)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t sqtt_ctrl = S_008D1C_MODE(enable) | S_008D1C_HIWATER(5) | S_008D1C_UTIL_TIMER(1) |
S_008D1C_RT_FREQ(2) | /* 4096 clk */
S_008D1C_DRAW_EVENT_EN(1) | S_008D1C_REG_STALL_EN(1) | S_008D1C_SPI_STALL_EN(1) |
S_008D1C_SQ_STALL_EN(1) | S_008D1C_REG_DROP_ON_STALL(0);
if (device->physical_device->info.gfx_level == GFX10_3)
if (pdev->info.gfx_level == GFX10_3)
sqtt_ctrl |= S_008D1C_LOWATER_OFFSET(4);
if (device->physical_device->info.has_sqtt_auto_flush_mode_bug)
if (pdev->info.has_sqtt_auto_flush_mode_bug)
sqtt_ctrl |= S_008D1C_AUTO_FLUSH_MODE(1);
return sqtt_ctrl;
@ -86,10 +87,11 @@ radv_ip_to_queue_family(enum amd_ip_type t)
static void
radv_emit_wait_for_idle(const struct radv_device *device, struct radeon_cmdbuf *cs, int family)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum radv_queue_family qf = radv_ip_to_queue_family(family);
enum rgp_flush_bits sqtt_flush_bits = 0;
radv_cs_emit_cache_flush(
device->ws, cs, device->physical_device->info.gfx_level, NULL, 0, qf,
device->ws, cs, pdev->info.gfx_level, NULL, 0, qf,
(family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH
: (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2,
@ -99,9 +101,10 @@ radv_emit_wait_for_idle(const struct radv_device *device, struct radeon_cmdbuf *
static void
radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
{
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
uint32_t shifted_size = device->sqtt.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
const struct radeon_info *gpu_info = &device->physical_device->info;
const struct radeon_info *gpu_info = &pdev->info;
const unsigned shader_mask = ac_sqtt_get_shader_mask(gpu_info);
unsigned max_se = gpu_info->max_se;
@ -111,7 +114,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
uint64_t va = radv_buffer_get_va(device->sqtt.bo);
uint64_t data_va = ac_sqtt_get_data_va(gpu_info, &device->sqtt, va, se);
uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
int active_cu = ac_sqtt_get_active_cu(&device->physical_device->info, se);
int active_cu = ac_sqtt_get_active_cu(&pdev->info, se);
if (ac_sqtt_se_is_disabled(gpu_info, se))
continue;
@ -120,7 +123,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1));
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
/* Order seems important for the following 2 registers. */
radeon_set_perfctr_reg(gfx_level, qf, cs, R_0367A4_SQ_THREAD_TRACE_BUF0_SIZE,
S_0367A4_SIZE(shifted_size) | S_0367A4_BASE_HI(shifted_va >> 32));
@ -151,7 +154,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
/* Should be emitted last (it enables thread traces). */
radeon_set_perfctr_reg(gfx_level, qf, cs, R_0367B0_SQ_THREAD_TRACE_CTRL, gfx11_get_sqtt_ctrl(device, true));
} else if (device->physical_device->info.gfx_level >= GFX10) {
} else if (pdev->info.gfx_level >= GFX10) {
/* Order seems important for the following 2 registers. */
radeon_set_privileged_config_reg(cs, R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
S_008D04_SIZE(shifted_size) | S_008D04_BASE_HI(shifted_va >> 32));
@ -196,7 +199,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
S_030CC8_VM_ID_MASK(0) | S_030CC8_REG_STALL_EN(1) | S_030CC8_SPI_STALL_EN(1) |
S_030CC8_SQ_STALL_EN(1);
if (device->physical_device->info.gfx_level < GFX9) {
if (pdev->info.gfx_level < GFX9) {
sqtt_mask |= S_030CC8_RANDOM_SEED(0xffff);
}
@ -214,7 +217,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
radeon_set_uconfig_reg(cs, R_030CEC_SQ_THREAD_TRACE_HIWATER, S_030CEC_HIWATER(4));
if (device->physical_device->info.gfx_level == GFX9) {
if (pdev->info.gfx_level == GFX9) {
/* Reset thread trace status errors. */
radeon_set_uconfig_reg(cs, R_030CE8_SQ_THREAD_TRACE_STATUS, S_030CE8_UTC_ERROR(0));
}
@ -225,7 +228,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */
S_030CD8_MODE(1);
if (device->physical_device->info.gfx_level == GFX9) {
if (pdev->info.gfx_level == GFX9) {
/* Count SQTT traffic in TCC perf counters. */
sqtt_mode |= S_030CD8_TC_PERF_EN(1);
}
@ -274,17 +277,17 @@ static const uint32_t gfx11_sqtt_info_regs[] = {
static void
radv_copy_sqtt_info_regs(const struct radv_device *device, struct radeon_cmdbuf *cs, unsigned se_index)
{
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
const uint32_t *sqtt_info_regs = NULL;
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
sqtt_info_regs = gfx11_sqtt_info_regs;
} else if (device->physical_device->info.gfx_level >= GFX10) {
} else if (pdev->info.gfx_level >= GFX10) {
sqtt_info_regs = gfx10_sqtt_info_regs;
} else if (device->physical_device->info.gfx_level == GFX9) {
} else if (pdev->info.gfx_level == GFX9) {
sqtt_info_regs = gfx9_sqtt_info_regs;
} else {
assert(device->physical_device->info.gfx_level == GFX8);
assert(pdev->info.gfx_level == GFX8);
sqtt_info_regs = gfx8_sqtt_info_regs;
}
@ -330,8 +333,9 @@ radv_copy_sqtt_info_regs(const struct radv_device *device, struct radeon_cmdbuf
static void
radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
{
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
unsigned max_se = device->physical_device->info.max_se;
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
unsigned max_se = pdev->info.max_se;
radeon_check_space(device->ws, cs, 8 + max_se * 64);
@ -346,20 +350,20 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs,
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0));
if (device->physical_device->info.has_sqtt_rb_harvest_bug) {
if (pdev->info.has_sqtt_rb_harvest_bug) {
/* Some chips with disabled RBs should wait for idle because FINISH_DONE doesn't work. */
radv_emit_wait_for_idle(device, cs, qf);
}
for (unsigned se = 0; se < max_se; se++) {
if (ac_sqtt_se_is_disabled(&device->physical_device->info, se))
if (ac_sqtt_se_is_disabled(&pdev->info, se))
continue;
/* Target SEi and SH0. */
radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1));
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
/* Make sure to wait for the trace buffer. */
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */
@ -380,8 +384,8 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs,
radeon_emit(cs, 0); /* reference value */
radeon_emit(cs, ~C_0367D0_BUSY); /* mask */
radeon_emit(cs, 4); /* poll interval */
} else if (device->physical_device->info.gfx_level >= GFX10) {
if (!device->physical_device->info.has_sqtt_rb_harvest_bug) {
} else if (pdev->info.gfx_level >= GFX10) {
if (!pdev->info.has_sqtt_rb_harvest_bug) {
/* Make sure to wait for the trace buffer. */
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */
@ -429,7 +433,8 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs,
void
radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords)
{
const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
const enum radv_queue_family qf = cmd_buffer->qf;
struct radv_device *device = cmd_buffer->device;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
@ -446,7 +451,7 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da
/* Without the perfctr bit the CP might not always pass the
* write on correctly. */
if (device->physical_device->info.gfx_level >= GFX10)
if (pdev->info.gfx_level >= GFX10)
radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
else
radeon_set_uconfig_reg_seq(cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
@ -460,11 +465,13 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da
void
radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf *cs, bool enable)
{
if (device->physical_device->info.gfx_level >= GFX9) {
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level >= GFX9) {
uint32_t spi_config_cntl = S_031100_GPR_WRITE_PRIORITY(0x2c688) | S_031100_EXP_PRIORITY_ORDER(3) |
S_031100_ENABLE_SQG_TOP_EVENTS(enable) | S_031100_ENABLE_SQG_BOP_EVENTS(enable);
if (device->physical_device->info.gfx_level >= GFX10)
if (pdev->info.gfx_level >= GFX10)
spi_config_cntl |= S_031100_PS_PKR_PRIORITY_CNTL(3);
radeon_set_uconfig_reg(cs, R_031100_SPI_CONFIG_CNTL, spi_config_cntl);
@ -478,12 +485,14 @@ radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf
void
radv_emit_inhibit_clockgating(const struct radv_device *device, struct radeon_cmdbuf *cs, bool inhibit)
{
if (device->physical_device->info.gfx_level >= GFX11)
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.gfx_level >= GFX11)
return; /* not needed */
if (device->physical_device->info.gfx_level >= GFX10) {
if (pdev->info.gfx_level >= GFX10) {
radeon_set_uconfig_reg(cs, R_037390_RLC_PERFMON_CLK_CNTL, S_037390_PERFMON_CLOCK_STATE(inhibit));
} else if (device->physical_device->info.gfx_level >= GFX8) {
} else if (pdev->info.gfx_level >= GFX8) {
radeon_set_uconfig_reg(cs, R_0372FC_RLC_PERFMON_CLK_CNTL, S_0372FC_PERFMON_CLOCK_STATE(inhibit));
}
}
@ -620,7 +629,8 @@ radv_sqtt_finish_queue_event(struct radv_device *device)
static bool
radv_sqtt_init_bo(struct radv_device *device)
{
unsigned max_se = device->physical_device->info.max_se;
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned max_se = pdev->info.max_se;
struct radeon_winsys *ws = device->ws;
VkResult result;
uint64_t size;
@ -801,6 +811,7 @@ bool
radv_begin_sqtt(struct radv_queue *queue)
{
struct radv_device *device = queue->device;
const struct radv_physical_device *pdev = radv_device_physical(device);
enum radv_queue_family family = queue->state.qf;
struct radeon_winsys *ws = device->ws;
struct radeon_cmdbuf *cs;
@ -846,7 +857,7 @@ radv_begin_sqtt(struct radv_queue *queue)
if (device->spm.bo) {
/* Enable all shader stages by default. */
radv_perfcounter_emit_shaders(device, cs, ac_sqtt_get_shader_mask(&device->physical_device->info));
radv_perfcounter_emit_shaders(device, cs, ac_sqtt_get_shader_mask(&pdev->info));
radv_emit_spm_setup(device, cs, family);
}
@ -936,7 +947,8 @@ bool
radv_get_sqtt_trace(struct radv_queue *queue, struct ac_sqtt_trace *sqtt_trace)
{
struct radv_device *device = queue->device;
const struct radeon_info *gpu_info = &device->physical_device->info;
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
if (!ac_sqtt_get_trace(&device->sqtt, gpu_info, sqtt_trace)) {
if (!radv_sqtt_resize_bo(device))

View file

@ -311,7 +311,7 @@ calc_ctx_size_h265_main10(struct radv_video_session *vid)
static unsigned
calc_ctx_size_av1(struct radv_device *device, struct radv_video_session *vid)
{
struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned frame_ctxt_size = pdev->av1_version == RDECODE_AV1_VER_0
? align(sizeof(rvcn_av1_frame_context_t), 2048)
: align(sizeof(rvcn_av1_vcn4_frame_context_t), 2048);
@ -345,6 +345,7 @@ radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR *
const VkAllocationCallbacks *pAllocator, VkVideoSessionKHR *pVideoSession)
{
RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_video_session *vid =
vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*vid), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@ -365,12 +366,12 @@ radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR *
switch (vid->vk.op) {
case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
vid->stream_type = RDECODE_CODEC_H264_PERF;
if (radv_enable_tier2(device->physical_device))
if (radv_enable_tier2(pdev))
vid->dpb_type = DPB_DYNAMIC_TIER_2;
break;
case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
vid->stream_type = RDECODE_CODEC_H265;
if (radv_enable_tier2(device->physical_device))
if (radv_enable_tier2(pdev))
vid->dpb_type = DPB_DYNAMIC_TIER_2;
break;
case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:
@ -381,10 +382,10 @@ radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR *
return VK_ERROR_FEATURE_NOT_PRESENT;
}
vid->stream_handle = radv_vid_alloc_stream_handle(device->physical_device);
vid->stream_handle = radv_vid_alloc_stream_handle(pdev);
vid->dbg_frame_cnt = 0;
vid->db_alignment = radv_video_get_db_alignment(
device->physical_device, vid->vk.max_coded.width,
pdev, vid->vk.max_coded.width,
(vid->stream_type == RDECODE_CODEC_AV1 ||
(vid->stream_type == RDECODE_CODEC_H265 && vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)));
@ -656,11 +657,13 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR vi
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_video_session, vid, videoSession);
uint32_t memory_type_bits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t memory_type_bits = (1u << pdev->memory_properties.memoryTypeCount) - 1;
VK_OUTARRAY_MAKE_TYPED(VkVideoSessionMemoryRequirementsKHR, out, pMemoryRequirements, pMemoryRequirementsCount);
/* 1 buffer for session context */
if (device->physical_device->info.family >= CHIP_POLARIS10) {
if (pdev->info.family >= CHIP_POLARIS10) {
vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
{
m->memoryBindIndex = RADV_BIND_SESSION_CTX;
@ -670,7 +673,7 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR vi
}
}
if (vid->stream_type == RDECODE_CODEC_H264_PERF && device->physical_device->info.family >= CHIP_POLARIS10) {
if (vid->stream_type == RDECODE_CODEC_H264_PERF && pdev->info.family >= CHIP_POLARIS10) {
vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
{
m->memoryBindIndex = RADV_BIND_DECODER_CTX;
@ -701,9 +704,8 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR vi
m->memoryRequirements.size = align(calc_ctx_size_av1(device, vid), 4096);
m->memoryRequirements.alignment = 0;
m->memoryRequirements.memoryTypeBits = 0;
for (unsigned i = 0; i < device->physical_device->memory_properties.memoryTypeCount; i++)
if (device->physical_device->memory_properties.memoryTypes[i].propertyFlags &
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
for (unsigned i = 0; i < pdev->memory_properties.memoryTypeCount; i++)
if (pdev->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
m->memoryRequirements.memoryTypeBits |= (1 << i);
}
}
@ -761,14 +763,15 @@ set_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, uint32_t val)
static void
send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd, struct radeon_winsys_bo *bo, uint32_t offset)
{
struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
struct radv_device *device = cmd_buffer->device;
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t addr;
radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
addr = radv_buffer_get_va(bo);
addr += offset;
if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 6);
set_reg(cmd_buffer, pdev->vid_dec_reg.data0, addr);
set_reg(cmd_buffer, pdev->vid_dec_reg.data1, addr >> 32);
@ -1037,6 +1040,7 @@ get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct
uint32_t *height_in_samples,
void *it_ptr)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
rvcn_dec_message_hevc_t result;
int i, j;
const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =
@ -1059,7 +1063,7 @@ get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct
result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7;
result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8;
if (device->physical_device->info.family == CHIP_CARRIZO)
if (pdev->info.family == CHIP_CARRIZO)
result.sps_info_flags |= 1 << 9;
if (!h265_pic_info->pStdPictureInfo->flags.short_term_ref_pic_set_sps_flag) {
@ -2097,6 +2101,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
const struct VkVideoDecodeInfoKHR *frame_info)
{
struct radv_device *device = cmd_buffer->device;
const struct radv_physical_device *pdev = radv_device_physical(device);
rvcn_dec_message_header_t *header;
rvcn_dec_message_index_t *index_codec;
rvcn_dec_message_decode_t *decode;
@ -2182,7 +2187,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
decode->dt_tiling_mode = 0;
decode->dt_swizzle_mode = luma->surface.u.gfx9.swizzle_mode;
decode->dt_array_mode = device->physical_device->vid_addr_gfx_mode;
decode->dt_array_mode = pdev->vid_addr_gfx_mode;
decode->dt_field_mode = vid->interlaced ? 1 : 0;
decode->dt_surf_tile_config = 0;
decode->dt_uv_surf_tile_config = 0;
@ -2254,7 +2259,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
decode->db_pitch = dpb->planes[0].surface.u.gfx9.surf_pitch;
decode->db_aligned_height = dpb->planes[0].surface.u.gfx9.surf_height;
decode->db_swizzle_mode = dpb->planes[0].surface.u.gfx9.swizzle_mode;
decode->db_array_mode = device->physical_device->vid_addr_gfx_mode;
decode->db_array_mode = pdev->vid_addr_gfx_mode;
decode->hw_ctxt_size = vid->ctx.size;
@ -2427,6 +2432,7 @@ get_uvd_h265_msg(struct radv_device *device, struct radv_video_session *vid, str
const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *width_in_samples,
uint32_t *height_in_samples, void *it_ptr)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct ruvd_h265 result;
int i, j;
const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =
@ -2450,7 +2456,7 @@ get_uvd_h265_msg(struct radv_device *device, struct radv_video_session *vid, str
result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7;
result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8;
if (device->physical_device->info.family == CHIP_CARRIZO)
if (pdev->info.family == CHIP_CARRIZO)
result.sps_info_flags |= 1 << 9;
*width_in_samples = sps->pic_width_in_luma_samples;
@ -2592,6 +2598,7 @@ ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *v
struct radv_video_session_params *params, void *ptr, void *it_ptr, uint32_t *slice_offset,
const struct VkVideoDecodeInfoKHR *frame_info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct ruvd_msg *msg = ptr;
struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
struct radv_image *img = dst_iv->image;
@ -2616,7 +2623,7 @@ ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *v
msg->body.decode.bsd_size = frame_info->srcBufferRange;
msg->body.decode.db_pitch = align(frame_info->dstPictureResource.codedExtent.width, vid->db_alignment);
if (vid->stream_type == RUVD_CODEC_H264_PERF && device->physical_device->info.family >= CHIP_POLARIS10)
if (vid->stream_type == RUVD_CODEC_H264_PERF && pdev->info.family >= CHIP_POLARIS10)
msg->body.decode.dpb_reserved = vid->ctx.size;
*slice_offset = 0;
@ -2643,7 +2650,7 @@ ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *v
msg->body.decode.dt_field_mode = false;
if (device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
msg->body.decode.dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;
@ -2703,7 +2710,7 @@ ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *v
RUVD_MACRO_TILE_ASPECT_RATIO(util_logbase2(luma->surface.u.legacy.mtilea));
}
if (device->physical_device->info.family >= CHIP_STONEY)
if (pdev->info.family >= CHIP_STONEY)
msg->body.decode.dt_wa_chroma_top_offset = msg->body.decode.dt_pitch / 2;
msg->body.decode.db_surf_tile_config = msg->body.decode.dt_surf_tile_config;
@ -2740,8 +2747,8 @@ radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoBeginCod
static void
radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radv_video_session *vid = cmd_buffer->video.vid;
struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
uint32_t size = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t);
void *ptr;
@ -2771,7 +2778,7 @@ radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
}
radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED)
if (pdev->vid_decode_ip == AMD_IP_VCN_UNIFIED)
radv_vcn_sq_start(cmd_buffer);
rvcn_dec_message_create(vid, ptr, size);
@ -2779,7 +2786,7 @@ radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset);
/* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */
if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 8);
for (unsigned i = 0; i < 8; i++)
radeon_emit(cmd_buffer->cs, 0x81ff);
@ -2812,8 +2819,10 @@ VKAPI_ATTR void VKAPI_CALL
radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoCodingControlInfoKHR *pCodingControlInfo)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
if (pCodingControlInfo->flags & VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR) {
if (radv_has_uvd(cmd_buffer->device->physical_device))
if (radv_has_uvd(pdev))
radv_uvd_cmd_reset(cmd_buffer);
else
radv_vcn_cmd_reset(cmd_buffer);
@ -2829,14 +2838,14 @@ static void
radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info)
{
RADV_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radv_video_session *vid = cmd_buffer->video.vid;
struct radv_video_session_params *params = cmd_buffer->video.params;
unsigned size = sizeof(struct ruvd_msg);
void *ptr, *fb_ptr, *it_probs_ptr = NULL;
uint32_t out_offset, fb_offset, it_probs_offset = 0;
struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL;
unsigned fb_size =
(cmd_buffer->device->physical_device->info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE;
unsigned fb_size = (pdev->info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE;
radv_vid_buffer_upload_alloc(cmd_buffer, fb_size, &fb_offset, &fb_ptr);
fb_bo = cmd_buffer->upload.upload_bo;
@ -2876,13 +2885,14 @@ radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf
send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_probs_bo, it_probs_offset);
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 2);
set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1);
set_reg(cmd_buffer, pdev->vid_dec_reg.cntl, 1);
}
static void
radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info)
{
RADV_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radv_video_session *vid = cmd_buffer->video.vid;
struct radv_video_session_params *params = cmd_buffer->video.params;
unsigned size = 0;
@ -2924,7 +2934,7 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf
radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
msg_bo = cmd_buffer->upload.upload_bo;
if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED)
if (pdev->vid_decode_ip == AMD_IP_VCN_UNIFIED)
radv_vcn_sq_start(cmd_buffer);
uint32_t slice_offset;
@ -2955,9 +2965,9 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf
else if (have_probs(vid))
send_cmd(cmd_buffer, RDECODE_CMD_PROB_TBL_BUFFER, it_probs_bo, it_probs_offset);
if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 2);
set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1);
set_reg(cmd_buffer, pdev->vid_dec_reg.cntl, 1);
} else
radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
}
@ -2966,8 +2976,9 @@ VKAPI_ATTR void VKAPI_CALL
radv_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer, const VkVideoDecodeInfoKHR *frame_info)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
if (radv_has_uvd(cmd_buffer->device->physical_device))
if (radv_has_uvd(pdev))
radv_uvd_decode_video(cmd_buffer, frame_info);
else
radv_vcn_decode_video(cmd_buffer, frame_info);

View file

@ -54,17 +54,17 @@ static VkQueue
radv_wsi_get_prime_blit_queue(VkDevice _device)
{
RADV_FROM_HANDLE(radv_device, device, _device);
struct radv_physical_device *pdev = radv_device_physical(device);
if (device->private_sdma_queue != VK_NULL_HANDLE)
return vk_queue_to_handle(&device->private_sdma_queue->vk);
if (device->physical_device->info.gfx_level >= GFX9 &&
!(device->physical_device->instance->debug_flags & RADV_DEBUG_NO_DMA_BLIT)) {
if (pdev->info.gfx_level >= GFX9 && !(pdev->instance->debug_flags & RADV_DEBUG_NO_DMA_BLIT)) {
device->physical_device->vk_queue_to_radv[device->physical_device->num_queues++] = RADV_QUEUE_TRANSFER;
pdev->vk_queue_to_radv[pdev->num_queues++] = RADV_QUEUE_TRANSFER;
const VkDeviceQueueCreateInfo queue_create = {
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
.queueFamilyIndex = device->physical_device->num_queues - 1,
.queueFamilyIndex = pdev->num_queues - 1,
.queueCount = 1,
};

View file

@ -73,14 +73,15 @@ radv_write_harvested_raster_configs(struct radv_physical_device *pdev, struct ra
void
radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
{
const struct radeon_info *gpu_info = &device->physical_device->info;
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_set_sh_reg(cs, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(device->physical_device->info.address32_hi >> 8));
radeon_set_sh_reg(cs, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(pdev->info.address32_hi >> 8));
radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2);
/* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1,
@ -90,7 +91,7 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
radeon_emit(cs, S_00B8AC_SA0_CU_EN(cu_mask) | S_00B8AC_SA1_CU_EN(cu_mask));
}
if (device->physical_device->info.gfx_level >= GFX7) {
if (pdev->info.gfx_level >= GFX7) {
/* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */
radeon_set_sh_reg_seq(cs, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2);
for (unsigned i = 2; i < 4; ++i) {
@ -107,12 +108,11 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
}
}
if (device->physical_device->info.gfx_level >= GFX9 && device->physical_device->info.gfx_level < GFX11) {
radeon_set_uconfig_reg(cs, R_0301EC_CP_COHER_START_DELAY,
device->physical_device->info.gfx_level >= GFX10 ? 0x20 : 0);
if (pdev->info.gfx_level >= GFX9 && pdev->info.gfx_level < GFX11) {
radeon_set_uconfig_reg(cs, R_0301EC_CP_COHER_START_DELAY, pdev->info.gfx_level >= GFX10 ? 0x20 : 0);
}
if (device->physical_device->info.gfx_level >= GFX10) {
if (pdev->info.gfx_level >= GFX10) {
radeon_set_sh_reg_seq(cs, R_00B890_COMPUTE_USER_ACCUM_0, 4);
radeon_emit(cs, 0); /* R_00B890_COMPUTE_USER_ACCUM_0 */
radeon_emit(cs, 0); /* R_00B894_COMPUTE_USER_ACCUM_1 */
@ -122,7 +122,7 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
radeon_set_sh_reg(cs, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0);
}
if (device->physical_device->info.gfx_level == GFX6) {
if (pdev->info.gfx_level == GFX6) {
if (device->border_color_data.bo) {
uint64_t bc_va = radv_buffer_get_va(device->border_color_data.bo);
radeon_set_config_reg(cs, R_00950C_TA_CS_BC_BASE_ADDR, bc_va >> 8);
@ -132,7 +132,7 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
if (device->tma_bo) {
uint64_t tba_va, tma_va;
assert(device->physical_device->info.gfx_level == GFX8);
assert(pdev->info.gfx_level == GFX8);
tba_va = radv_shader_get_va(device->trap_handler_shader);
tma_va = radv_buffer_get_va(device->tma_bo);
@ -144,7 +144,7 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
radeon_emit(cs, tma_va >> 40);
}
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
radeon_set_sh_reg_seq(cs, R_00B8AC_COMPUTE_STATIC_THREAD_MGMT_SE4, 4);
/* SE4-SE7 */
for (unsigned i = 4; i < 8; ++i) {
@ -187,7 +187,7 @@ radv_set_raster_config(struct radv_physical_device *pdev, struct radeon_cmdbuf *
void
radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
{
struct radv_physical_device *pdev = device->physical_device;
struct radv_physical_device *pdev = radv_device_physical(device);
bool has_clear_state = pdev->info.has_clear_state;
int i;
@ -300,26 +300,19 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
}
if (device->physical_device->info.gfx_level >= GFX10) {
radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS,
S_00B524_MEM_BASE(device->physical_device->info.address32_hi >> 8));
radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES,
S_00B324_MEM_BASE(device->physical_device->info.address32_hi >> 8));
} else if (device->physical_device->info.gfx_level == GFX9) {
radeon_set_sh_reg(cs, R_00B414_SPI_SHADER_PGM_HI_LS,
S_00B414_MEM_BASE(device->physical_device->info.address32_hi >> 8));
radeon_set_sh_reg(cs, R_00B214_SPI_SHADER_PGM_HI_ES,
S_00B214_MEM_BASE(device->physical_device->info.address32_hi >> 8));
if (pdev->info.gfx_level >= GFX10) {
radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS, S_00B524_MEM_BASE(pdev->info.address32_hi >> 8));
radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(pdev->info.address32_hi >> 8));
} else if (pdev->info.gfx_level == GFX9) {
radeon_set_sh_reg(cs, R_00B414_SPI_SHADER_PGM_HI_LS, S_00B414_MEM_BASE(pdev->info.address32_hi >> 8));
radeon_set_sh_reg(cs, R_00B214_SPI_SHADER_PGM_HI_ES, S_00B214_MEM_BASE(pdev->info.address32_hi >> 8));
} else {
radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS,
S_00B524_MEM_BASE(device->physical_device->info.address32_hi >> 8));
radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES,
S_00B324_MEM_BASE(device->physical_device->info.address32_hi >> 8));
radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS, S_00B524_MEM_BASE(pdev->info.address32_hi >> 8));
radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(pdev->info.address32_hi >> 8));
}
if (device->physical_device->info.gfx_level < GFX11)
radeon_set_sh_reg(cs, R_00B124_SPI_SHADER_PGM_HI_VS,
S_00B124_MEM_BASE(device->physical_device->info.address32_hi >> 8));
if (pdev->info.gfx_level < GFX11)
radeon_set_sh_reg(cs, R_00B124_SPI_SHADER_PGM_HI_VS, S_00B124_MEM_BASE(pdev->info.address32_hi >> 8));
unsigned cu_mask_ps = 0xffffffff;
@ -400,8 +393,7 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
/* Enable CMASK/FMASK/HTILE/DCC caching in L2 for small chips. */
unsigned meta_write_policy, meta_read_policy;
unsigned no_alloc =
device->physical_device->info.gfx_level >= GFX11 ? V_02807C_CACHE_NOA_GFX11 : V_02807C_CACHE_NOA_GFX10;
unsigned no_alloc = pdev->info.gfx_level >= GFX11 ? V_02807C_CACHE_NOA_GFX11 : V_02807C_CACHE_NOA_GFX10;
/* TODO: investigate whether LRU improves performance on other chips too */
if (pdev->info.max_render_backends <= 4) {
@ -419,7 +411,7 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
S_02807C_S_RD_POLICY(no_alloc) | S_02807C_HTILE_RD_POLICY(meta_read_policy));
uint32_t gl2_cc;
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
gl2_cc = S_028410_DCC_WR_POLICY_GFX11(meta_write_policy) |
S_028410_COLOR_WR_POLICY_GFX11(V_028410_CACHE_STREAM) |
S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_GFX11);
@ -569,7 +561,7 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
if (device->tma_bo) {
uint64_t tba_va, tma_va;
assert(device->physical_device->info.gfx_level == GFX8);
assert(pdev->info.gfx_level == GFX8);
tba_va = radv_shader_get_va(device->trap_handler_shader);
tma_va = radv_buffer_get_va(device->tma_bo);
@ -630,6 +622,7 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
void
radv_create_gfx_config(struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = device->ws->cs_create(device->ws, AMD_IP_GFX, false);
if (!cs)
return;
@ -639,7 +632,7 @@ radv_create_gfx_config(struct radv_device *device)
radv_emit_graphics(device, cs);
while (cs->cdw & 7) {
if (device->physical_device->info.gfx_ib_pad_with_type2)
if (pdev->info.gfx_ib_pad_with_type2)
radeon_emit(cs, PKT2_NOP_PAD);
else
radeon_emit(cs, PKT3_NOP_PAD);
@ -817,7 +810,8 @@ radv_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_d
bool count_from_stream_output, uint32_t draw_vertex_count, unsigned topology,
bool prim_restart_enable, unsigned patch_control_points, unsigned num_tess_patches)
{
const struct radeon_info *gpu_info = &cmd_buffer->device->physical_device->info;
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
const struct radeon_info *gpu_info = &pdev->info;
const unsigned max_primgroup_in_wave = 2;
/* SWITCH_ON_EOP(0) is always preferable. */
bool wd_switch_on_eop = false;
@ -839,7 +833,7 @@ radv_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_d
/* GS requirement. */
if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_GEOMETRY) && gpu_info->gfx_level <= GFX8) {
unsigned gs_table_depth = cmd_buffer->device->physical_device->gs_table_depth;
unsigned gs_table_depth = pdev->gs_table_depth;
if (SI_GS_PER_ES / primgroup_size >= gs_table_depth - 3)
partial_es_wave = true;
}
@ -1495,6 +1489,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enu
void
radv_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
bool is_compute = cmd_buffer->qf == RADV_QUEUE_COMPUTE;
if (is_compute)
@ -1509,10 +1504,10 @@ radv_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
return;
}
radv_cs_emit_cache_flush(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->device->physical_device->info.gfx_level,
&cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va,
radv_cmd_buffer_uses_mec(cmd_buffer), cmd_buffer->state.flush_bits,
&cmd_buffer->state.sqtt_flush_bits, cmd_buffer->gfx9_eop_bug_va);
radv_cs_emit_cache_flush(cmd_buffer->device->ws, cmd_buffer->cs, pdev->info.gfx_level, &cmd_buffer->gfx9_fence_idx,
cmd_buffer->gfx9_fence_va, radv_cmd_buffer_uses_mec(cmd_buffer),
cmd_buffer->state.flush_bits, &cmd_buffer->state.sqtt_flush_bits,
cmd_buffer->gfx9_eop_bug_va);
if (radv_device_fault_detection_enabled(cmd_buffer->device))
radv_cmd_buffer_trace_emit(cmd_buffer);
@ -1539,6 +1534,7 @@ radv_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
void
radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible, unsigned pred_op, uint64_t va)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
uint32_t op = 0;
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4);
@ -1554,7 +1550,7 @@ radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_vi
*/
op |= draw_visible ? PREDICATION_DRAW_VISIBLE : PREDICATION_DRAW_NOT_VISIBLE;
}
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
radeon_emit(cmd_buffer->cs, op);
radeon_emit(cmd_buffer->cs, va);
@ -1569,7 +1565,8 @@ radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_vi
void
radv_emit_cond_exec(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, uint32_t count)
{
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
if (gfx_level >= GFX7) {
radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
@ -1619,12 +1616,13 @@ static void
radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool predicating, uint64_t dst_va,
uint64_t src_va, unsigned size, unsigned flags)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t header = 0, command = 0;
assert(size <= cp_dma_max_byte_count(device->physical_device->info.gfx_level));
assert(size <= cp_dma_max_byte_count(pdev->info.gfx_level));
radeon_check_space(device->ws, cs, 9);
if (device->physical_device->info.gfx_level >= GFX9)
if (pdev->info.gfx_level >= GFX9)
command |= S_415_BYTE_COUNT_GFX9(size);
else
command |= S_415_BYTE_COUNT_GFX6(size);
@ -1637,7 +1635,7 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool p
command |= S_415_RAW_WAIT(1);
/* Src and dst flags. */
if (device->physical_device->info.gfx_level >= GFX9 && !(flags & CP_DMA_CLEAR) && src_va == dst_va)
if (pdev->info.gfx_level >= GFX9 && !(flags & CP_DMA_CLEAR) && src_va == dst_va)
header |= S_411_DST_SEL(V_411_NOWHERE); /* prefetch only */
else if (flags & CP_DMA_USE_L2)
header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
@ -1647,7 +1645,7 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool p
else if (flags & CP_DMA_USE_L2)
header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
if (device->physical_device->info.gfx_level >= GFX7) {
if (pdev->info.gfx_level >= GFX7) {
radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, predicating));
radeon_emit(cs, header);
radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
@ -1699,8 +1697,9 @@ void
radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, unsigned size,
bool predicating)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_winsys *ws = device->ws;
enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
enum amd_gfx_level gfx_level = pdev->info.gfx_level;
uint32_t header = 0, command = 0;
if (gfx_level >= GFX11)
@ -1784,15 +1783,15 @@ radv_cp_dma_realign_engine(struct radv_cmd_buffer *cmd_buffer, unsigned size)
void
radv_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va, uint64_t size)
{
enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level;
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
enum amd_gfx_level gfx_level = pdev->info.gfx_level;
uint64_t main_src_va, main_dest_va;
uint64_t skipped_size = 0, realign_size = 0;
/* Assume that we are not going to sync after the last DMA operation. */
cmd_buffer->state.dma_is_busy = true;
if (cmd_buffer->device->physical_device->info.family <= CHIP_CARRIZO ||
cmd_buffer->device->physical_device->info.family == CHIP_STONEY) {
if (pdev->info.family <= CHIP_CARRIZO || pdev->info.family == CHIP_STONEY) {
/* If the size is not aligned, we must add a dummy copy at the end
* just to align the internal counter. Otherwise, the DMA engine
* would slow down by an order of magnitude for following copies.
@ -1818,7 +1817,7 @@ radv_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uin
unsigned dma_flags = 0;
unsigned byte_count = MIN2(size, cp_dma_max_byte_count(gfx_level));
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
/* DMA operations via L2 are coherent and faster.
* TODO: GFX7-GFX8 should also support this but it
* requires tests/benchmarks.
@ -1858,12 +1857,14 @@ radv_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uin
void
radv_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, unsigned value)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
if (!size)
return;
assert(va % 4 == 0 && size % 4 == 0);
enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level;
enum amd_gfx_level gfx_level = pdev->info.gfx_level;
/* Assume that we are not going to sync after the last DMA operation. */
cmd_buffer->state.dma_is_busy = true;
@ -1872,7 +1873,7 @@ radv_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64
unsigned byte_count = MIN2(size, cp_dma_max_byte_count(gfx_level));
unsigned dma_flags = CP_DMA_CLEAR;
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
/* DMA operations via L2 are coherent and faster.
* TODO: GFX7-GFX8 should also support this but it
* requires tests/benchmarks.
@ -1895,7 +1896,9 @@ radv_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64
void
radv_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer)
{
if (cmd_buffer->device->physical_device->info.gfx_level < GFX7)
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
if (pdev->info.gfx_level < GFX7)
return;
if (!cmd_buffer->state.dma_is_busy)