mirror of https://gitlab.freedesktop.org/mesa/mesa.git (synced 2026-03-10 07:10:39 +01:00)

radv: remove radv_device::physical_device

Get the physical device object using the base object.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28500>

parent 310597cab6
commit 896c9cf486

55 changed files with 1390 additions and 1035 deletions
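For context, radv_device_physical() — the accessor every hunk below switches to — resolves the physical device through the shared Vulkan base object instead of the duplicated radv_device::physical_device field this commit deletes. A minimal sketch of the pattern, assuming the vk_device base object carries a vk_physical_device pointer and that struct radv_physical_device embeds its vk base as the first member (the exact definition lives in the radv headers and may differ in detail):

    /* Hypothetical sketch of the accessor pattern; not the verbatim driver code.
     * device->vk is the common vk_device base object, whose "physical" member
     * points at the vk_physical_device that struct radv_physical_device wraps,
     * so a cast recovers the driver's physical device type. */
    static inline struct radv_physical_device *
    radv_device_physical(const struct radv_device *device)
    {
       return (struct radv_physical_device *)device->vk.physical;
    }

Call sites then hold a local pdev and read chip data through it (pdev->info.gfx_level, pdev->use_llvm, and so on), which is exactly the shape of the replacements in the hunks that follow.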
@@ -35,7 +35,8 @@
 void
 radv_sqtt_emit_relocated_shaders(struct radv_cmd_buffer *cmd_buffer, struct radv_graphics_pipeline *pipeline)
 {
-   const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
+   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
    struct radv_sqtt_shaders_reloc *reloc = pipeline->sqtt_shaders_reloc;
    struct radeon_cmdbuf *cs = cmd_buffer->cs;
    uint64_t va;

@@ -340,7 +341,8 @@ radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
       return;

    /* Reserve a command buffer ID for SQTT. */
-   enum amd_ip_type ip_type = radv_queue_family_to_ring(cmd_buffer->device->physical_device, cmd_buffer->qf);
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
+   enum amd_ip_type ip_type = radv_queue_family_to_ring(pdev, cmd_buffer->qf);
    union rgp_sqtt_marker_cb_id cb_id = ac_sqtt_get_next_cmdbuf_id(&cmd_buffer->device->sqtt, ip_type);
    cmd_buffer->sqtt_cb_id = cb_id.all;

@@ -354,7 +356,7 @@ radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
    if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
       marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;

-   if (!radv_sparse_queue_enabled(cmd_buffer->device->physical_device))
+   if (!radv_sparse_queue_enabled(pdev))
       marker.queue_flags |= VK_QUEUE_SPARSE_BINDING_BIT;

    radv_emit_sqtt_userdata(cmd_buffer, &marker, sizeof(marker) / 4);

@@ -655,6 +657,7 @@ radv_handle_sqtt(VkQueue _queue)
 {
    RADV_FROM_HANDLE(radv_queue, queue, _queue);

+   const struct radv_physical_device *pdev = radv_device_physical(queue->device);
    bool trigger = queue->device->sqtt_triggered;
    queue->device->sqtt_triggered = false;

@@ -673,8 +676,7 @@ radv_handle_sqtt(VkQueue _queue)
       if (queue->device->spm.bo)
          ac_spm_get_trace(&queue->device->spm, &spm_trace);

-      ac_dump_rgp_capture(&queue->device->physical_device->info, &sqtt_trace,
-                          queue->device->spm.bo ? &spm_trace : NULL);
+      ac_dump_rgp_capture(&pdev->info, &sqtt_trace, queue->device->spm.bo ? &spm_trace : NULL);
    } else {
       /* Trigger a new capture if the driver failed to get
        * the trace because the buffer was too small.

@@ -687,7 +689,7 @@ radv_handle_sqtt(VkQueue _queue)
    }

    if (trigger) {
-      if (ac_check_profile_state(&queue->device->physical_device->info)) {
+      if (ac_check_profile_state(&pdev->info)) {
         fprintf(stderr, "radv: Canceling RGP trace request as a hang condition has been "
                         "detected. Force the GPU into a profiling mode with e.g. "
                         "\"echo profile_peak > "

@@ -1415,7 +1417,7 @@ static void
 radv_fill_code_object_record(struct radv_device *device, struct rgp_shader_data *shader_data,
                              struct radv_shader *shader, uint64_t va)
 {
-   struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    unsigned lds_increment = pdev->info.gfx_level >= GFX11 && shader->info.stage == MESA_SHADER_FRAGMENT
                                ? 1024
                                : pdev->info.lds_encode_granularity;

@@ -461,6 +461,7 @@ fail:
 VkResult
 radv_device_init_meta(struct radv_device *device)
 {
+   struct radv_physical_device *pdev = radv_device_physical(device);
    VkResult result;

    memset(&device->meta_state, 0, sizeof(device->meta_state));

@@ -521,7 +522,7 @@ radv_device_init_meta(struct radv_device *device)
    if (result != VK_SUCCESS)
       goto fail_resolve_fragment;

-   if (device->physical_device->use_fmask) {
+   if (pdev->use_fmask) {
       result = radv_device_init_meta_fmask_expand_state(device, on_demand);
       if (result != VK_SUCCESS)
          goto fail_fmask_expand;

@@ -555,11 +556,11 @@ radv_device_init_meta(struct radv_device *device)
    /* FIXME: Acceleration structure builds hang when the build shaders are compiled with LLVM.
    * Work around it by forcing ACO for now.
    */
-   bool use_llvm = device->physical_device->use_llvm;
+   bool use_llvm = pdev->use_llvm;
    if (loaded_cache || use_llvm) {
-      device->physical_device->use_llvm = false;
+      pdev->use_llvm = false;
       result = radv_device_init_accel_struct_build_state(device);
-      device->physical_device->use_llvm = use_llvm;
+      pdev->use_llvm = use_llvm;

       if (result != VK_SUCCESS)
          goto fail_accel_struct;

@@ -639,6 +640,7 @@ radv_device_finish_meta(struct radv_device *device)
 nir_builder PRINTFLIKE(3, 4)
 radv_meta_init_shader(struct radv_device *dev, gl_shader_stage stage, const char *name, ...)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(dev);
    nir_builder b = nir_builder_init_simple_shader(stage, NULL, NULL);
    if (name) {
       va_list args;

@@ -647,7 +649,7 @@ nir_builder PRINTFLIKE(3, 4)
       va_end(args);
    }

-   b.shader->options = &dev->physical_device->nir_options[stage];
+   b.shader->options = &pdev->nir_options[stage];

    radv_device_associate_nir(dev, b.shader);

@@ -684,6 +686,7 @@ void
 radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b, bool is_integer, int samples,
                                     nir_variable *input_img, nir_variable *color, nir_def *img_coord)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    nir_deref_instr *input_img_deref = nir_build_deref_var(b, input_img);
    nir_def *sample0 = nir_txf_ms_deref(b, input_img_deref, img_coord, nir_imm_int(b, 0));

@@ -692,7 +695,7 @@ radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b,
       return;
    }

-   if (device->physical_device->use_fmask) {
+   if (pdev->use_fmask) {
       nir_def *all_same = nir_samples_identical_deref(b, input_img_deref, img_coord);
       nir_push_if(b, nir_inot(b, all_same));
    }

@@ -706,7 +709,7 @@ radv_meta_build_resolve_shader_core(struct radv_device *device, nir_builder *b,
    accum = nir_fdiv_imm(b, accum, samples);
    nir_store_var(b, color, accum, 0xf);

-   if (device->physical_device->use_fmask) {
+   if (pdev->use_fmask) {
       nir_push_else(b, NULL);
       nir_store_var(b, color, sample0, 0xf);
       nir_pop_if(b, NULL);

@@ -32,9 +32,10 @@
 VkResult
 radv_device_init_meta_astc_decode_state(struct radv_device *device, bool on_demand)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_meta_state *state = &device->meta_state;

-   if (!device->physical_device->emulate_astc)
+   if (!pdev->emulate_astc)
       return VK_SUCCESS;

    return vk_texcompress_astc_init(&device->vk, &state->alloc, state->cache, &state->astc_decode);

@@ -43,10 +44,11 @@ radv_device_init_meta_astc_decode_state(struct radv_device *device, bool on_dema
 void
 radv_device_finish_meta_astc_decode_state(struct radv_device *device)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_meta_state *state = &device->meta_state;
    struct vk_texcompress_astc_state *astc = state->astc_decode;

-   if (!device->physical_device->emulate_astc)
+   if (!pdev->emulate_astc)
       return;

    vk_texcompress_astc_finish(&device->vk, &state->alloc, astc);

@@ -213,9 +213,10 @@ static bool
 radv_prefer_compute_dma(const struct radv_device *device, uint64_t size, struct radeon_winsys_bo *src_bo,
                         struct radeon_winsys_bo *dst_bo)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    bool use_compute = size >= RADV_BUFFER_OPS_CS_THRESHOLD;

-   if (device->physical_device->info.gfx_level >= GFX10 && device->physical_device->info.has_dedicated_vram) {
+   if (pdev->info.gfx_level >= GFX10 && pdev->info.has_dedicated_vram) {
       if ((src_bo && !(src_bo->initial_domain & RADEON_DOMAIN_VRAM)) ||
           (dst_bo && !(dst_bo->initial_domain & RADEON_DOMAIN_VRAM))) {
          /* Prefer CP DMA for GTT on dGPUS due to slow PCIe. */

@@ -1174,11 +1174,12 @@ fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_meta_bl
                    const struct radv_meta_blit2d_surf *img_bsurf, const struct radv_meta_blit2d_rect *rect,
                    bool to_image)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    const unsigned mip_level = img_bsurf->level;
    const struct radv_image *image = img_bsurf->image;
    const struct radeon_surf *surf = &image->planes[0].surface;
    struct radv_device *device = cmd_buffer->device;
-   const struct radeon_info *gpu_info = &device->physical_device->info;
+   const struct radeon_info *gpu_info = &pdev->info;
    struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws);
    struct ac_surf_info surf_info = radv_get_ac_surf_info(device, image);

@@ -1243,9 +1244,10 @@ fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_meta_bl
 static unsigned
 get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    unsigned stride;

-   if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) {
+   if (pdev->info.gfx_level >= GFX9) {
       stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
    } else {
       stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;

@@ -1139,10 +1139,11 @@ uint32_t
 radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
                  uint32_t value)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    uint64_t offset = image->bindings[0].offset + image->planes[0].surface.cmask_offset;
    uint64_t size;

-   if (cmd_buffer->device->physical_device->info.gfx_level == GFX9) {
+   if (pdev->info.gfx_level == GFX9) {
       /* TODO: clear layers. */
       size = image->planes[0].surface.cmask_size;
    } else {

@@ -1178,6 +1179,7 @@ uint32_t
 radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
                uint32_t value)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
    uint32_t layer_count = vk_image_subresource_layer_count(&image->vk, range);
    uint32_t flush_bits = 0;

@@ -1190,12 +1192,12 @@ radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, con
       uint32_t level = range->baseMipLevel + l;
       uint64_t size;

-      if (cmd_buffer->device->physical_device->info.gfx_level >= GFX10) {
+      if (pdev->info.gfx_level >= GFX10) {
          /* DCC for mipmaps+layers is currently disabled. */
          offset += image->planes[0].surface.meta_slice_size * range->baseArrayLayer +
                    image->planes[0].surface.u.gfx9.meta_levels[level].offset;
          size = image->planes[0].surface.u.gfx9.meta_levels[level].size * layer_count;
-      } else if (cmd_buffer->device->physical_device->info.gfx_level == GFX9) {
+      } else if (pdev->info.gfx_level == GFX9) {
          /* Mipmap levels and layers aren't implemented. */
          assert(level == 0);
          size = image->planes[0].surface.meta_size;

@@ -1331,6 +1333,7 @@ uint32_t
 radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
                  const VkImageSubresourceRange *range, uint32_t value)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
    uint32_t flush_bits = 0;
    uint32_t htile_mask;

@@ -1338,7 +1341,7 @@ radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *im
    htile_mask = radv_get_htile_mask(cmd_buffer->device, image, range->aspectMask);

    if (level_count != image->vk.mip_levels) {
-      assert(cmd_buffer->device->physical_device->info.gfx_level >= GFX10);
+      assert(pdev->info.gfx_level >= GFX10);

       /* Clear individuals levels separately. */
       for (uint32_t l = 0; l < level_count; l++) {

@@ -1398,7 +1401,8 @@ enum {
 static uint32_t
 radv_dcc_single_clear_value(const struct radv_device *device)
 {
-   return device->physical_device->info.gfx_level >= GFX11 ? RADV_DCC_GFX11_CLEAR_SINGLE : RADV_DCC_GFX9_CLEAR_SINGLE;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   return pdev->info.gfx_level >= GFX11 ? RADV_DCC_GFX11_CLEAR_SINGLE : RADV_DCC_GFX9_CLEAR_SINGLE;
 }

 static void

@@ -1605,6 +1609,7 @@ radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_
                           VkImageLayout image_layout, const VkClearRect *clear_rect, VkClearColorValue clear_value,
                           uint32_t view_mask)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    uint32_t clear_color[2];

    if (!iview || !iview->support_fast_clear)

@@ -1641,7 +1646,7 @@ radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_
    bool can_avoid_fast_clear_elim;
    uint32_t reset_value;

-   if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
+   if (pdev->info.gfx_level >= GFX11) {
       if (!gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value))
          return false;
    } else {

@@ -1650,7 +1655,7 @@ radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_
    }

    if (iview->image->vk.mip_levels > 1) {
-      if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) {
+      if (pdev->info.gfx_level >= GFX9) {
         uint32_t last_level = iview->vk.base_mip_level + iview->vk.level_count - 1;
         if (last_level >= iview->image->planes[0].surface.num_meta_levels) {
            /* Do not fast clears if one level can't be fast cleard. */

@@ -1680,6 +1685,7 @@ radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_imag
                       const VkClearAttachment *clear_att, enum radv_cmd_flush_bits *pre_flush,
                       enum radv_cmd_flush_bits *post_flush)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    VkClearColorValue clear_value = clear_att->clearValue.color;
    uint32_t clear_color[4], flush_bits = 0;
    uint32_t cmask_clear_value;

@@ -1710,7 +1716,7 @@ radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_imag
    uint32_t reset_value;
    bool can_avoid_fast_clear_elim = true;

-   if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
+   if (pdev->info.gfx_level >= GFX11) {
       ASSERTED bool result = gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value);
       assert(result);
    } else {

@@ -2074,6 +2080,7 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *imag
                      const VkClearValue *clear_value, uint32_t range_count, const VkImageSubresourceRange *ranges,
                      bool cs)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    VkFormat format = image->vk.format;
    VkClearValue internal_clear_value;

@@ -2086,8 +2093,8 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *imag

    if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
       bool blendable;
-      if (cs ? !radv_is_storage_image_format_supported(cmd_buffer->device->physical_device, format)
-             : !radv_is_colorbuffer_format_supported(cmd_buffer->device->physical_device, format, &blendable)) {
+      if (cs ? !radv_is_storage_image_format_supported(pdev, format)
             : !radv_is_colorbuffer_format_supported(pdev, format, &blendable)) {
         format = VK_FORMAT_R32_UINT;
         internal_clear_value.color.uint32[0] = float3_to_rgb9e5(clear_value->color.float32);

@@ -239,13 +239,14 @@ radv_CmdCopyBufferToImage2(VkCommandBuffer commandBuffer, const VkCopyBufferToIm
    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
    RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
    RADV_FROM_HANDLE(radv_image, dst_image, pCopyBufferToImageInfo->dstImage);
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);

    for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
       copy_buffer_to_image(cmd_buffer, src_buffer, dst_image, pCopyBufferToImageInfo->dstImageLayout,
                            &pCopyBufferToImageInfo->pRegions[r]);
    }

-   if (radv_is_format_emulated(cmd_buffer->device->physical_device, dst_image->vk.format)) {
+   if (radv_is_format_emulated(pdev, dst_image->vk.format)) {
       cmd_buffer->state.flush_bits |=
          RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
          radv_src_access_flush(cmd_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, dst_image) |

@@ -422,6 +423,8 @@ static void
 copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkImageLayout src_image_layout,
            struct radv_image *dst_image, VkImageLayout dst_image_layout, const VkImageCopy2 *region)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
+
    if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
       transfer_copy_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout, region);
       return;

@@ -499,9 +502,9 @@ copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkI
                                                src_image_layout, src_queue_mask);
    bool need_dcc_sign_reinterpret = false;

-   if (!src_compressed || (radv_dcc_formats_compatible(cmd_buffer->device->physical_device->info.gfx_level,
-                                                       b_src.format, b_dst.format, &need_dcc_sign_reinterpret) &&
-                           !need_dcc_sign_reinterpret)) {
+   if (!src_compressed ||
+       (radv_dcc_formats_compatible(pdev->info.gfx_level, b_src.format, b_dst.format, &need_dcc_sign_reinterpret) &&
+        !need_dcc_sign_reinterpret)) {
       b_src.format = b_dst.format;
    } else if (!dst_compressed) {
       b_dst.format = b_src.format;

@@ -613,13 +616,14 @@ radv_CmdCopyImage2(VkCommandBuffer commandBuffer, const VkCopyImageInfo2 *pCopyI
    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
    RADV_FROM_HANDLE(radv_image, src_image, pCopyImageInfo->srcImage);
    RADV_FROM_HANDLE(radv_image, dst_image, pCopyImageInfo->dstImage);
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);

    for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
       copy_image(cmd_buffer, src_image, pCopyImageInfo->srcImageLayout, dst_image, pCopyImageInfo->dstImageLayout,
                  &pCopyImageInfo->pRegions[r]);
    }

-   if (radv_is_format_emulated(cmd_buffer->device->physical_device, dst_image->vk.format)) {
+   if (radv_is_format_emulated(pdev, dst_image->vk.format)) {
       cmd_buffer->state.flush_bits |=
          RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
          radv_src_access_flush(cmd_buffer, VK_ACCESS_TRANSFER_WRITE_BIT, dst_image) |

@@ -43,6 +43,7 @@ radv_device_finish_meta_copy_vrs_htile_state(struct radv_device *device)
 static nir_shader *
 build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "meta_copy_vrs_htile");
    b.shader->info.workgroup_size[0] = 8;
    b.shader->info.workgroup_size[1] = 8;

@@ -64,8 +65,8 @@ build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf
    /* Get the HTILE addr from coordinates. */
    nir_def *zero = nir_imm_int(&b, 0);
    nir_def *htile_addr =
-      ac_nir_htile_addr_from_coord(&b, &device->physical_device->info, &surf->u.gfx9.zs.htile_equation, htile_pitch,
-                                   htile_slice_size, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero);
+      ac_nir_htile_addr_from_coord(&b, &pdev->info, &surf->u.gfx9.zs.htile_equation, htile_pitch, htile_slice_size,
+                                   nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero);

    /* Set up the input VRS image descriptor. */
    const struct glsl_type *vrs_sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT);

@@ -31,6 +31,7 @@
 static nir_shader *
 build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *surf)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(dev);
    enum glsl_sampler_dim dim = GLSL_SAMPLER_DIM_BUF;
    const struct glsl_type *buf_type = glsl_image_type(dim, false, GLSL_TYPE_UINT);
    nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "dcc_retile_compute");

@@ -60,12 +61,12 @@ build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *sur
    coord =
       nir_imul(&b, coord, nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, surf->u.gfx9.color.dcc_block_height));

-   nir_def *src = ac_nir_dcc_addr_from_coord(&b, &dev->physical_device->info, surf->bpe,
-                                             &surf->u.gfx9.color.dcc_equation, src_dcc_pitch, src_dcc_height, zero,
-                                             nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero, zero);
-   nir_def *dst = ac_nir_dcc_addr_from_coord(
-      &b, &dev->physical_device->info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation, dst_dcc_pitch,
-      dst_dcc_height, zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero, zero);
+   nir_def *src = ac_nir_dcc_addr_from_coord(&b, &pdev->info, surf->bpe, &surf->u.gfx9.color.dcc_equation,
+                                             src_dcc_pitch, src_dcc_height, zero, nir_channel(&b, coord, 0),
+                                             nir_channel(&b, coord, 1), zero, zero, zero);
+   nir_def *dst = ac_nir_dcc_addr_from_coord(&b, &pdev->info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation,
+                                             dst_dcc_pitch, dst_dcc_height, zero, nir_channel(&b, coord, 0),
+                                             nir_channel(&b, coord, 1), zero, zero, zero);

    nir_def *dcc_val = nir_image_deref_load(&b, 1, 32, input_dcc_ref, nir_vec4(&b, src, src, src, src),
                                            nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = dim);

@@ -34,13 +34,14 @@
 VkResult
 radv_device_init_meta_etc_decode_state(struct radv_device *device, bool on_demand)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_meta_state *state = &device->meta_state;

-   if (!device->physical_device->emulate_etc2)
+   if (!pdev->emulate_etc2)
       return VK_SUCCESS;

    state->etc_decode.allocator = &state->alloc;
-   state->etc_decode.nir_options = &device->physical_device->nir_options[MESA_SHADER_COMPUTE];
+   state->etc_decode.nir_options = &pdev->nir_options[MESA_SHADER_COMPUTE];
    state->etc_decode.pipeline_cache = state->cache;
    vk_texcompress_etc2_init(&device->vk, &state->etc_decode);

@@ -155,6 +155,7 @@ create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
 static VkResult
 create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipelineLayout layout)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    VkResult result;
    VkDevice device_h = radv_device_to_handle(device);

@@ -363,8 +364,8 @@ create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipeli
      },
      &(struct radv_graphics_pipeline_create_info){
         .use_rectlist = true,
-         .custom_blend_mode = device->physical_device->info.gfx_level >= GFX11 ? V_028808_CB_DCC_DECOMPRESS_GFX11
-                                                                               : V_028808_CB_DCC_DECOMPRESS_GFX8,
+         .custom_blend_mode =
+            pdev->info.gfx_level >= GFX11 ? V_028808_CB_DCC_DECOMPRESS_GFX11 : V_028808_CB_DCC_DECOMPRESS_GFX8,
      },
      &device->meta_state.alloc, &device->meta_state.fast_clear_flush.dcc_decompress_pipeline);
    if (result != VK_SUCCESS)

@@ -232,8 +232,10 @@ radv_can_use_fmask_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_im
                         const struct radv_image *dst_image, unsigned num_rects,
                         const struct radv_meta_blit2d_rect *rects)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
+
    /* TODO: Test on pre GFX10 chips. */
-   if (cmd_buffer->device->physical_device->info.gfx_level < GFX10)
+   if (pdev->info.gfx_level < GFX10)
       return false;

    /* TODO: Add support for layers. */

@@ -253,7 +253,8 @@ enum radv_resolve_method {
 static bool
 image_hw_resolve_compat(const struct radv_device *device, struct radv_image *src_image, struct radv_image *dst_image)
 {
-   if (device->physical_device->info.gfx_level >= GFX9) {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   if (pdev->info.gfx_level >= GFX9) {
       return dst_image->planes[0].surface.u.gfx9.swizzle_mode == src_image->planes[0].surface.u.gfx9.swizzle_mode;
    } else {
       return dst_image->planes[0].surface.micro_tile_mode == src_image->planes[0].surface.micro_tile_mode;

@@ -506,9 +507,9 @@ radv_CmdResolveImage2(VkCommandBuffer commandBuffer, const VkResolveImageInfo2 *
    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
    RADV_FROM_HANDLE(radv_image, src_image, pResolveImageInfo->srcImage);
    RADV_FROM_HANDLE(radv_image, dst_image, pResolveImageInfo->dstImage);
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    VkImageLayout src_image_layout = pResolveImageInfo->srcImageLayout;
    VkImageLayout dst_image_layout = pResolveImageInfo->dstImageLayout;
-   const struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
    enum radv_resolve_method resolve_method = pdev->info.gfx_level >= GFX11 ? RESOLVE_FRAGMENT : RESOLVE_HW;

    /* we can use the hw resolve only for single full resolves */

@@ -622,7 +623,7 @@ radv_cmd_buffer_resolve_rendering_hw(struct radv_cmd_buffer *cmd_buffer, struct
 void
 radv_cmd_buffer_resolve_rendering(struct radv_cmd_buffer *cmd_buffer)
 {
-   const struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    const struct radv_rendering_state *render = &cmd_buffer->state.render;
    enum radv_resolve_method resolve_method = pdev->info.gfx_level >= GFX11 ? RESOLVE_FRAGMENT : RESOLVE_HW;

@@ -501,12 +501,13 @@ apply_layout_to_tex(nir_builder *b, apply_layout_state *state, nir_tex_instr *te
 void
 radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device, const struct radv_shader_stage *stage)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    apply_layout_state state = {
-      .gfx_level = device->physical_device->info.gfx_level,
-      .address32_hi = device->physical_device->info.address32_hi,
+      .gfx_level = pdev->info.gfx_level,
+      .address32_hi = pdev->info.address32_hi,
      .disable_aniso_single_level = device->instance->drirc.disable_aniso_single_level,
-      .has_image_load_dcc_bug = device->physical_device->info.has_image_load_dcc_bug,
-      .disable_tg4_trunc_coord = !device->physical_device->info.conformant_trunc_coord && !device->disable_trunc_coord,
+      .has_image_load_dcc_bug = pdev->info.has_image_load_dcc_bug,
+      .disable_tg4_trunc_coord = !pdev->info.conformant_trunc_coord && !device->disable_trunc_coord,
      .args = &stage->args,
      .info = &stage->info,
      .layout = &stage->layout,

@@ -72,6 +72,8 @@ radv_nir_lower_io_to_scalar_early(nir_shader *nir, nir_variable_mode mask)
 void
 radv_nir_lower_io(struct radv_device *device, nir_shader *nir)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
       nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, MESA_SHADER_FRAGMENT);
    }

@@ -89,7 +91,7 @@ radv_nir_lower_io(struct radv_device *device, nir_shader *nir)

    NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out);

-   if (device->physical_device->use_ngg_streamout && nir->xfb_info) {
+   if (pdev->use_ngg_streamout && nir->xfb_info) {
       NIR_PASS_V(nir, nir_io_add_intrinsic_xfb_info);

       /* The total number of shader outputs is required for computing the pervertex LDS size for

@@ -133,6 +135,7 @@ radv_map_io_driver_location(unsigned semantic)
 bool
 radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *stage)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_shader_info *info = &stage->info;
    ac_nir_map_io_driver_location map_input = info->inputs_linked ? NULL : radv_map_io_driver_location;
    ac_nir_map_io_driver_location map_output = info->outputs_linked ? NULL : radv_map_io_driver_location;

@@ -144,35 +147,33 @@ radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *s
                     info->vs.tcs_temp_only_input_mask);
         return true;
      } else if (info->vs.as_es) {
-         NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, device->physical_device->info.gfx_level,
-                    info->esgs_itemsize);
+         NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize);
         return true;
      }
   } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
      NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, map_input, info->vs.tcs_in_out_eq);
-      NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, map_output, device->physical_device->info.gfx_level,
-                 info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read, info->tcs.num_linked_outputs,
-                 info->tcs.num_linked_patch_outputs, info->wave_size, false, false);
+      NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, map_output, pdev->info.gfx_level, info->tcs.tes_inputs_read,
+                 info->tcs.tes_patch_inputs_read, info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs,
+                 info->wave_size, false, false);

      return true;
   } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
      NIR_PASS_V(nir, ac_nir_lower_tes_inputs_to_mem, map_input);

      if (info->tes.as_es) {
-         NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, device->physical_device->info.gfx_level,
-                    info->esgs_itemsize);
+         NIR_PASS_V(nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize);
      }

      return true;
   } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
-      NIR_PASS_V(nir, ac_nir_lower_gs_inputs_to_mem, map_input, device->physical_device->info.gfx_level, false);
+      NIR_PASS_V(nir, ac_nir_lower_gs_inputs_to_mem, map_input, pdev->info.gfx_level, false);
      return true;
   } else if (nir->info.stage == MESA_SHADER_TASK) {
-      ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, device->physical_device->task_info.num_entries,
+      ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, pdev->task_info.num_entries,
                                       info->cs.has_query);
      return true;
   } else if (nir->info.stage == MESA_SHADER_MESH) {
-      ac_nir_lower_mesh_inputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, device->physical_device->task_info.num_entries);
+      ac_nir_lower_mesh_inputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES, pdev->task_info.num_entries);
      return true;
   }

@@ -648,6 +648,7 @@ lower_rq_terminate(nir_builder *b, nir_def *index, nir_intrinsic_instr *instr, s
 bool
 radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    bool progress = false;
    struct hash_table *query_ht = _mesa_pointer_hash_table_create(NULL);

@@ -655,7 +656,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
      if (!var->data.ray_query)
         continue;

-      lower_ray_query(shader, var, query_ht, device->physical_device->max_shared_size);
+      lower_ray_query(shader, var, query_ht, pdev->max_shared_size);

      progress = true;
   }

@@ -670,7 +671,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
      if (!var->data.ray_query)
         continue;

-      lower_ray_query(shader, var, query_ht, device->physical_device->max_shared_size);
+      lower_ray_query(shader, var, query_ht, pdev->max_shared_size);

      progress = true;
   }

@@ -297,11 +297,12 @@ build_addr_to_node(nir_builder *b, nir_def *addr)
 static nir_def *
 build_node_to_addr(struct radv_device *device, nir_builder *b, nir_def *node, bool skip_type_and)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    nir_def *addr = skip_type_and ? node : nir_iand_imm(b, node, ~7ull);
    addr = nir_ishl_imm(b, addr, 3);
    /* Assumes everything is in the top half of address space, which is true in
     * GFX9+ for now. */
-   return device->physical_device->info.gfx_level >= GFX9 ? nir_ior_imm(b, addr, 0xffffull << 48) : addr;
+   return pdev->info.gfx_level >= GFX9 ? nir_ior_imm(b, addr, 0xffffull << 48) : addr;
 }

 nir_def *

@@ -477,6 +478,7 @@ radv_test_flag(nir_builder *b, const struct radv_ray_traversal_args *args, uint3
 nir_def *
 radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete");
    nir_store_var(b, incomplete, nir_imm_true(b), 0x1);

@@ -568,7 +570,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
            nir_def *global_bvh_node = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node));

            nir_def *intrinsic_result = NULL;
-            if (!radv_emulate_rt(device->physical_device)) {
+            if (!radv_emulate_rt(pdev)) {
               intrinsic_result =
                  nir_bvh64_intersect_ray_amd(b, 32, desc, nir_unpack_64_2x32(b, global_bvh_node),
                                              nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin),

@@ -1524,6 +1524,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
                      const VkRayTracingPipelineCreateInfoKHR *pCreateInfo, bool monolithic, nir_builder *b,
                      struct rt_variables *vars, bool ignore_cull_mask, struct radv_ray_tracing_stage_info *info)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    nir_variable *barycentrics =
       nir_variable_create(b->shader, nir_var_ray_hit_attrib, glsl_vector_type(GLSL_TYPE_FLOAT, 2), "barycentrics");
    barycentrics->data.driver_location = 0;

@@ -1602,7 +1603,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
      .tmin = nir_load_var(b, vars->tmin),
      .dir = nir_load_var(b, vars->direction),
      .vars = trav_vars_args,
-      .stack_stride = device->physical_device->rt_wave_size * sizeof(uint32_t),
+      .stack_stride = pdev->rt_wave_size * sizeof(uint32_t),
      .stack_entries = MAX_STACK_ENTRY_COUNT,
      .stack_base = 0,
      .ignore_cull_mask = ignore_cull_mask,

@@ -1638,7 +1639,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
         hit_attribs[i] =
            nir_local_variable_create(nir_shader_get_entrypoint(b->shader), glsl_uint_type(), "ahit_attrib");

-      lower_hit_attribs(b->shader, hit_attribs, device->physical_device->rt_wave_size);
+      lower_hit_attribs(b->shader, hit_attribs, pdev->rt_wave_size);
   }

   /* Initialize follow-up shader. */

@@ -1702,6 +1703,7 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_
                             const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                             struct radv_ray_tracing_stage_info *info)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const VkPipelineCreateFlagBits2KHR create_flags = vk_rt_pipeline_create_flags(pCreateInfo);

    /* Create the traversal shader as an intersection shader to prevent validation failures due to

@@ -1709,8 +1711,8 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_
    nir_builder b = radv_meta_init_shader(device, MESA_SHADER_INTERSECTION, "rt_traversal");
    b.shader->info.internal = false;
    b.shader->info.workgroup_size[0] = 8;
-   b.shader->info.workgroup_size[1] = device->physical_device->rt_wave_size == 64 ? 8 : 4;
-   b.shader->info.shared_size = device->physical_device->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t);
+   b.shader->info.workgroup_size[1] = pdev->rt_wave_size == 64 ? 8 : 4;
+   b.shader->info.shared_size = pdev->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t);
    struct rt_variables vars = create_rt_variables(b.shader, device, create_flags, false);

    if (info->tmin.state == RADV_RT_CONST_ARG_STATE_VALID)

@@ -1773,6 +1775,7 @@ lower_rt_instruction_monolithic(nir_builder *b, nir_instr *instr, void *data)
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

    struct lower_rt_instruction_monolithic_state *state = data;
+   const struct radv_physical_device *pdev = radv_device_physical(state->device);
    struct rt_variables *vars = state->vars;

    switch (intr->intrinsic) {

@@ -1800,8 +1803,8 @@ lower_rt_instruction_monolithic(nir_builder *b, nir_instr *instr, void *data)
      nir_store_var(b, vars->stack_ptr, nir_iadd_imm(b, stack_ptr, b->shader->scratch_size), 0x1);

      radv_build_traversal(state->device, state->pipeline, state->pCreateInfo, true, b, vars, ignore_cull_mask, NULL);
-      b->shader->info.shared_size = MAX2(b->shader->info.shared_size, state->device->physical_device->rt_wave_size *
-                                                                         MAX_STACK_ENTRY_COUNT * sizeof(uint32_t));
+      b->shader->info.shared_size =
+         MAX2(b->shader->info.shared_size, pdev->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t));

      nir_store_var(b, vars->stack_ptr, stack_ptr, 0x1);

@@ -424,7 +424,9 @@ cleanup:
 VkResult
 radv_device_init_null_accel_struct(struct radv_device *device)
 {
-   if (device->physical_device->memory_properties.memoryTypeCount == 0)
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
+   if (pdev->memory_properties.memoryTypeCount == 0)
       return VK_SUCCESS; /* Exit in the case of null winsys. */

    VkDevice _device = radv_device_to_handle(device);

@@ -465,9 +467,9 @@ radv_device_init_null_accel_struct(struct radv_device *device)
    VkMemoryAllocateInfo alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .allocationSize = mem_req.memoryRequirements.size,
-      .memoryTypeIndex = radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-                                                                            VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
-                                                                            VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
+      .memoryTypeIndex =
+         radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
   };

   result = radv_AllocateMemory(_device, &alloc_info, &device->meta_state.alloc, &memory);

@@ -1537,9 +1539,9 @@ radv_GetDeviceAccelerationStructureCompatibilityKHR(VkDevice _device,
                                                     VkAccelerationStructureCompatibilityKHR *pCompatibility)
 {
    RADV_FROM_HANDLE(radv_device, device, _device);
-   bool compat =
-      memcmp(pVersionInfo->pVersionData, device->physical_device->driver_uuid, VK_UUID_SIZE) == 0 &&
-      memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, device->physical_device->cache_uuid, VK_UUID_SIZE) == 0;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   bool compat = memcmp(pVersionInfo->pVersionData, pdev->driver_uuid, VK_UUID_SIZE) == 0 &&
+                 memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, pdev->cache_uuid, VK_UUID_SIZE) == 0;
    *pCompatibility = compat ? VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR
                             : VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
 }

@@ -1601,6 +1603,7 @@ radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
    RADV_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
    RADV_FROM_HANDLE(radv_buffer, src_buffer, src->buffer);
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    struct radv_meta_saved_state saved_state;

    VkResult result = radv_device_init_accel_struct_copy_state(cmd_buffer->device);

@@ -1634,8 +1637,8 @@ radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,

    /* Set the header of the serialized data. */
    uint8_t header_data[2 * VK_UUID_SIZE];
-   memcpy(header_data, cmd_buffer->device->physical_device->driver_uuid, VK_UUID_SIZE);
-   memcpy(header_data + VK_UUID_SIZE, cmd_buffer->device->physical_device->cache_uuid, VK_UUID_SIZE);
+   memcpy(header_data, pdev->driver_uuid, VK_UUID_SIZE);
+   memcpy(header_data + VK_UUID_SIZE, pdev->cache_uuid, VK_UUID_SIZE);

    radv_update_buffer_cp(cmd_buffer, pInfo->dst.deviceAddress, header_data, sizeof(header_data));
 }

@@ -114,6 +114,7 @@ radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info,

 {
    RADV_FROM_HANDLE(radv_device, device, device_h);
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    VkImage image_h = VK_NULL_HANDLE;
    struct radv_image *image = NULL;
    VkResult result;

@@ -141,10 +142,9 @@ radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info,

    /* Find the first VRAM memory type, or GART for PRIME images. */
    int memory_type_index = -1;
-   for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) {
-      bool is_local = !!(device->physical_device->memory_properties.memoryTypes[i].propertyFlags &
-                         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
-      bool is_32bit = !!(device->physical_device->memory_types_32bit & (1u << i));
+   for (int i = 0; i < pdev->memory_properties.memoryTypeCount; ++i) {
+      bool is_local = !!(pdev->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+      bool is_32bit = !!(pdev->memory_types_32bit & (1u << i));
      if (is_local && !is_32bit) {
         memory_type_index = i;
         break;

@@ -217,7 +217,7 @@ radv_GetSwapchainGrallocUsageANDROID(VkDevice device_h, VkFormat format, VkImage
                                      int *grallocUsage)
 {
    RADV_FROM_HANDLE(radv_device, device, device_h);
-   struct radv_physical_device *pdev = device->physical_device;
+   struct radv_physical_device *pdev = radv_device_physical(device);
    VkPhysicalDevice pdev_h = radv_physical_device_to_handle(pdev);
    VkResult result;

@@ -298,7 +298,7 @@ radv_GetSwapchainGrallocUsage2ANDROID(VkDevice device_h, VkFormat format, VkImag
     * vkGetSwapchainGrallocUsageANDROID. */
 #if ANDROID_API_LEVEL >= 26
    RADV_FROM_HANDLE(radv_device, device, device_h);
-   struct radv_physical_device *pdev = device->physical_device;
+   struct radv_physical_device *pdev = radv_device_physical(device);
    VkPhysicalDevice pdev_h = radv_physical_device_to_handle(pdev);
    VkResult result;

@@ -408,6 +408,7 @@ get_ahb_buffer_format_properties(VkDevice device_h, const struct AHardwareBuffer
                                  VkAndroidHardwareBufferFormatPropertiesANDROID *pProperties)
 {
    RADV_FROM_HANDLE(radv_device, device, device_h);
+   struct radv_physical_device *pdev = radv_device_physical(device);

    /* Get a description of buffer contents . */
    AHardwareBuffer_Desc desc;

@@ -431,8 +432,7 @@ get_ahb_buffer_format_properties(VkDevice device_h, const struct AHardwareBuffer

    VkFormatProperties2 format_properties = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2};

-   radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(device->physical_device), p->format,
-                                           &format_properties);
+   radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(pdev), p->format, &format_properties);

    if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER)
       p->formatFeatures = format_properties.formatProperties.linearTilingFeatures;

@@ -481,6 +481,7 @@ get_ahb_buffer_format_properties2(VkDevice device_h, const struct AHardwareBuffe
                                   VkAndroidHardwareBufferFormatProperties2ANDROID *pProperties)
 {
    RADV_FROM_HANDLE(radv_device, device, device_h);
+   struct radv_physical_device *pdev = radv_device_physical(device);

    /* Get a description of buffer contents . */
    AHardwareBuffer_Desc desc;

@@ -504,8 +505,7 @@ get_ahb_buffer_format_properties2(VkDevice device_h, const struct AHardwareBuffe

    VkFormatProperties2 format_properties = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2};

-   radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(device->physical_device), p->format,
-                                           &format_properties);
+   radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(pdev), p->format, &format_properties);

    if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER)
       p->formatFeatures = format_properties.formatProperties.linearTilingFeatures;

@@ -554,7 +554,7 @@ radv_GetAndroidHardwareBufferPropertiesANDROID(VkDevice device_h, const struct A
                                                VkAndroidHardwareBufferPropertiesANDROID *pProperties)
 {
    RADV_FROM_HANDLE(radv_device, dev, device_h);
-   struct radv_physical_device *pdev = dev->physical_device;
+   struct radv_physical_device *pdev = radv_device_physical(dev);

    VkAndroidHardwareBufferFormatPropertiesANDROID *format_prop =
       vk_find_struct(pProperties->pNext, ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_ANDROID);

@@ -176,9 +176,10 @@ static void
 radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize size, VkBufferCreateFlags flags,
                                     VkBufferUsageFlags2KHR usage, VkMemoryRequirements2 *pMemoryRequirements)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    pMemoryRequirements->memoryRequirements.memoryTypeBits =
-      ((1u << device->physical_device->memory_properties.memoryTypeCount) - 1u) &
-      ~device->physical_device->memory_types_32bit;
+      ((1u << pdev->memory_properties.memoryTypeCount) - 1u) & ~pdev->memory_types_32bit;

    /* Allow 32-bit address-space for DGC usage, as this buffer will contain
     * cmd buffer upload buffers, and those get passed to shaders through 32-bit

@@ -190,14 +191,14 @@ radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize siz
     * intersection is non-zero at least)
     */
    if ((usage & VK_BUFFER_USAGE_2_INDIRECT_BUFFER_BIT_KHR) && radv_uses_device_generated_commands(device))
-      pMemoryRequirements->memoryRequirements.memoryTypeBits |= device->physical_device->memory_types_32bit;
+      pMemoryRequirements->memoryRequirements.memoryTypeBits |= pdev->memory_types_32bit;

    /* Force 32-bit address-space for descriptor buffers usage because they are passed to shaders
     * through 32-bit pointers.
     */
    if (usage &
        (VK_BUFFER_USAGE_2_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | VK_BUFFER_USAGE_2_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT))
-      pMemoryRequirements->memoryRequirements.memoryTypeBits = device->physical_device->memory_types_32bit;
+      pMemoryRequirements->memoryRequirements.memoryTypeBits = pdev->memory_types_32bit;

    if (flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
       pMemoryRequirements->memoryRequirements.alignment = 4096;

@@ -34,6 +34,7 @@ void
 radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFormat vk_format, unsigned offset,
                                   unsigned range, uint32_t *state)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct util_format_description *desc;
    unsigned stride;
    unsigned num_format, data_format;

@@ -49,16 +50,15 @@ radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFor

    va += offset;

-   if (device->physical_device->info.gfx_level != GFX8 && stride) {
+   if (pdev->info.gfx_level != GFX8 && stride) {
       range /= stride;
    }

    rsrc_word3 = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
                 S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));

-   if (device->physical_device->info.gfx_level >= GFX10) {
-      const struct gfx10_format *fmt =
-         &ac_get_gfx10_format_table(&device->physical_device->info)[vk_format_to_pipe_format(vk_format)];
+   if (pdev->info.gfx_level >= GFX10) {
+      const struct gfx10_format *fmt = &ac_get_gfx10_format_table(&pdev->info)[vk_format_to_pipe_format(vk_format)];

      /* OOB_SELECT chooses the out-of-bounds check.
       *

@@ -81,7 +81,7 @@ radv_make_texel_buffer_descriptor(struct radv_device *device, uint64_t va, VkFor
       * offset+payload > NUM_RECORDS
       */
      rsrc_word3 |= S_008F0C_FORMAT(fmt->img_format) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
-                    S_008F0C_RESOURCE_LEVEL(device->physical_device->info.gfx_level < GFX11);
+                    S_008F0C_RESOURCE_LEVEL(pdev->info.gfx_level < GFX11);
   } else {
      num_format = radv_translate_buffer_numformat(desc, first_non_void);
      data_format = radv_translate_buffer_dataformat(desc, first_non_void);

File diff suppressed because it is too large

@@ -38,8 +38,9 @@ radv_set_context_reg_array(struct radeon_cmdbuf *cs, unsigned reg, unsigned num,
 VkResult
 radv_create_shadow_regs_preamble(struct radv_device *device, struct radv_queue_state *queue_state)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radeon_winsys *ws = device->ws;
-   const struct radeon_info *gpu_info = &device->physical_device->info;
+   const struct radeon_info *gpu_info = &pdev->info;
    VkResult result;

    struct radeon_cmdbuf *cs = ws->cs_create(ws, AMD_IP_GFX, false);

@@ -125,7 +126,8 @@ radv_emit_shadow_regs_preamble(struct radeon_cmdbuf *cs, const struct radv_devic
 VkResult
 radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct radv_queue *queue)
 {
-   const struct radeon_info *gpu_info = &device->physical_device->info;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const struct radeon_info *gpu_info = &pdev->info;
    struct radeon_winsys *ws = device->ws;
    struct radeon_cmdbuf *cs;
    VkResult result;

@ -108,17 +108,19 @@ radv_dump_trace(const struct radv_device *device, struct radeon_cmdbuf *cs, FILE
|
|||
static void
|
||||
radv_dump_mmapped_reg(const struct radv_device *device, FILE *f, unsigned offset)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radeon_winsys *ws = device->ws;
|
||||
uint32_t value;
|
||||
|
||||
if (ws->read_registers(ws, offset, 1, &value))
|
||||
ac_dump_reg(f, device->physical_device->info.gfx_level, device->physical_device->info.family, offset, value, ~0);
|
||||
ac_dump_reg(f, pdev->info.gfx_level, pdev->info.family, offset, value, ~0);
|
||||
}
|
||||
|
||||
static void
|
||||
radv_dump_debug_registers(const struct radv_device *device, FILE *f)
|
||||
{
|
||||
const struct radeon_info *gpu_info = &device->physical_device->info;
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radeon_info *gpu_info = &pdev->info;
|
||||
|
||||
fprintf(f, "Memory-mapped registers:\n");
|
||||
radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
|
||||
|
|
@ -190,8 +192,9 @@ radv_dump_combined_image_sampler_descriptor(enum amd_gfx_level gfx_level, enum r
|
|||
static void
|
||||
radv_dump_descriptor_set(const struct radv_device *device, const struct radv_descriptor_set *set, unsigned id, FILE *f)
|
||||
{
|
||||
enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
|
||||
enum radeon_family family = device->physical_device->info.family;
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
enum amd_gfx_level gfx_level = pdev->info.gfx_level;
|
||||
enum radeon_family family = pdev->info.family;
|
||||
const struct radv_descriptor_set_layout *layout;
|
||||
int i;
|
||||
|
||||
|
|
@ -376,6 +379,8 @@ static void
|
|||
radv_dump_shader(struct radv_device *device, struct radv_pipeline *pipeline, struct radv_shader *shader,
|
||||
gl_shader_stage stage, const char *dump_dir, FILE *f)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
if (!shader)
|
||||
return;
|
||||
|
||||
|
|
@ -400,7 +405,7 @@ radv_dump_shader(struct radv_device *device, struct radv_pipeline *pipeline, str
|
|||
fprintf(f, "NIR:\n%s\n", shader->nir_string);
|
||||
}
|
||||
|
||||
fprintf(f, "%s IR:\n%s\n", device->physical_device->use_llvm ? "LLVM" : "ACO", shader->ir_string);
|
||||
fprintf(f, "%s IR:\n%s\n", pdev->use_llvm ? "LLVM" : "ACO", shader->ir_string);
|
||||
fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
|
||||
|
||||
radv_dump_shader_stats(device, pipeline, shader, stage, f);
|
||||
|
|
@@ -504,9 +509,10 @@ radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f)
 }

 if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR)) {
+const struct radv_physical_device *pdev = radv_device_physical(device);
 struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
-enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
-unsigned num_waves = ac_get_wave_info(gfx_level, &device->physical_device->info, waves);
+enum amd_gfx_level gfx_level = pdev->info.gfx_level;
+unsigned num_waves = ac_get_wave_info(gfx_level, &pdev->info, waves);

 fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);

@@ -633,21 +639,22 @@ radv_dump_app_info(const struct radv_device *device, FILE *f)
 static void
 radv_dump_device_name(const struct radv_device *device, FILE *f)
 {
-const struct radeon_info *gpu_info = &device->physical_device->info;
+const struct radv_physical_device *pdev = radv_device_physical(device);
+const struct radeon_info *gpu_info = &pdev->info;
 #ifndef _WIN32
 char kernel_version[128] = {0};
 struct utsname uname_data;
 #endif

 #ifdef _WIN32
-fprintf(f, "Device name: %s (DRM %i.%i.%i)\n\n", device->physical_device->marketing_name, gpu_info->drm_major,
-gpu_info->drm_minor, gpu_info->drm_patchlevel);
+fprintf(f, "Device name: %s (DRM %i.%i.%i)\n\n", pdev->marketing_name, gpu_info->drm_major, gpu_info->drm_minor,
+gpu_info->drm_patchlevel);
 #else
 if (uname(&uname_data) == 0)
 snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release);

-fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", device->physical_device->marketing_name, gpu_info->drm_major,
-gpu_info->drm_minor, gpu_info->drm_patchlevel, kernel_version);
+fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", pdev->marketing_name, gpu_info->drm_major, gpu_info->drm_minor,
+gpu_info->drm_patchlevel, kernel_version);
 #endif
 }

@@ -655,18 +662,16 @@ static void
 radv_dump_umr_ring(const struct radv_queue *queue, FILE *f)
 {
 #ifndef _WIN32
+const struct radv_physical_device *pdev = radv_device_physical(queue->device);
 const enum amd_ip_type ring = radv_queue_ring(queue);
-const struct radv_device *device = queue->device;
 char cmd[256];

 /* TODO: Dump compute ring. */
 if (ring != AMD_IP_GFX)
 return;

-sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -RS %s 2>&1", device->physical_device->bus_info.domain,
-device->physical_device->bus_info.bus, device->physical_device->bus_info.dev,
-device->physical_device->bus_info.func,
-device->physical_device->info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
+sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -RS %s 2>&1", pdev->bus_info.domain, pdev->bus_info.bus,
+pdev->bus_info.dev, pdev->bus_info.func, pdev->info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
 fprintf(f, "\nUMR GFX ring:\n\n");
 radv_dump_cmd(cmd, f);
 #endif
@@ -676,18 +681,17 @@ static void
 radv_dump_umr_waves(struct radv_queue *queue, FILE *f)
 {
 #ifndef _WIN32
+const struct radv_physical_device *pdev = radv_device_physical(queue->device);
 enum amd_ip_type ring = radv_queue_ring(queue);
-struct radv_device *device = queue->device;
 char cmd[256];

 /* TODO: Dump compute ring. */
 if (ring != AMD_IP_GFX)
 return;

-sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -O bits,halt_waves -go 0 -wa %s -go 1 2>&1",
-device->physical_device->bus_info.domain, device->physical_device->bus_info.bus,
-device->physical_device->bus_info.dev, device->physical_device->bus_info.func,
-device->physical_device->info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
+sprintf(cmd, "umr --by-pci %04x:%02x:%02x.%01x -O bits,halt_waves -go 0 -wa %s -go 1 2>&1", pdev->bus_info.domain,
+pdev->bus_info.bus, pdev->bus_info.dev, pdev->bus_info.func,
+pdev->info.gfx_level >= GFX10 ? "gfx_0.0.0" : "gfx");
 fprintf(f, "\nUMR GFX waves:\n\n");
 radv_dump_cmd(cmd, f);
 #endif
@@ -707,7 +711,9 @@ radv_gpu_hang_occurred(struct radv_queue *queue, enum amd_ip_type ring)
 bool
 radv_vm_fault_occurred(struct radv_device *device, struct radv_winsys_gpuvm_fault_info *fault_info)
 {
-if (!device->physical_device->info.has_gpuvm_fault_query)
+const struct radv_physical_device *pdev = radv_device_physical(device);
+
+if (!pdev->info.has_gpuvm_fault_query)
 return false;

 return device->ws->query_gpuvm_fault(device->ws, fault_info);
@@ -742,6 +748,7 @@ radv_check_gpu_hangs(struct radv_queue *queue, const struct radv_winsys_submit_i
 fprintf(stderr, "radv: GPU hang detected...\n");

 #ifndef _WIN32
+const struct radv_physical_device *pdev = radv_device_physical(queue->device);
 const bool save_hang_report = !queue->device->vk.enabled_features.deviceFaultVendorBinary;
 struct radv_winsys_gpuvm_fault_info fault_info = {0};
 struct radv_device *device = queue->device;
@@ -822,7 +829,7 @@ radv_check_gpu_hangs(struct radv_queue *queue, const struct radv_winsys_submit_i
 if (vm_fault_occurred) {
 fprintf(f, "VM fault report.\n\n");
 fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n", fault_info.addr);
-ac_print_gpuvm_fault_status(f, device->physical_device->info.gfx_level, fault_info.status);
+ac_print_gpuvm_fault_status(f, pdev->info.gfx_level, fault_info.status);
 }
 break;
 case RADV_DEVICE_FAULT_CHUNK_APP_INFO:
@@ -830,7 +837,7 @@ radv_check_gpu_hangs(struct radv_queue *queue, const struct radv_winsys_submit_i
 break;
 case RADV_DEVICE_FAULT_CHUNK_GPU_INFO:
 radv_dump_device_name(device, f);
-ac_print_gpu_info(&device->physical_device->info, f);
+ac_print_gpu_info(&pdev->info, f);
 break;
 case RADV_DEVICE_FAULT_CHUNK_DMESG:
 radv_dump_dmesg(f);
@@ -1010,12 +1017,13 @@ struct radv_sq_hw_reg {
 static void
 radv_dump_sq_hw_regs(struct radv_device *device)
 {
-enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
-enum radeon_family family = device->physical_device->info.family;
+const struct radv_physical_device *pdev = radv_device_physical(device);
+enum amd_gfx_level gfx_level = pdev->info.gfx_level;
+enum radeon_family family = pdev->info.family;
 struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];

 fprintf(stderr, "\nHardware registers:\n");
-if (device->physical_device->info.gfx_level >= GFX10) {
+if (pdev->info.gfx_level >= GFX10) {
 ac_dump_reg(stderr, gfx_level, family, R_000408_SQ_WAVE_STATUS, regs->status, ~0);
 ac_dump_reg(stderr, gfx_level, family, R_00040C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0);
 ac_dump_reg(stderr, gfx_level, family, R_00045C_SQ_WAVE_HW_ID1, regs->hw_id, ~0);
@@ -1084,6 +1092,7 @@ radv_GetDeviceFaultInfoEXT(VkDevice _device, VkDeviceFaultCountsEXT *pFaultCount
 &pFaultCounts->addressInfoCount);
 struct radv_winsys_gpuvm_fault_info fault_info = {0};
 RADV_FROM_HANDLE(radv_device, device, _device);
+const struct radv_physical_device *pdev = radv_device_physical(device);
 bool vm_fault_occurred = false;

 /* Query if a GPUVM fault happened. */
@@ -1094,8 +1103,6 @@ radv_GetDeviceFaultInfoEXT(VkDevice _device, VkDeviceFaultCountsEXT *pFaultCount
 pFaultCounts->vendorBinarySize = 0;

 if (device->gpu_hang_report) {
-const struct radv_physical_device *pdev = device->physical_device;
-
 VkDeviceFaultVendorBinaryHeaderVersionOneEXT hdr;

 hdr.headerSize = sizeof(VkDeviceFaultVendorBinaryHeaderVersionOneEXT);
@@ -1127,7 +1134,7 @@ radv_GetDeviceFaultInfoEXT(VkDevice _device, VkDeviceFaultCountsEXT *pFaultCount
 if (pFaultInfo)
 strncpy(pFaultInfo->description, "A GPUVM fault has been detected", sizeof(pFaultInfo->description));

-if (device->physical_device->info.gfx_level >= GFX10) {
+if (pdev->info.gfx_level >= GFX10) {
 addr_fault_info.addressType = G_00A130_RW(fault_info.status) ? VK_DEVICE_FAULT_ADDRESS_TYPE_WRITE_INVALID_EXT
 : VK_DEVICE_FAULT_ADDRESS_TYPE_READ_INVALID_EXT;
 } else {
@@ -1074,6 +1074,8 @@ write_texel_buffer_descriptor(struct radv_device *device, struct radv_cmd_buffer
 static ALWAYS_INLINE void
 write_buffer_descriptor(struct radv_device *device, unsigned *dst, uint64_t va, uint64_t range)
 {
+const struct radv_physical_device *pdev = radv_device_physical(device);
+
 if (!va) {
 memset(dst, 0, 4 * 4);
 return;
@@ -1082,9 +1084,9 @@ write_buffer_descriptor(struct radv_device *device, unsigned *dst, uint64_t va,
 uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

-if (device->physical_device->info.gfx_level >= GFX11) {
+if (pdev->info.gfx_level >= GFX11) {
 rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
-} else if (device->physical_device->info.gfx_level >= GFX10) {
+} else if (pdev->info.gfx_level >= GFX10) {
 rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
 S_008F0C_RESOURCE_LEVEL(1);
 } else {
@@ -97,10 +97,10 @@ radv_GetMemoryHostPointerPropertiesEXT(VkDevice _device, VkExternalMemoryHandleT
 VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
 {
 RADV_FROM_HANDLE(radv_device, device, _device);
+const struct radv_physical_device *pdev = radv_device_physical(device);

 switch (handleType) {
 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
-const struct radv_physical_device *pdev = device->physical_device;
 uint32_t memoryTypeBits = 0;
 for (int i = 0; i < pdev->memory_properties.memoryTypeCount; i++) {
 if (pdev->memory_domains[i] == RADEON_DOMAIN_GTT && !(pdev->memory_flags[i] & RADEON_FLAG_GTT_WC)) {
@@ -186,8 +186,10 @@ static struct radv_shader_part_cache_ops vs_prolog_ops = {
 static VkResult
 radv_device_init_vs_prologs(struct radv_device *device)
 {
+const struct radv_physical_device *pdev = radv_device_physical(device);
+
 if (!radv_shader_part_cache_init(&device->vs_prologs, &vs_prolog_ops))
-return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+return vk_error(pdev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

 /* don't pre-compile prologs if we want to print them */
 if (device->instance->debug_flags & RADV_DEBUG_DUMP_PROLOGS)
@@ -196,9 +198,9 @@ radv_device_init_vs_prologs(struct radv_device *device)
 struct radv_vs_prolog_key key;
 memset(&key, 0, sizeof(key));
 key.as_ls = false;
-key.is_ngg = device->physical_device->use_ngg;
+key.is_ngg = pdev->use_ngg;
 key.next_stage = MESA_SHADER_VERTEX;
-key.wave32 = device->physical_device->ge_wave_size == 32;
+key.wave32 = pdev->ge_wave_size == 32;

 for (unsigned i = 1; i <= MAX_VERTEX_ATTRIBS; i++) {
 key.instance_rate_inputs = 0;
@@ -206,7 +208,7 @@ radv_device_init_vs_prologs(struct radv_device *device)

 device->simple_vs_prologs[i - 1] = radv_create_vs_prolog(device, &key);
 if (!device->simple_vs_prologs[i - 1])
-return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+return vk_error(pdev->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
 }

 unsigned idx = 0;
@@ -218,7 +220,7 @@ radv_device_init_vs_prologs(struct radv_device *device)

 struct radv_shader_part *prolog = radv_create_vs_prolog(device, &key);
 if (!prolog)
-return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+return vk_error(pdev->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);

 assert(idx == radv_instance_rate_prolog_index(num_attributes, key.instance_rate_inputs));
 device->instance_rate_vs_prologs[idx++] = prolog;
@@ -638,11 +640,11 @@ capture_trace(VkQueue _queue)
 static void
 radv_device_init_cache_key(struct radv_device *device)
 {
+const struct radv_physical_device *pdev = radv_device_physical(device);
 struct radv_device_cache_key *key = &device->cache_key;

 key->disable_trunc_coord = device->disable_trunc_coord;
-key->image_2d_view_of_3d =
-device->vk.enabled_features.image2DViewOf3D && device->physical_device->info.gfx_level == GFX9;
+key->image_2d_view_of_3d = device->vk.enabled_features.image2DViewOf3D && pdev->info.gfx_level == GFX9;
 key->mesh_shader_queries = device->vk.enabled_features.meshShaderQueries;
 key->primitives_generated_query = radv_uses_primitives_generated_query(device);

@@ -655,7 +657,7 @@ radv_device_init_cache_key(struct radv_device *device)
 * enabled, regardless of what features are actually enabled on the logical device.
 */
 if (device->vk.enabled_features.shaderObject) {
-key->image_2d_view_of_3d = device->physical_device->info.gfx_level == GFX9;
+key->image_2d_view_of_3d = pdev->info.gfx_level == GFX9;
 key->primitives_generated_query = true;
 }

@@ -701,7 +703,6 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
 device->vk.command_buffer_ops = &radv_cmd_buffer_ops;

 device->instance = pdev->instance;
-device->physical_device = pdev;

 init_dispatch_tables(device, pdev);

@@ -782,13 +783,12 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr

 device->shader_use_invisible_vram = (device->instance->perftest_flags & RADV_PERFTEST_DMA_SHADERS) &&
 /* SDMA buffer copy is only implemented for GFX7+. */
-device->physical_device->info.gfx_level >= GFX7;
+pdev->info.gfx_level >= GFX7;
 result = radv_init_shader_upload_queue(device);
 if (result != VK_SUCCESS)
 goto fail;

-device->pbb_allowed =
-device->physical_device->info.gfx_level >= GFX9 && !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
+device->pbb_allowed = pdev->info.gfx_level >= GFX9 && !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);

 device->disable_trunc_coord = device->instance->drirc.disable_trunc_coord;

@@ -818,13 +818,13 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr

 device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);

-if (device->physical_device->info.gfx_level >= GFX7) {
+if (pdev->info.gfx_level >= GFX7) {
 /* If the KMD allows it (there is a KMD hw register for it),
 * allow launching waves out-of-order.
 */
 device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
 }
-if (device->physical_device->info.gfx_level >= GFX10) {
+if (pdev->info.gfx_level >= GFX10) {
 /* Enable asynchronous compute tunneling. The KMD restricts this feature
 * to high-priority compute queues, so setting the bit on any other queue
 * is a no-op. PAL always sets this bit as well.
@@ -862,7 +862,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
 }

 if (device->instance->vk.trace_mode & RADV_TRACE_MODE_RGP) {
-if (device->physical_device->info.gfx_level < GFX8 || device->physical_device->info.gfx_level > GFX11) {
+if (pdev->info.gfx_level < GFX8 || pdev->info.gfx_level > GFX11) {
 fprintf(stderr, "GPU hardware not supported: refer to "
 "the RGP documentation for the list of "
 "supported GPUs!\n");
@@ -882,13 +882,13 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
 radv_sqtt_queue_events_enabled() ? "enabled" : "disabled");

 if (radv_spm_trace_enabled(device->instance)) {
-if (device->physical_device->info.gfx_level >= GFX10) {
+if (pdev->info.gfx_level >= GFX10) {
 if (!radv_spm_init(device)) {
 result = VK_ERROR_INITIALIZATION_FAILED;
 goto fail;
 }
 } else {
-fprintf(stderr, "radv: SPM isn't supported for this GPU (%s)!\n", device->physical_device->name);
+fprintf(stderr, "radv: SPM isn't supported for this GPU (%s)!\n", pdev->name);
 }
 }
 }
@@ -905,7 +905,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr

 if (getenv("RADV_TRAP_HANDLER")) {
 /* TODO: Add support for more hardware. */
-assert(device->physical_device->info.gfx_level == GFX8);
+assert(pdev->info.gfx_level == GFX8);

 fprintf(stderr, "**********************************************************************\n");
 fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n");
@@ -922,7 +922,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
 }
 }

-if (device->physical_device->info.gfx_level == GFX10_3) {
+if (pdev->info.gfx_level == GFX10_3) {
 if (getenv("RADV_FORCE_VRS_CONFIG_FILE")) {
 const char *file = radv_get_force_vrs_config_file();

@@ -942,7 +942,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
 }

 /* PKT3_LOAD_SH_REG_INDEX is supported on GFX8+, but it hangs with compute queues until GFX10.3. */
-device->load_grid_size_from_user_sgpr = device->physical_device->info.gfx_level >= GFX10_3;
+device->load_grid_size_from_user_sgpr = pdev->info.gfx_level >= GFX10_3;

 device->keep_shader_info = keep_shader_info;

@@ -1009,7 +1009,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
 goto fail_cache;
 }

-if (!device->physical_device->ac_perfcounters.blocks) {
+if (!pdev->ac_perfcounters.blocks) {
 result = VK_ERROR_INITIALIZATION_FAILED;
 goto fail_cache;
 }
@@ -1029,7 +1029,7 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
 if (result != VK_SUCCESS)
 goto fail_cache;

-if (device->physical_device->info.gfx_level == GFX11 && device->physical_device->info.has_dedicated_vram &&
+if (pdev->info.gfx_level == GFX11 && pdev->info.has_dedicated_vram &&
 device->instance->drirc.force_pstate_peak_gfx11_dgpu) {
 if (!radv_device_acquire_performance_counters(device))
 fprintf(stderr, "radv: failed to set pstate to profile_peak.\n");
@@ -1197,10 +1197,10 @@ radv_GetImageMemoryRequirements2(VkDevice _device, const VkImageMemoryRequiremen
 {
 RADV_FROM_HANDLE(radv_device, device, _device);
 RADV_FROM_HANDLE(radv_image, image, pInfo->image);
+const struct radv_physical_device *pdev = radv_device_physical(device);

 pMemoryRequirements->memoryRequirements.memoryTypeBits =
-((1u << device->physical_device->memory_properties.memoryTypeCount) - 1u) &
-~device->physical_device->memory_types_32bit;
+((1u << pdev->memory_properties.memoryTypeCount) - 1u) & ~pdev->memory_types_32bit;

 pMemoryRequirements->memoryRequirements.size = image->size;
 pMemoryRequirements->memoryRequirements.alignment = image->alignment;
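The memoryTypeBits expression above first builds a mask with one bit per advertised memory type, then clears the types reserved for 32-bit GPU addresses. A small self-contained C illustration of the same bit arithmetic, using made-up example values rather than real GPU data:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   /* Hypothetical values: 5 memory types, types 3 and 4 are 32-bit-only. */
   uint32_t memoryTypeCount = 5;
   uint32_t memory_types_32bit = 0x18;

   /* Same expression as in radv_GetImageMemoryRequirements2 above. */
   uint32_t bits = ((1u << memoryTypeCount) - 1u) & ~memory_types_32bit;

   printf("memoryTypeBits = 0x%x\n", bits); /* prints 0x7: types 0, 1 and 2 */
   return 0;
}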
@@ -1254,7 +1254,9 @@ radv_surface_max_layer_count(struct radv_image_view *iview)
 unsigned
 radv_get_dcc_max_uncompressed_block_size(const struct radv_device *device, const struct radv_image *image)
 {
-if (device->physical_device->info.gfx_level < GFX10 && image->vk.samples > 1) {
+const struct radv_physical_device *pdev = radv_device_physical(device);
+
+if (pdev->info.gfx_level < GFX10 && image->vk.samples > 1) {
 if (image->planes[0].surface.bpe == 1)
 return V_028C78_MAX_BLOCK_SIZE_64B;
 else if (image->planes[0].surface.bpe == 2)
@@ -1267,7 +1269,9 @@ radv_get_dcc_max_uncompressed_block_size(const struct radv_device *device, const
 static unsigned
 get_dcc_min_compressed_block_size(const struct radv_device *device)
 {
-if (!device->physical_device->info.has_dedicated_vram) {
+const struct radv_physical_device *pdev = radv_device_physical(device);
+
+if (!pdev->info.has_dedicated_vram) {
 /* amdvlk: [min-compressed-block-size] should be set to 32 for
 * dGPU and 64 for APU because all of our APUs to date use
 * DIMMs which have a request granularity size of 64B while all
@@ -1282,6 +1286,7 @@ get_dcc_min_compressed_block_size(const struct radv_device *device)
 static uint32_t
 radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iview)
 {
+const struct radv_physical_device *pdev = radv_device_physical(device);
 unsigned max_uncompressed_block_size = radv_get_dcc_max_uncompressed_block_size(device, iview->image);
 unsigned min_compressed_block_size = get_dcc_min_compressed_block_size(device);
 unsigned max_compressed_block_size;
@@ -1293,7 +1298,7 @@ radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iv

 /* For GFX9+ ac_surface computes values for us (except min_compressed
 * and max_uncompressed) */
-if (device->physical_device->info.gfx_level >= GFX9) {
+if (pdev->info.gfx_level >= GFX9) {
 max_compressed_block_size = iview->image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size;
 independent_128b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_128B_blocks;
 independent_64b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_64B_blocks;
@@ -1322,12 +1327,12 @@ radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iv
 S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
 S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);

-if (device->physical_device->info.gfx_level >= GFX11) {
+if (pdev->info.gfx_level >= GFX11) {
 result |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX11(independent_128b_blocks) |
 S_028C78_DISABLE_CONSTANT_ENCODE_REG(1) |
 S_028C78_FDCC_ENABLE(radv_dcc_enabled(iview->image, iview->vk.base_mip_level));

-if (device->physical_device->info.family >= CHIP_GFX1103_R2) {
+if (pdev->info.family >= CHIP_GFX1103_R2) {
 result |= S_028C78_ENABLE_MAX_COMP_FRAG_OVERRIDE(1) | S_028C78_MAX_COMP_FRAGS(iview->image->vk.samples >= 4);
 }
 } else {
@@ -1341,6 +1346,7 @@ void
 radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
 struct radv_image_view *iview)
 {
+const struct radv_physical_device *pdev = radv_device_physical(device);
 const struct util_format_description *desc;
 unsigned ntype, format, swap, endian;
 unsigned blend_clamp = 0, blend_bypass = 0;
@@ -1354,7 +1360,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
 memset(cb, 0, sizeof(*cb));

 /* Intensity is implemented as Red, so treat it that way. */
-if (device->physical_device->info.gfx_level >= GFX11)
+if (pdev->info.gfx_level >= GFX11)
 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX11(desc->swizzle[3] == PIPE_SWIZZLE_1);
 else
 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX6(desc->swizzle[3] == PIPE_SWIZZLE_1);
@@ -1369,11 +1375,11 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff

 cb->cb_color_base = va >> 8;

-if (device->physical_device->info.gfx_level >= GFX9) {
-if (device->physical_device->info.gfx_level >= GFX11) {
+if (pdev->info.gfx_level >= GFX9) {
+if (pdev->info.gfx_level >= GFX11) {
 cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
 S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);
-} else if (device->physical_device->info.gfx_level >= GFX10) {
+} else if (pdev->info.gfx_level >= GFX10) {
 cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
 S_028EE0_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
 S_028EE0_CMASK_PIPE_ALIGNED(1) |
@@ -1414,13 +1420,13 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);

 if (radv_image_has_fmask(iview->image)) {
-if (device->physical_device->info.gfx_level >= GFX7)
+if (pdev->info.gfx_level >= GFX7)
 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1);
 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.color.fmask.tiling_index);
 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.color.fmask.slice_tile_max);
 } else {
 /* This must be set for fast clear to work without FMASK. */
-if (device->physical_device->info.gfx_level >= GFX7)
+if (pdev->info.gfx_level >= GFX7)
 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
@@ -1435,7 +1441,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
 va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset;
 va += surf->meta_offset;

-if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && device->physical_device->info.gfx_level <= GFX8)
+if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && pdev->info.gfx_level <= GFX8)
 va += plane->surface.u.legacy.color.dcc_level[iview->vk.base_mip_level].dcc_offset;

 unsigned dcc_tile_swizzle = tile_swizzle;
@@ -1452,7 +1458,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
 if (iview->image->vk.samples > 1) {
 unsigned log_samples = util_logbase2(iview->image->vk.samples);

-if (device->physical_device->info.gfx_level >= GFX11)
+if (pdev->info.gfx_level >= GFX11)
 cb->cb_color_attrib |= S_028C74_NUM_FRAGMENTS_GFX11(log_samples);
 else
 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS_GFX6(log_samples);
@@ -1467,7 +1473,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
 }

 ntype = ac_get_cb_number_type(desc->format);
-format = ac_get_cb_format(device->physical_device->info.gfx_level, desc->format);
+format = ac_get_cb_format(pdev->info.gfx_level, desc->format);
 assert(format != V_028C70_COLOR_INVALID);

 swap = radv_translate_colorswap(iview->vk.format, false);
@@ -1498,14 +1504,14 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
 format != V_028C70_COLOR_24_8) |
 S_028C70_NUMBER_TYPE(ntype);

-if (device->physical_device->info.gfx_level >= GFX11)
+if (pdev->info.gfx_level >= GFX11)
 cb->cb_color_info |= S_028C70_FORMAT_GFX11(format);
 else
 cb->cb_color_info |= S_028C70_FORMAT_GFX6(format) | S_028C70_ENDIAN(endian);

 if (radv_image_has_fmask(iview->image)) {
 cb->cb_color_info |= S_028C70_COMPRESSION(1);
-if (device->physical_device->info.gfx_level == GFX6) {
+if (pdev->info.gfx_level == GFX6) {
 unsigned fmask_bankh = util_logbase2(surf->u.legacy.color.fmask.bankh);
 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
 }
@@ -1514,7 +1520,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
 /* Allow the texture block to read FMASK directly without decompressing it. */
 cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);

-if (device->physical_device->info.gfx_level == GFX8) {
+if (pdev->info.gfx_level == GFX8) {
 /* Set CMASK into a tiling format that allows
 * the texture block to read it.
 */
@@ -1527,25 +1533,25 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);

 if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && !iview->disable_dcc_mrt &&
-device->physical_device->info.gfx_level < GFX11)
+pdev->info.gfx_level < GFX11)
 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);

 cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);

 /* This must be set for fast clear to work without FMASK. */
-if (!radv_image_has_fmask(iview->image) && device->physical_device->info.gfx_level == GFX6) {
+if (!radv_image_has_fmask(iview->image) && pdev->info.gfx_level == GFX6) {
 unsigned bankh = util_logbase2(surf->u.legacy.bankh);
 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
 }

-if (device->physical_device->info.gfx_level >= GFX9) {
+if (pdev->info.gfx_level >= GFX9) {
 unsigned mip0_depth = iview->image->vk.image_type == VK_IMAGE_TYPE_3D ? (iview->extent.depth - 1)
 : (iview->image->vk.array_layers - 1);
 unsigned width = vk_format_get_plane_width(iview->image->vk.format, iview->plane_id, iview->extent.width);
 unsigned height = vk_format_get_plane_height(iview->image->vk.format, iview->plane_id, iview->extent.height);
 unsigned max_mip = iview->image->vk.mip_levels - 1;

-if (device->physical_device->info.gfx_level >= GFX10) {
+if (pdev->info.gfx_level >= GFX10) {
 unsigned base_level = iview->vk.base_mip_level;

 if (iview->nbc_view.valid) {
@@ -1556,7 +1562,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
 cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(base_level);

 cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) | S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
-S_028EE0_RESOURCE_LEVEL(device->physical_device->info.gfx_level >= GFX11 ? 0 : 1);
+S_028EE0_RESOURCE_LEVEL(pdev->info.gfx_level >= GFX11 ? 0 : 1);
 } else {
 cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->vk.base_mip_level);
 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
@@ -1567,7 +1573,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
 *
 * We set the pitch in MIP0_WIDTH.
 */
-if (device->physical_device->info.gfx_level && iview->image->vk.image_type == VK_IMAGE_TYPE_2D &&
+if (pdev->info.gfx_level && iview->image->vk.image_type == VK_IMAGE_TYPE_2D &&
 iview->image->vk.array_layers == 1 && plane->surface.is_linear) {
 assert((plane->surface.u.gfx9.surf_pitch * plane->surface.bpe) % 256 == 0);

@@ -1586,11 +1592,12 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
 static unsigned
 radv_calc_decompress_on_z_planes(const struct radv_device *device, struct radv_image_view *iview)
 {
+const struct radv_physical_device *pdev = radv_device_physical(device);
 unsigned max_zplanes = 0;

 assert(radv_image_is_tc_compat_htile(iview->image));

-if (device->physical_device->info.gfx_level >= GFX9) {
+if (pdev->info.gfx_level >= GFX9) {
 /* Default value for 32-bit depth surfaces. */
 max_zplanes = 4;

@@ -1598,9 +1605,8 @@ radv_calc_decompress_on_z_planes(const struct radv_device *device, struct radv_i
 max_zplanes = 2;

 /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */
-if (device->physical_device->info.has_two_planes_iterate256_bug &&
-radv_image_get_iterate256(device, iview->image) && !radv_image_tile_stencil_disabled(device, iview->image) &&
-iview->image->vk.samples == 4) {
+if (pdev->info.has_two_planes_iterate256_bug && radv_image_get_iterate256(device, iview->image) &&
+!radv_image_tile_stencil_disabled(device, iview->image) && iview->image->vk.samples == 4) {
 max_zplanes = 1;
 }

@@ -1650,6 +1656,7 @@ void
 radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buffer_info *ds,
 struct radv_image_view *iview, VkImageAspectFlags ds_aspects)
 {
+const struct radv_physical_device *pdev = radv_device_physical(device);
 unsigned level = iview->vk.base_mip_level;
 unsigned format, stencil_format;
 uint64_t va, s_offs, z_offs;
@@ -1668,7 +1675,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
 ds->db_depth_view = S_028008_SLICE_START(iview->vk.base_array_layer) | S_028008_SLICE_MAX(max_slice) |
 S_028008_Z_READ_ONLY(!(ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) |
 S_028008_STENCIL_READ_ONLY(!(ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT));
-if (device->physical_device->info.gfx_level >= GFX10) {
+if (pdev->info.gfx_level >= GFX10) {
 ds->db_depth_view |=
 S_028008_SLICE_START_HI(iview->vk.base_array_layer >> 11) | S_028008_SLICE_MAX_HI(max_slice >> 11);
 }
@@ -1681,20 +1688,19 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff

 /* Recommended value for better performance with 4x and 8x. */
 ds->db_render_override2 = S_028010_DECOMPRESS_Z_ON_FLUSH(iview->image->vk.samples >= 4) |
-S_028010_CENTROID_COMPUTATION_MODE(device->physical_device->info.gfx_level >= GFX10_3);
+S_028010_CENTROID_COMPUTATION_MODE(pdev->info.gfx_level >= GFX10_3);

-if (device->physical_device->info.gfx_level >= GFX9) {
+if (pdev->info.gfx_level >= GFX9) {
 assert(surf->u.gfx9.surf_offset == 0);
 s_offs += surf->u.gfx9.zs.stencil_offset;

 ds->db_z_info = S_028038_FORMAT(format) | S_028038_NUM_SAMPLES(util_logbase2(iview->image->vk.samples)) |
 S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) | S_028038_MAXMIP(iview->image->vk.mip_levels - 1) |
-S_028038_ZRANGE_PRECISION(1) |
-S_028040_ITERATE_256(device->physical_device->info.gfx_level >= GFX11);
+S_028038_ZRANGE_PRECISION(1) | S_028040_ITERATE_256(pdev->info.gfx_level >= GFX11);
 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) | S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode) |
-S_028044_ITERATE_256(device->physical_device->info.gfx_level >= GFX11);
+S_028044_ITERATE_256(pdev->info.gfx_level >= GFX11);

-if (device->physical_device->info.gfx_level == GFX9) {
+if (pdev->info.gfx_level == GFX9) {
 ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.epitch);
 ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.zs.stencil_epitch);
 }
@@ -1711,7 +1717,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff

 ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);

-if (device->physical_device->info.gfx_level >= GFX10) {
+if (pdev->info.gfx_level >= GFX10) {
 bool iterate256 = radv_image_get_iterate256(device, iview->image);

 ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
@@ -1732,7 +1738,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
 ds->db_htile_data_base = va >> 8;
 ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1);

-if (device->physical_device->info.gfx_level == GFX9) {
+if (pdev->info.gfx_level == GFX9) {
 ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
 }

@@ -1741,7 +1747,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
 }
 }

-if (device->physical_device->info.gfx_level >= GFX11) {
+if (pdev->info.gfx_level >= GFX11) {
 radv_gfx11_set_db_render_control(device, iview->image->vk.samples, &ds->db_render_control);
 }
 } else {
@@ -1760,8 +1766,8 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
 if (iview->image->vk.samples > 1)
 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->vk.samples));

-if (device->physical_device->info.gfx_level >= GFX7) {
-const struct radeon_info *gpu_info = &device->physical_device->info;
+if (pdev->info.gfx_level >= GFX7) {
+const struct radeon_info *gpu_info = &pdev->info;
 unsigned tiling_index = surf->u.legacy.tiling_index[level];
 unsigned stencil_index = surf->u.legacy.zs.stencil_tiling_index[level];
 unsigned macro_index = surf->u.legacy.macro_tile_index;
@@ -1820,7 +1826,7 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
 void
 radv_gfx11_set_db_render_control(const struct radv_device *device, unsigned num_samples, unsigned *db_render_control)
 {
-const struct radv_physical_device *pdev = device->physical_device;
+const struct radv_physical_device *pdev = radv_device_physical(device);
 unsigned max_allowed_tiles_in_wave = 0;

 if (pdev->info.has_dedicated_vram) {
@@ -1911,6 +1917,7 @@ radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBi
 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
 {
 RADV_FROM_HANDLE(radv_device, device, _device);
+struct radv_physical_device *pdev = radv_device_physical(device);

 switch (handleType) {
 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: {
@@ -1919,7 +1926,7 @@ radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBi
 if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))
 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);

-pMemoryFdProperties->memoryTypeBits = radv_compute_valid_memory_types(device->physical_device, domains, flags);
+pMemoryFdProperties->memoryTypeBits = radv_compute_valid_memory_types(pdev, domains, flags);
 return VK_SUCCESS;
 }
 default:
@@ -1941,7 +1948,8 @@ radv_GetCalibratedTimestampsKHR(VkDevice _device, uint32_t timestampCount,
 {
 #ifndef _WIN32
 RADV_FROM_HANDLE(radv_device, device, _device);
-uint32_t clock_crystal_freq = device->physical_device->info.clock_crystal_freq;
+const struct radv_physical_device *pdev = radv_device_physical(device);
+uint32_t clock_crystal_freq = pdev->info.clock_crystal_freq;
 int d;
 uint64_t begin, end;
 uint64_t max_clock_period = 0;
@@ -1992,10 +2000,11 @@ radv_GetCalibratedTimestampsKHR(VkDevice _device, uint32_t timestampCount,
 bool
 radv_device_set_pstate(struct radv_device *device, bool enable)
 {
+const struct radv_physical_device *pdev = radv_device_physical(device);
 struct radeon_winsys *ws = device->ws;
 enum radeon_ctx_pstate pstate = enable ? RADEON_CTX_PSTATE_PEAK : RADEON_CTX_PSTATE_NONE;

-if (device->physical_device->info.has_stable_pstate) {
+if (pdev->info.has_stable_pstate) {
 /* pstate is per-device; setting it for one ctx is sufficient.
 * We pick the first initialized one below. */
 for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++)
@@ -35,6 +35,7 @@ radv_get_sequence_size_compute(const struct radv_indirect_command_layout *layout
 const struct radv_compute_pipeline *pipeline, uint32_t *cmd_size)
 {
 const struct radv_device *device = container_of(layout->base.device, struct radv_device, vk);
+const struct radv_physical_device *pdev = radv_device_physical(device);

 /* dispatch */
 *cmd_size += 5 * 4;
@@ -55,7 +56,7 @@ radv_get_sequence_size_compute(const struct radv_indirect_command_layout *layout
 /* COMPUTE_PGM_{LO,RSRC1,RSRC2} */
 *cmd_size += 7 * 4;

-if (device->physical_device->info.gfx_level >= GFX10) {
+if (pdev->info.gfx_level >= GFX10) {
 /* COMPUTE_PGM_RSRC3 */
 *cmd_size += 3 * 4;
 }
@@ -87,6 +88,7 @@ radv_get_sequence_size_graphics(const struct radv_indirect_command_layout *layou
 uint32_t *upload_size)
 {
 const struct radv_device *device = container_of(layout->base.device, struct radv_device, vk);
+const struct radv_physical_device *pdev = radv_device_physical(device);
 const struct radv_shader *vs = radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX);

 if (layout->bind_vbo_mask) {
@@ -115,7 +117,7 @@ radv_get_sequence_size_graphics(const struct radv_indirect_command_layout *layou
 } else {
 if (layout->draw_mesh_tasks) {
 /* userdata writes + instance count + non-indexed draw */
-*cmd_size += (6 + 2 + (device->physical_device->mesh_fast_launch_2 ? 5 : 3)) * 4;
+*cmd_size += (6 + 2 + (pdev->mesh_fast_launch_2 ? 5 : 3)) * 4;
 } else {
 /* userdata writes + instance count + non-indexed draw */
 *cmd_size += (5 + 2 + 3) * 4;
@@ -189,7 +191,8 @@ radv_get_sequence_size(const struct radv_indirect_command_layout *layout, struct
 static uint32_t
 radv_align_cmdbuf_size(const struct radv_device *device, uint32_t size, enum amd_ip_type ip_type)
 {
-const uint32_t ib_alignment = device->physical_device->info.ip[ip_type].ib_alignment;
+const struct radv_physical_device *pdev = radv_device_physical(device);
+const uint32_t ib_alignment = pdev->info.ip[ip_type].ib_alignment;

 return align(size, ib_alignment);
 }
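radv_align_cmdbuf_size() simply rounds an IB size up to the per-IP alignment reported by the kernel. A hedged sketch of that rounding, assuming align() is the usual power-of-two round-up helper (the real one comes from Mesa's utility headers):

#include <stdint.h>
#include <stdio.h>

/* Assumed behaviour of Mesa's align(): round up to a power-of-two boundary. */
static uint32_t align_up_pow2(uint32_t value, uint32_t alignment)
{
   return (value + alignment - 1) & ~(alignment - 1);
}

int main(void)
{
   /* Hypothetical ib_alignment of 256 bytes for the GFX IP. */
   printf("%u\n", align_up_pow2(1000, 256)); /* prints 1024 */
   return 0;
}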
@@ -365,7 +368,9 @@ nir_pkt3(nir_builder *b, unsigned op, nir_def *len)
 static nir_def *
 dgc_get_nop_packet(nir_builder *b, const struct radv_device *device)
 {
-if (device->physical_device->info.gfx_ib_pad_with_type2) {
+const struct radv_physical_device *pdev = radv_device_physical(device);
+
+if (pdev->info.gfx_ib_pad_with_type2) {
 return nir_imm_int(b, PKT2_NOP_PAD);
 } else {
 return nir_imm_int(b, PKT3_NOP_PAD);
@@ -691,6 +696,8 @@ dgc_main_cmd_buf_offset(nir_builder *b, const struct radv_device *device)
 static void
 build_dgc_buffer_tail(nir_builder *b, nir_def *sequence_count, const struct radv_device *device)
 {
+const struct radv_physical_device *pdev = radv_device_physical(device);
+
 nir_def *global_id = get_global_ids(b, 1);

 nir_def *cmd_buf_stride = load_param32(b, cmd_buf_stride);
@@ -718,7 +725,7 @@ build_dgc_buffer_tail(nir_builder *b, nir_def *sequence_count, const struct radv

 nir_def *packet, *packet_size;

-if (device->physical_device->info.gfx_ib_pad_with_type2) {
+if (pdev->info.gfx_ib_pad_with_type2) {
 packet_size = nir_imm_int(b, 4);
 packet = nir_imm_int(b, PKT2_NOP_PAD);
 } else {
@@ -741,6 +748,8 @@ build_dgc_buffer_tail(nir_builder *b, nir_def *sequence_count, const struct radv
 static void
 build_dgc_buffer_preamble(nir_builder *b, nir_def *sequence_count, const struct radv_device *device)
 {
+const struct radv_physical_device *pdev = radv_device_physical(device);
+
 nir_def *global_id = get_global_ids(b, 1);
 nir_def *use_preamble = nir_ine_imm(b, load_param8(b, use_preamble), 0);

@@ -778,7 +787,7 @@ build_dgc_buffer_preamble(nir_builder *b, nir_def *sequence_count, const struct
 nir_def *chain_packets[] = {
 nir_imm_int(b, PKT3(PKT3_INDIRECT_BUFFER, 2, 0)),
 addr,
-nir_imm_int(b, device->physical_device->info.address32_hi),
+nir_imm_int(b, pdev->info.address32_hi),
 nir_ior_imm(b, words, S_3F2_CHAIN(1) | S_3F2_VALID(1) | S_3F2_PRE_ENA(false)),
 };

@@ -861,6 +870,8 @@ dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf
 nir_def *index_buffer_offset, nir_def *ibo_type_32, nir_def *ibo_type_8,
 nir_variable *max_index_count_var, const struct radv_device *device)
 {
+const struct radv_physical_device *pdev = radv_device_physical(device);
+
 nir_def *index_stream_offset = nir_iadd(b, index_buffer_offset, stream_base);
 nir_def *data = nir_load_ssbo(b, 4, 32, stream_buf, index_stream_offset);

@@ -876,10 +887,9 @@ dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf

 nir_def *cmd_values[3 + 2 + 3];

-if (device->physical_device->info.gfx_level >= GFX9) {
+if (pdev->info.gfx_level >= GFX9) {
 unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
-if (device->physical_device->info.gfx_level < GFX9 ||
-(device->physical_device->info.gfx_level == GFX9 && device->physical_device->info.me_fw_version < 26))
+if (pdev->info.gfx_level < GFX9 || (pdev->info.gfx_level == GFX9 && pdev->info.me_fw_version < 26))
 opcode = PKT3_SET_UCONFIG_REG;
 cmd_values[0] = nir_imm_int(b, PKT3(opcode, 1, 0));
 cmd_values[1] = nir_imm_int(b, (R_03090C_VGT_INDEX_TYPE - CIK_UCONFIG_REG_OFFSET) >> 2 | (2u << 28));
@@ -1186,6 +1196,8 @@ static void
 dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
 nir_def *vbo_bind_mask, nir_variable *upload_offset, const struct radv_device *device)
 {
+const struct radv_physical_device *pdev = radv_device_physical(device);
+
 nir_def *vbo_cnt = load_param8(b, vbo_cnt);
 nir_variable *vbo_idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "vbo_idx");
 nir_store_var(b, vbo_idx, nir_imm_int(b, 0), 0x1);
@@ -1252,9 +1264,9 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
 nir_pop_if(b, NULL);

 nir_def *convert_cond = nir_ine_imm(b, nir_load_var(b, num_records), 0);
-if (device->physical_device->info.gfx_level == GFX9)
+if (pdev->info.gfx_level == GFX9)
 convert_cond = nir_imm_false(b);
-else if (device->physical_device->info.gfx_level != GFX8)
+else if (pdev->info.gfx_level != GFX8)
 convert_cond = nir_iand(b, convert_cond, nir_ieq_imm(b, stride, 0));

 nir_def *new_records =
@@ -1264,7 +1276,7 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
 }
 nir_push_else(b, NULL);
 {
-if (device->physical_device->info.gfx_level != GFX8) {
+if (pdev->info.gfx_level != GFX8) {
 nir_push_if(b, nir_ine_imm(b, stride, 0));
 {
 nir_def *r = nir_iadd(b, nir_load_var(b, num_records), nir_iadd_imm(b, stride, -1));
@@ -1276,7 +1288,7 @@ dgc_emit_vertex_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
 nir_pop_if(b, NULL);

 nir_def *rsrc_word3 = nir_channel(b, nir_load_var(b, vbo_data), 3);
-if (device->physical_device->info.gfx_level >= GFX10) {
+if (pdev->info.gfx_level >= GFX10) {
 nir_def *oob_select = nir_bcsel(b, nir_ieq_imm(b, stride, 0), nir_imm_int(b, V_008F0C_OOB_SELECT_RAW),
 nir_imm_int(b, V_008F0C_OOB_SELECT_STRUCTURED));
 rsrc_word3 = nir_iand_imm(b, rsrc_word3, C_008F0C_OOB_SELECT);
@@ -1408,6 +1420,8 @@ static void
 dgc_emit_draw_mesh_tasks(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
 nir_def *draw_params_offset, nir_def *sequence_id, const struct radv_device *device)
 {
+const struct radv_physical_device *pdev = radv_device_physical(device);
+
 nir_def *vtx_base_sgpr = load_param16(b, vtx_base_sgpr);
 nir_def *stream_offset = nir_iadd(b, draw_params_offset, stream_base);

@@ -1424,7 +1438,7 @@ dgc_emit_draw_mesh_tasks(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_
 dgc_emit_userdata_mesh(b, cs, vtx_base_sgpr, x, y, z, sequence_id, device);
 dgc_emit_instance_count(b, cs, nir_imm_int(b, 1));

-if (device->physical_device->mesh_fast_launch_2) {
+if (pdev->mesh_fast_launch_2) {
 dgc_emit_dispatch_mesh_direct(b, cs, x, y, z);
 } else {
 nir_def *vertex_count = nir_imul(b, x, nir_imul(b, y, z));
@@ -1454,6 +1468,8 @@ static void
 dgc_emit_bind_pipeline(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf, nir_def *stream_base,
 nir_def *pipeline_params_offset, const struct radv_device *device)
 {
+const struct radv_physical_device *pdev = radv_device_physical(device);
+
 nir_def *stream_offset = nir_iadd(b, pipeline_params_offset, stream_base);

 nir_def *pipeline_va = nir_load_ssbo(b, 1, 64, stream_buf, stream_offset);
@@ -1465,7 +1481,7 @@ dgc_emit_bind_pipeline(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_bu
 dgc_emit1(b, cs, load_metadata32(b, rsrc1));
 dgc_emit1(b, cs, load_metadata32(b, rsrc2));

-if (device->physical_device->info.gfx_level >= GFX10) {
+if (pdev->info.gfx_level >= GFX10) {
 dgc_emit_set_sh_reg_seq(b, cs, R_00B8A0_COMPUTE_PGM_RSRC3, 1);
 dgc_emit1(b, cs, load_metadata32(b, rsrc3));
 }
@@ -1504,6 +1520,7 @@ dgc_is_cond_render_enabled(nir_builder *b)
 static nir_shader *
 build_dgc_prepare_shader(struct radv_device *dev)
 {
+const struct radv_physical_device *pdev = radv_device_physical(dev);
 nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_dgc_prepare");
 b.shader->info.workgroup_size[0] = 64;

@@ -1554,7 +1571,7 @@ build_dgc_prepare_shader(struct radv_device *dev)
 struct dgc_cmdbuf cmd_buf = {
 .descriptor = radv_meta_load_descriptor(&b, 0, DGC_DESC_PREPARE),
 .offset = nir_variable_create(b.shader, nir_var_shader_temp, glsl_uint_type(), "cmd_buf_offset"),
-.gfx_level = dev->physical_device->info.gfx_level,
+.gfx_level = pdev->info.gfx_level,
 .sqtt_enabled = !!dev->sqtt.bo,
 };
 nir_store_var(&b, cmd_buf.offset, nir_iadd(&b, nir_imul(&b, global_id, cmd_buf_stride), cmd_buf_base_offset), 1);
@@ -1647,7 +1664,7 @@ build_dgc_prepare_shader(struct radv_device *dev)
 /* Pad the cmdbuffer if we did not use the whole stride */
 nir_push_if(&b, nir_ine(&b, nir_load_var(&b, cmd_buf.offset), cmd_buf_end));
 {
-if (dev->physical_device->info.gfx_ib_pad_with_type2) {
+if (pdev->info.gfx_ib_pad_with_type2) {
 nir_push_loop(&b);
 {
 nir_def *curr_offset = nir_load_var(&b, cmd_buf.offset);
@@ -1872,6 +1889,7 @@ radv_GetGeneratedCommandsMemoryRequirementsNV(VkDevice _device,
 VkMemoryRequirements2 *pMemoryRequirements)
 {
 RADV_FROM_HANDLE(radv_device, device, _device);
+const struct radv_physical_device *pdev = radv_device_physical(device);
 VK_FROM_HANDLE(radv_indirect_command_layout, layout, pInfo->indirectCommandsLayout);
 VK_FROM_HANDLE(radv_pipeline, pipeline, pInfo->pipeline);

@@ -1882,10 +1900,9 @@ radv_GetGeneratedCommandsMemoryRequirementsNV(VkDevice _device,
 radv_dgc_preamble_cmdbuf_size(device);
 VkDeviceSize upload_buf_size = upload_stride * pInfo->maxSequencesCount;

-pMemoryRequirements->memoryRequirements.memoryTypeBits = device->physical_device->memory_types_32bit;
+pMemoryRequirements->memoryRequirements.memoryTypeBits = pdev->memory_types_32bit;
 pMemoryRequirements->memoryRequirements.alignment =
-MAX2(device->physical_device->info.ip[AMD_IP_GFX].ib_alignment,
-device->physical_device->info.ip[AMD_IP_COMPUTE].ib_alignment);
+MAX2(pdev->info.ip[AMD_IP_GFX].ib_alignment, pdev->info.ip[AMD_IP_COMPUTE].ib_alignment);
 pMemoryRequirements->memoryRequirements.size =
 align(cmd_buf_size + upload_buf_size, pMemoryRequirements->memoryRequirements.alignment);
 }
@@ -2051,6 +2068,7 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
 {
 VK_FROM_HANDLE(radv_indirect_command_layout, layout, pGeneratedCommandsInfo->indirectCommandsLayout);
 VK_FROM_HANDLE(radv_pipeline, pipeline, pGeneratedCommandsInfo->pipeline);
+const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);

 *upload_size = MAX2(*upload_size, 16);

@@ -2074,7 +2092,7 @@ radv_prepare_dgc_compute(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedCo
 struct radv_shader *cs = radv_get_shader(compute_pipeline->base.shaders, MESA_SHADER_COMPUTE);

 if (cs->info.wave_size == 32) {
-assert(cmd_buffer->device->physical_device->info.gfx_level >= GFX10);
+assert(pdev->info.gfx_level >= GFX10);
 params->dispatch_initiator |= S_00B800_CS_W32_EN(1);
 }

@@ -2276,9 +2294,9 @@ radv_GetPipelineIndirectMemoryRequirementsNV(VkDevice _device, const VkComputePi
 VkMemoryRequirements *reqs = &pMemoryRequirements->memoryRequirements;
 const uint32_t size = sizeof(struct radv_compute_pipeline_metadata);
 RADV_FROM_HANDLE(radv_device, device, _device);
+const struct radv_physical_device *pdev = radv_device_physical(device);

-reqs->memoryTypeBits = ((1u << device->physical_device->memory_properties.memoryTypeCount) - 1u) &
-~device->physical_device->memory_types_32bit;
+reqs->memoryTypeBits = ((1u << pdev->memory_properties.memoryTypeCount) - 1u) & ~pdev->memory_types_32bit;
 reqs->alignment = 4;
 reqs->size = align(size, reqs->alignment);
 }
@@ -195,12 +195,13 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc
 mem->user_ptr = host_ptr_info->pHostPointer;
 }
 } else {
+const struct radv_physical_device *pdev = radv_device_physical(device);
 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
 uint32_t heap_index;

-heap_index = device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex].heapIndex;
-domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex];
-flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex];
+heap_index = pdev->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex].heapIndex;
+domain = pdev->memory_domains[pAllocateInfo->memoryTypeIndex];
+flags |= pdev->memory_flags[pAllocateInfo->memoryTypeIndex];

 if (export_info && export_info->handleTypes) {
 /* Setting RADEON_FLAG_GTT_WC in case the bo is spilled to GTT. This is important when the
@@ -226,7 +227,7 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc
 flags |= RADEON_FLAG_ZERO_VRAM;

 if (device->overallocation_disallowed) {
-uint64_t total_size = device->physical_device->memory_properties.memoryHeaps[heap_index].size;
+uint64_t total_size = pdev->memory_properties.memoryHeaps[heap_index].size;

 mtx_lock(&device->overallocation_mutex);
 if (device->allocated_memory_size[heap_index] + alloc_size > total_size) {
@ -238,8 +239,8 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc
|
|||
mtx_unlock(&device->overallocation_mutex);
|
||||
}
|
||||
|
||||
result = radv_bo_create(device, alloc_size, device->physical_device->info.max_alignment, domain, flags, priority,
|
||||
replay_address, is_internal, &mem->bo);
|
||||
result = radv_bo_create(device, alloc_size, pdev->info.max_alignment, domain, flags, priority, replay_address,
|
||||
is_internal, &mem->bo);
|
||||
|
||||
if (result != VK_SUCCESS) {
|
||||
if (device->overallocation_disallowed) {
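For context, the unchanged lines around this hunk implement a reserve/release budget check; a condensed sketch of that pattern, with the failure path guessed (it sits outside this excerpt):

   /* Condensed from the context lines above; the error code here is an
    * assumption, not verbatim. */
   mtx_lock(&device->overallocation_mutex);
   if (device->allocated_memory_size[heap_index] + alloc_size > total_size) {
      mtx_unlock(&device->overallocation_mutex);
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
   }
   device->allocated_memory_size[heap_index] += alloc_size; /* reserve */
   mtx_unlock(&device->overallocation_mutex);
   /* ...and give the reservation back if radv_bo_create() later fails. */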
@@ -1881,6 +1881,7 @@ radv_GetImageSparseMemoryRequirements2(VkDevice _device, const VkImageSparseMemo
 {
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_image, image, pInfo->image);
+   struct radv_physical_device *pdev = radv_device_physical(device);
 
    if (!(image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)) {
       *pSparseMemoryRequirementCount = 0;
@@ -1892,12 +1893,12 @@ radv_GetImageSparseMemoryRequirements2(VkDevice _device, const VkImageSparseMemo
 
    vk_outarray_append_typed(VkSparseImageMemoryRequirements2, &out, req)
    {
-      fill_sparse_image_format_properties(device->physical_device, image->vk.image_type, image->vk.format,
-                                          &req->memoryRequirements.formatProperties);
+      fill_sparse_image_format_properties(pdev, image->vk.image_type, image->vk.format,
+                                          &req->memoryRequirements.formatProperties);
      req->memoryRequirements.imageMipTailFirstLod = image->planes[0].surface.first_mip_tail_level;
 
      if (req->memoryRequirements.imageMipTailFirstLod < image->vk.mip_levels) {
-        if (device->physical_device->info.gfx_level >= GFX9) {
+        if (pdev->info.gfx_level >= GFX9) {
           /* The tail is always a single tile per layer. */
           req->memoryRequirements.imageMipTailSize = 65536;
           req->memoryRequirements.imageMipTailOffset =
@@ -42,6 +42,8 @@
 static unsigned
 radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
       assert(pCreateInfo->samples <= 1);
       return RADEON_SURF_MODE_LINEAR_ALIGNED;
@@ -54,8 +56,7 @@ radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateI
    if (pCreateInfo->samples > 1)
      return RADEON_SURF_MODE_2D;
 
-   if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) &&
-       device->physical_device->info.gfx_level <= GFX8) {
+   if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) && pdev->info.gfx_level <= GFX8) {
      /* this causes hangs in some VK CTS tests on GFX9. */
      /* Textures with a very small height are recommended to be linear. */
      if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
@@ -71,14 +72,16 @@ radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateI
 static bool
 radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    /* TC-compat HTILE is only available for GFX8+. */
-   if (device->physical_device->info.gfx_level < GFX8)
+   if (pdev->info.gfx_level < GFX8)
      return false;
 
    /* TC-compat HTILE looks broken on Tonga (and Iceland is the same design) and the documented bug
    * workarounds don't help.
    */
-   if (device->physical_device->info.family == CHIP_TONGA || device->physical_device->info.family == CHIP_ICELAND)
+   if (pdev->info.family == CHIP_TONGA || pdev->info.family == CHIP_ICELAND)
      return false;
 
    if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
@@ -91,7 +94,7 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCrea
        (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
      return false;
 
-   if (device->physical_device->info.gfx_level < GFX9) {
+   if (pdev->info.gfx_level < GFX9) {
      /* TC-compat HTILE for MSAA depth/stencil images is broken
       * on GFX8 because the tiling doesn't match.
       */
@@ -114,7 +117,7 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCrea
    }
 
    /* GFX9 has issues when the sample count is 4 and the format is D16 */
-   if (device->physical_device->info.gfx_level == GFX9 && pCreateInfo->samples == 4 && format == VK_FORMAT_D16_UNORM)
+   if (pdev->info.gfx_level == GFX9 && pCreateInfo->samples == 4 && format == VK_FORMAT_D16_UNORM)
      return false;
 
    return true;
@@ -123,8 +126,10 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCrea
 static bool
 radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    if (info->bo_metadata) {
-      if (device->physical_device->info.gfx_level >= GFX9)
+      if (pdev->info.gfx_level >= GFX9)
        return info->bo_metadata->u.gfx9.scanout;
      else
        return info->bo_metadata->u.legacy.scanout;
@@ -237,8 +242,10 @@ static bool
 radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image, const VkImageCreateInfo *pCreateInfo,
                              VkFormat format, bool *sign_reinterpret)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    /* DCC (Delta Color Compression) is only available for GFX8+. */
-   if (device->physical_device->info.gfx_level < GFX8)
+   if (pdev->info.gfx_level < GFX8)
      return false;
 
    const VkImageCompressionControlEXT *compression =
@@ -260,7 +267,7 @@ radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *imag
    * decompressing a lot anyway we might as well not have DCC.
    */
   if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
-      (device->physical_device->info.gfx_level < GFX10 ||
+      (pdev->info.gfx_level < GFX10 ||
       radv_formats_is_atomic_allowed(device, pCreateInfo->pNext, format, pCreateInfo->flags)))
      return false;
 
@@ -278,24 +285,22 @@ radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *imag
    if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
      return false;
 
-   if (device->physical_device->info.gfx_level < GFX10) {
+   if (pdev->info.gfx_level < GFX10) {
      /* TODO: Add support for DCC MSAA on GFX8-9. */
-      if (pCreateInfo->samples > 1 && !device->physical_device->dcc_msaa_allowed)
+      if (pCreateInfo->samples > 1 && !pdev->dcc_msaa_allowed)
        return false;
 
      /* TODO: Add support for DCC layers/mipmaps on GFX9. */
-      if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
-          device->physical_device->info.gfx_level == GFX9)
+      if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) && pdev->info.gfx_level == GFX9)
        return false;
    }
 
    /* DCC MSAA can't work on GFX10.3 and earlier without FMASK. */
-   if (pCreateInfo->samples > 1 && device->physical_device->info.gfx_level < GFX11 &&
+   if (pCreateInfo->samples > 1 && pdev->info.gfx_level < GFX11 &&
       (device->instance->debug_flags & RADV_DEBUG_NO_FMASK))
      return false;
 
-   return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format, pCreateInfo->flags,
-                                          sign_reinterpret);
+   return radv_are_formats_dcc_compatible(pdev, pCreateInfo->pNext, format, pCreateInfo->flags, sign_reinterpret);
 }
 
 static bool
@@ -331,7 +336,9 @@ radv_use_dcc_for_image_late(struct radv_device *device, struct radv_image *image
 bool
 radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image)
 {
-   return ac_surface_supports_dcc_image_stores(device->physical_device->info.gfx_level, &image->planes[0].surface);
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
+   return ac_surface_supports_dcc_image_stores(pdev->info.gfx_level, &image->planes[0].surface);
 }
 
 /*
@@ -347,12 +354,14 @@ radv_image_use_dcc_predication(const struct radv_device *device, const struct ra
 static inline bool
 radv_use_fmask_for_image(const struct radv_device *device, const struct radv_image *image)
 {
-   if (device->physical_device->info.gfx_level == GFX9 && image->vk.array_layers > 1) {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
+   if (pdev->info.gfx_level == GFX9 && image->vk.array_layers > 1) {
      /* On GFX9, FMASK can be interleaved with layers and this isn't properly supported. */
      return false;
    }
 
-   return device->physical_device->use_fmask && image->vk.samples > 1 &&
+   return pdev->use_fmask && image->vk.samples > 1 &&
          ((image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
           (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
 }
@@ -361,7 +370,8 @@ static inline bool
 radv_use_htile_for_image(const struct radv_device *device, const struct radv_image *image,
                          const VkImageCreateInfo *pCreateInfo)
 {
-   const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
 
    const VkImageCompressionControlEXT *compression =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_COMPRESSION_CONTROL_EXT);
@@ -374,11 +384,10 @@ radv_use_htile_for_image(const struct radv_device *device, const struct radv_ima
    * - Investigate about mips+layers.
    * - Enable on other gens.
    */
-   bool use_htile_for_mips = image->vk.array_layers == 1 && device->physical_device->info.gfx_level >= GFX10;
+   bool use_htile_for_mips = image->vk.array_layers == 1 && pdev->info.gfx_level >= GFX10;
 
    /* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */
-   if (device->physical_device->info.gfx_level == GFX10 && image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT &&
-       image->vk.mip_levels > 1)
+   if (pdev->info.gfx_level == GFX10 && image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT && image->vk.mip_levels > 1)
      return false;
 
    /* Do not enable HTILE for very small images because it seems less performant but make sure it's
@@ -395,19 +404,21 @@ radv_use_htile_for_image(const struct radv_device *device, const struct radv_ima
 static bool
 radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    /* TC-compat CMASK is only available for GFX8+. */
-   if (device->physical_device->info.gfx_level < GFX8)
+   if (pdev->info.gfx_level < GFX8)
      return false;
 
    /* GFX9 has issues when sample count is greater than 2 */
-   if (device->physical_device->info.gfx_level == GFX9 && image->vk.samples > 2)
+   if (pdev->info.gfx_level == GFX9 && image->vk.samples > 2)
      return false;
 
    if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
      return false;
 
    /* TC-compat CMASK with storage images is supported on GFX10+. */
-   if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && device->physical_device->info.gfx_level < GFX10)
+   if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && pdev->info.gfx_level < GFX10)
      return false;
 
    /* Do not enable TC-compatible if the image isn't readable by a shader
@@ -427,7 +438,9 @@ radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image
 static uint32_t
 radv_get_bo_metadata_word1(const struct radv_device *device)
 {
-   return (ATI_VENDOR_ID << 16) | device->physical_device->info.pci_id;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
+   return (ATI_VENDOR_ID << 16) | pdev->info.pci_id;
 }
 
 static bool
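The metadata word packs the vendor ID into the high 16 bits and the device's pci_id into the low 16, so both halves can be recovered with shifts and masks:

   /* Illustration of the packing above (ATI_VENDOR_ID is 0x1002): */
   uint32_t word1 = (ATI_VENDOR_ID << 16) | pdev->info.pci_id;
   uint16_t vendor = word1 >> 16;      /* 0x1002 */
   uint16_t pci_id = word1 & 0xffff;   /* pdev->info.pci_id */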
@@ -446,9 +459,11 @@ static void
 radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf *surface,
                                  const struct radeon_bo_metadata *md)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    surface->flags = RADEON_SURF_CLR(surface->flags, MODE);
 
-   if (device->physical_device->info.gfx_level >= GFX9) {
+   if (pdev->info.gfx_level >= GFX9) {
      if (md->u.gfx9.swizzle_mode > 0)
        surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else
@@ -476,6 +491,7 @@ static VkResult
 radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image,
                             const struct radv_image_create_info *create_info, struct ac_surf_info *image_info)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    unsigned width = image->vk.extent.width;
    unsigned height = image->vk.extent.height;
 
@@ -489,7 +505,7 @@ radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image
    if (create_info->bo_metadata && radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
      const struct radeon_bo_metadata *md = create_info->bo_metadata;
 
-      if (device->physical_device->info.gfx_level >= GFX10) {
+      if (pdev->info.gfx_level >= GFX10) {
        width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
        height = G_00A008_HEIGHT(md->metadata[4]) + 1;
      } else {
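The GFX10 width is stored minus one and split across two metadata dwords: the low two bits via WIDTH_LO and the remaining bits via WIDTH_HI, which is why the decode above shifts the high part left by 2 and adds 1 back. In plain arithmetic (standing in for the generated G_00A004_/G_00A008_ accessors):

   uint32_t stored = width - 1;               /* e.g. width = 1024 -> 1023 */
   uint32_t width_lo = stored & 0x3;          /* low 2 bits: 3 */
   uint32_t width_hi = stored >> 2;           /* remaining bits: 255 */
   uint32_t decoded = width_lo + (width_hi << 2) + 1;   /* 1024 again */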
@@ -509,7 +525,7 @@ radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image
               "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
               image->vk.extent.width, image->vk.extent.height, width, height);
       return VK_ERROR_INVALID_EXTERNAL_HANDLE;
-   } else if (device->physical_device->info.gfx_level >= GFX10) {
+   } else if (pdev->info.gfx_level >= GFX10) {
      fprintf(stderr,
              "Tried to import an image with inconsistent width on GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
@@ -535,6 +551,8 @@ static VkResult
 radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image,
                                  const struct radv_image_create_info *create_info, struct ac_surf_info *image_info)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
    if (result != VK_SUCCESS)
      return result;
@@ -552,7 +570,7 @@ radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *
      image_info->surf_index = NULL;
    }
 
-   if (create_info->prime_blit_src && !device->physical_device->info.sdma_supports_compression) {
+   if (create_info->prime_blit_src && !pdev->info.sdma_supports_compression) {
      /* Older SDMA hw can't handle DCC */
      image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
    }
@@ -579,9 +597,10 @@ static uint64_t
 radv_get_surface_flags(struct radv_device *device, struct radv_image *image, unsigned plane_id,
                        const VkImageCreateInfo *pCreateInfo, VkFormat image_format)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    uint64_t flags;
    unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
-   VkFormat format = radv_image_get_plane_format(device->physical_device, image, plane_id);
+   VkFormat format = radv_image_get_plane_format(pdev, image, plane_id);
    const struct util_format_description *desc = vk_format_description(format);
    bool is_depth, is_stencil;
 
@@ -616,7 +635,7 @@ radv_get_surface_flags(struct radv_device *device, struct radv_image *image, uns
    if (is_depth) {
      flags |= RADEON_SURF_ZBUFFER;
 
-      if (is_depth && is_stencil && device->physical_device->info.gfx_level <= GFX8) {
+      if (is_depth && is_stencil && pdev->info.gfx_level <= GFX8) {
        if (!(pCreateInfo->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT))
          flags |= RADEON_SURF_NO_RENDER_TARGET;
 
@@ -641,7 +660,7 @@ radv_get_surface_flags(struct radv_device *device, struct radv_image *image, uns
    if (is_stencil)
      flags |= RADEON_SURF_SBUFFER;
 
-   if (device->physical_device->info.gfx_level >= GFX9 && pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
+   if (pdev->info.gfx_level >= GFX9 && pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
       vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format))
      flags |= RADEON_SURF_NO_RENDER_TARGET;
 
@@ -656,7 +675,7 @@ radv_get_surface_flags(struct radv_device *device, struct radv_image *image, uns
    }
 
    if (image->queue_family_mask & BITFIELD_BIT(RADV_QUEUE_TRANSFER)) {
-      if (!device->physical_device->info.sdma_supports_compression)
+      if (!pdev->info.sdma_supports_compression)
        flags |= RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_HTILE;
    }
 
@@ -720,12 +739,14 @@ radv_compose_swizzle(const struct util_format_description *desc, const VkCompone
 bool
 vi_alpha_is_on_msb(const struct radv_device *device, const VkFormat format)
 {
-   if (device->physical_device->info.gfx_level >= GFX11)
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
+   if (pdev->info.gfx_level >= GFX11)
      return false;
 
    const struct util_format_description *desc = vk_format_description(format);
 
-   if (device->physical_device->info.gfx_level >= GFX10 && desc->nr_channels == 1)
+   if (pdev->info.gfx_level >= GFX10 && desc->nr_channels == 1)
      return desc->swizzle[3] == PIPE_SWIZZLE_X;
 
    return radv_translate_colorswap(format, false) <= 1;
@@ -735,13 +756,13 @@ static void
 radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image, unsigned plane_id,
                            struct radeon_bo_metadata *md)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    static const VkComponentMapping fixedmapping;
-   const VkFormat plane_format = radv_image_get_plane_format(device->physical_device, image, plane_id);
+   const VkFormat plane_format = radv_image_get_plane_format(pdev, image, plane_id);
    const unsigned plane_width = vk_format_get_plane_width(image->vk.format, plane_id, image->vk.extent.width);
    const unsigned plane_height = vk_format_get_plane_height(image->vk.format, plane_id, image->vk.extent.height);
    struct radeon_surf *surface = &image->planes[plane_id].surface;
-   const struct legacy_surf_level *base_level_info =
-      device->physical_device->info.gfx_level <= GFX8 ? &surface->u.legacy.level[0] : NULL;
+   const struct legacy_surf_level *base_level_info = pdev->info.gfx_level <= GFX8 ? &surface->u.legacy.level[0] : NULL;
    uint32_t desc[8];
 
    radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->vk.image_type, plane_format,
@@ -751,21 +772,22 @@ radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image,
    radv_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, 0, 0, surface->blk_w, false, false, false,
                                     false, desc, NULL);
 
-   ac_surface_compute_umd_metadata(&device->physical_device->info, surface, image->vk.mip_levels, desc,
-                                   &md->size_metadata, md->metadata,
+   ac_surface_compute_umd_metadata(&pdev->info, surface, image->vk.mip_levels, desc, &md->size_metadata, md->metadata,
                                    device->instance->debug_flags & RADV_DEBUG_EXTRA_MD);
 }
 
 void
 radv_init_metadata(struct radv_device *device, struct radv_image *image, struct radeon_bo_metadata *metadata)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    /* use plane 0, even when there are multiple planes, to follow radeonsi */
    const unsigned plane_id = 0;
    struct radeon_surf *surface = &image->planes[plane_id].surface;
 
    memset(metadata, 0, sizeof(*metadata));
 
-   if (device->physical_device->info.gfx_level >= GFX9) {
+   if (pdev->info.gfx_level >= GFX9) {
      uint64_t dcc_offset =
        image->bindings[0].offset + (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset);
      metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode;
@@ -796,7 +818,8 @@ void
 radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image, uint64_t offset,
                                   uint32_t stride)
 {
-   ac_surface_override_offset_stride(&device->physical_device->info, &image->planes[0].surface, image->vk.array_layers,
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   ac_surface_override_offset_stride(&pdev->info, &image->planes[0].surface, image->vk.array_layers,
                                      image->vk.mip_levels, offset, stride);
 }
 
@@ -819,6 +842,8 @@ radv_image_alloc_single_sample_cmask(const struct radv_device *device, const str
 static void
 radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    /* images with modifiers can be potentially imported */
    if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return;
@@ -839,7 +864,7 @@ radv_image_alloc_values(const struct radv_device *device, struct radv_image *ima
      image->size += 8 * image->vk.mip_levels;
    }
 
-   if (radv_image_is_tc_compat_htile(image) && device->physical_device->info.has_tc_compat_zrange_bug) {
+   if (radv_image_is_tc_compat_htile(image) && pdev->info.has_tc_compat_zrange_bug) {
      /* Metadata for the TC-compatible HTILE hardware bug which
       * have to be fixed by updating ZRANGE_PRECISION when doing
       * fast depth clears to 0.0f.
@@ -855,13 +880,14 @@ radv_image_alloc_values(const struct radv_device *device, struct radv_image *ima
 static bool
 radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
 {
-   const struct radeon_info *gpu_info = &device->physical_device->info;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const struct radeon_info *gpu_info = &pdev->info;
    int log2_samples = util_logbase2(image->vk.samples);
 
    assert(gpu_info->gfx_level >= GFX10);
 
    for (unsigned i = 0; i < image->plane_count; ++i) {
-      VkFormat fmt = radv_image_get_plane_format(device->physical_device, image, i);
+      VkFormat fmt = radv_image_get_plane_format(pdev, image, i);
      int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
      int log2_bpp_and_samples;
 
@@ -903,9 +929,11 @@ radv_image_is_pipe_misaligned(const struct radv_device *device, const struct rad
 static bool
 radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
 {
-   if (device->physical_device->info.gfx_level >= GFX10) {
-      return !device->physical_device->info.tcc_rb_non_coherent && !radv_image_is_pipe_misaligned(device, image);
-   } else if (device->physical_device->info.gfx_level == GFX9) {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
+   if (pdev->info.gfx_level >= GFX10) {
+      return !pdev->info.tcc_rb_non_coherent && !radv_image_is_pipe_misaligned(device, image);
+   } else if (pdev->info.gfx_level == GFX9) {
      if (image->vk.samples == 1 &&
        (image->vk.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
        !vk_format_has_stencil(image->vk.format)) {
@@ -926,6 +954,8 @@ radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_im
 bool
 radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
      return false;
 
@@ -934,7 +964,7 @@ radv_image_can_fast_clear(const struct radv_device *device, const struct radv_im
      return false;
 
    /* RB+ doesn't work with CMASK fast clear on Stoney. */
-   if (!radv_image_has_dcc(image) && device->physical_device->info.family == CHIP_STONEY)
+   if (!radv_image_has_dcc(image) && pdev->info.family == CHIP_STONEY)
      return false;
 
    /* Fast-clears with CMASK aren't supported for 128-bit formats. */
@@ -958,8 +988,10 @@ radv_image_can_fast_clear(const struct radv_device *device, const struct radv_im
 static bool
 radv_image_use_comp_to_single(const struct radv_device *device, const struct radv_image *image)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    /* comp-to-single is only available for GFX10+. */
-   if (device->physical_device->info.gfx_level < GFX10)
+   if (pdev->info.gfx_level < GFX10)
      return false;
 
    /* If the image can't be fast cleared, comp-to-single can't be used. */
@@ -972,7 +1004,7 @@ radv_image_use_comp_to_single(const struct radv_device *device, const struct rad
 
    /* It seems 8bpp and 16bpp require RB+ to work. */
    unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk.format);
-   if (bytes_per_pixel <= 2 && !device->physical_device->info.rbplus_allowed)
+   if (bytes_per_pixel <= 2 && !pdev->info.rbplus_allowed)
      return false;
 
    return true;
@@ -1049,6 +1081,8 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
                          const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
                          const struct VkVideoProfileListInfoKHR *profile_list, struct radv_image *image)
 {
+   struct radv_physical_device *pdev = radv_device_physical(device);
+
    /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
    * common internal case. */
    create_info.vk_info = NULL;
@@ -1060,7 +1094,7 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
 
    assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count);
 
-   radv_image_reset_layout(device->physical_device, image);
+   radv_image_reset_layout(pdev, image);
 
    /*
    * Due to how the decoder works, the user can't supply an oversized image, because if it attempts
@@ -1070,17 +1104,17 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
    if (image->vk.usage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR)) {
      assert(profile_list);
      uint32_t width_align, height_align;
-      radv_video_get_profile_alignments(device->physical_device, profile_list, &width_align, &height_align);
+      radv_video_get_profile_alignments(pdev, profile_list, &width_align, &height_align);
      image_info.width = align(image_info.width, width_align);
      image_info.height = align(image_info.height, height_align);
 
-      if (radv_has_uvd(device->physical_device) && image->vk.usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) {
+      if (radv_has_uvd(pdev) && image->vk.usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) {
        /* UVD and kernel demand a full DPB allocation. */
        image_info.array_size = MIN2(16, image_info.array_size);
      }
    }
 
-   unsigned plane_count = radv_get_internal_plane_count(device->physical_device, image->vk.format);
+   unsigned plane_count = radv_get_internal_plane_count(pdev, image->vk.format);
    for (unsigned plane = 0; plane < plane_count; ++plane) {
      struct ac_surf_info info = image_info;
      uint64_t offset;
@@ -1101,9 +1135,9 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
      }
 
      if (create_info.bo_metadata && !mod_info &&
-         !ac_surface_apply_umd_metadata(&device->physical_device->info, &image->planes[plane].surface,
-                                        image->vk.samples, image->vk.mip_levels,
-                                        create_info.bo_metadata->size_metadata, create_info.bo_metadata->metadata))
+         !ac_surface_apply_umd_metadata(&pdev->info, &image->planes[plane].surface, image->vk.samples,
+                                        image->vk.mip_levels, create_info.bo_metadata->size_metadata,
+                                        create_info.bo_metadata->metadata))
        return VK_ERROR_INVALID_EXTERNAL_HANDLE;
 
      if (!create_info.no_metadata_planes && !create_info.bo_metadata && plane_count == 1 && !mod_info)
@@ -1121,8 +1155,8 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
        stride = 0; /* 0 means no override */
      }
 
-      if (!ac_surface_override_offset_stride(&device->physical_device->info, &image->planes[plane].surface,
-                                             image->vk.array_layers, image->vk.mip_levels, offset, stride))
+      if (!ac_surface_override_offset_stride(&pdev->info, &image->planes[plane].surface, image->vk.array_layers,
+                                             image->vk.mip_levels, offset, stride))
        return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
 
      /* Validate DCC offsets in modifier layout. */
@@ -1132,8 +1166,8 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
        return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
 
      for (unsigned i = 1; i < mem_planes; ++i) {
-        if (ac_surface_get_plane_offset(device->physical_device->info.gfx_level, &image->planes[plane].surface, i,
-                                        0) != mod_info->pPlaneLayouts[i].offset)
+        if (ac_surface_get_plane_offset(pdev->info.gfx_level, &image->planes[plane].surface, i, 0) !=
+            mod_info->pPlaneLayouts[i].offset)
          return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
      }
    }
@@ -1141,7 +1175,7 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
      image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
      image->alignment = MAX2(image->alignment, 1 << image->planes[plane].surface.alignment_log2);
 
-      image->planes[plane].format = radv_image_get_plane_format(device->physical_device, image, plane);
+      image->planes[plane].format = radv_image_get_plane_format(pdev, image, plane);
    }
 
    image->tc_compatible_cmask = radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image);
@@ -1177,6 +1211,8 @@ radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAll
 static void
 radv_image_print_info(struct radv_device *device, struct radv_image *image)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    fprintf(stderr, "Image:\n");
    fprintf(stderr,
            " Info: size=%" PRIu64 ", alignment=%" PRIu32 ", "
@@ -1188,11 +1224,11 @@ radv_image_print_info(struct radv_device *device, struct radv_image *image)
      const struct radv_image_plane *plane = &image->planes[i];
      const struct radeon_surf *surf = &plane->surface;
      const struct util_format_description *desc = vk_format_description(plane->format);
-      uint64_t offset = ac_surface_get_plane_offset(device->physical_device->info.gfx_level, &plane->surface, 0, 0);
+      uint64_t offset = ac_surface_get_plane_offset(pdev->info.gfx_level, &plane->surface, 0, 0);
 
      fprintf(stderr, " Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset);
 
-      ac_surface_print_info(stderr, &device->physical_device->info, surf);
+      ac_surface_print_info(stderr, &pdev->info, surf);
    }
 }
 
@@ -1200,7 +1236,7 @@ static uint64_t
 radv_select_modifier(const struct radv_device *dev, VkFormat format,
                      const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list)
 {
-   const struct radv_physical_device *pdev = dev->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(dev);
    unsigned mod_count;
 
    assert(mod_list->drmFormatModifierCount);
@@ -1238,6 +1274,7 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_
                   const VkAllocationCallbacks *alloc, VkImage *pImage, bool is_internal)
 {
    RADV_FROM_HANDLE(radv_device, device, _device);
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
    uint64_t modifier = DRM_FORMAT_MOD_INVALID;
    struct radv_image *image = NULL;
@@ -1250,7 +1287,7 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_
    const struct VkVideoProfileListInfoKHR *profile_list =
      vk_find_struct_const(pCreateInfo->pNext, VIDEO_PROFILE_LIST_INFO_KHR);
 
-   unsigned plane_count = radv_get_internal_plane_count(device->physical_device, format);
+   unsigned plane_count = radv_get_internal_plane_count(pdev, format);
 
    const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;
 
@@ -1270,8 +1307,7 @@ radv_image_create(VkDevice _device, const struct radv_image_create_info *create_
          pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
        image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
      else
-        image->queue_family_mask |=
-           1u << vk_queue_to_radv(device->physical_device, pCreateInfo->pQueueFamilyIndices[i]);
+        image->queue_family_mask |= 1u << vk_queue_to_radv(pdev, pCreateInfo->pQueueFamilyIndices[i]);
 
    /* This queue never really accesses the image. */
    image->queue_family_mask &= ~(1u << RADV_QUEUE_SPARSE);
@@ -1375,10 +1411,12 @@ bool
 radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image, VkImageLayout layout,
                                 unsigned queue_mask)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    /* Don't compress exclusive images used on transfer queues when SDMA doesn't support HTILE.
    * Note that HTILE is already disabled on concurrent images when not supported.
    */
-   if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !device->physical_device->info.sdma_supports_compression)
+   if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !pdev->info.sdma_supports_compression)
      return false;
 
    switch (layout) {
@@ -1452,6 +1490,8 @@ bool
 radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image, unsigned level,
                            VkImageLayout layout, unsigned queue_mask)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    if (!radv_dcc_enabled(image, level))
      return false;
 
@@ -1470,7 +1510,7 @@ radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_i
    /* Don't compress exclusive images used on transfer queues when SDMA doesn't support DCC.
    * Note that DCC is already disabled on concurrent images when not supported.
    */
-   if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !device->physical_device->info.sdma_supports_compression)
+   if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !pdev->info.sdma_supports_compression)
      return false;
 
    if (layout == VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT) {
@@ -1480,7 +1520,7 @@ radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_i
      return false;
    }
 
-   return device->physical_device->info.gfx_level >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL;
+   return pdev->info.gfx_level >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL;
 }
 
 enum radv_fmask_compression
@@ -1533,11 +1573,13 @@ radv_image_queue_family_mask(const struct radv_image *image, enum radv_queue_fam
 bool
 radv_image_is_renderable(const struct radv_device *device, const struct radv_image *image)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT ||
       image->vk.format == VK_FORMAT_R32G32B32_SFLOAT)
      return false;
 
-   if (device->physical_device->info.gfx_level >= GFX9 && image->vk.image_type == VK_IMAGE_TYPE_3D &&
+   if (pdev->info.gfx_level >= GFX9 && image->vk.image_type == VK_IMAGE_TYPE_3D &&
      vk_format_get_blocksizebits(image->vk.format) == 128 && vk_format_is_compressed(image->vk.format))
      return false;
 
@@ -1572,11 +1614,11 @@ radv_CreateImage(VkDevice _device, const VkImageCreateInfo *pCreateInfo, const V
    * we're guaranteed to access an Android object incorrectly.
    */
    RADV_FROM_HANDLE(radv_device, device, _device);
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const VkImageSwapchainCreateInfoKHR *swapchain_info =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_SWAPCHAIN_CREATE_INFO_KHR);
    if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) {
-      return wsi_common_create_swapchain_image(device->physical_device->vk.wsi_device, pCreateInfo,
-                                               swapchain_info->swapchain, pImage);
+      return wsi_common_create_swapchain_image(pdev->vk.wsi_device, pCreateInfo, swapchain_info->swapchain, pImage);
    }
 #endif
 
@@ -1686,6 +1728,7 @@ radv_GetImageSubresourceLayout2KHR(VkDevice _device, VkImage _image, const VkIma
 {
    RADV_FROM_HANDLE(radv_image, image, _image);
    RADV_FROM_HANDLE(radv_device, device, _device);
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    int level = pSubresource->imageSubresource.mipLevel;
    int layer = pSubresource->imageSubresource.arrayLayer;
 
@@ -1703,18 +1746,17 @@ radv_GetImageSubresourceLayout2KHR(VkDevice _device, VkImage _image, const VkIma
      assert(level == 0);
      assert(layer == 0);
 
-      pLayout->subresourceLayout.offset =
-         ac_surface_get_plane_offset(device->physical_device->info.gfx_level, surface, mem_plane_id, 0);
+      pLayout->subresourceLayout.offset = ac_surface_get_plane_offset(pdev->info.gfx_level, surface, mem_plane_id, 0);
      pLayout->subresourceLayout.rowPitch =
-         ac_surface_get_plane_stride(device->physical_device->info.gfx_level, surface, mem_plane_id, level);
+         ac_surface_get_plane_stride(pdev->info.gfx_level, surface, mem_plane_id, level);
      pLayout->subresourceLayout.arrayPitch = 0;
      pLayout->subresourceLayout.depthPitch = 0;
      pLayout->subresourceLayout.size = ac_surface_get_plane_size(surface, mem_plane_id);
-   } else if (device->physical_device->info.gfx_level >= GFX9) {
+   } else if (pdev->info.gfx_level >= GFX9) {
      uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;
 
      pLayout->subresourceLayout.offset =
-         ac_surface_get_plane_offset(device->physical_device->info.gfx_level, &plane->surface, 0, layer) + level_offset;
+         ac_surface_get_plane_offset(pdev->info.gfx_level, &plane->surface, 0, layer) + level_offset;
      if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT ||
        image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) {
        /* Adjust the number of bytes between each row because
@@ -106,7 +106,8 @@ radv_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *
    uint64_t gpu_address = binding->bo ? radv_buffer_get_va(binding->bo) + binding->offset : 0;
    uint64_t va = gpu_address;
    uint8_t swizzle = plane->surface.tile_swizzle;
-   enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   enum amd_gfx_level gfx_level = pdev->info.gfx_level;
    uint64_t meta_va = 0;
    if (gfx_level >= GFX9) {
      if (is_stencil)
@@ -154,7 +155,7 @@ radv_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *
    * If an imported image is used with VK_IMAGE_VIEW_TYPE_2D_ARRAY, it may hang due to VM faults
    * because DEPTH means pitch with 2D, but it means depth with 2D array.
    */
-   if (device->physical_device->info.gfx_level >= GFX10_3 && plane->surface.u.gfx9.uses_custom_pitch) {
+   if (pdev->info.gfx_level >= GFX10_3 && plane->surface.u.gfx9.uses_custom_pitch) {
      assert((plane->surface.u.gfx9.surf_pitch * plane->surface.bpe) % 256 == 0);
      assert(image->vk.image_type == VK_IMAGE_TYPE_2D);
      assert(plane->surface.is_linear);
@@ -245,6 +246,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima
                               uint32_t *fmask_state, VkImageCreateFlags img_create_flags,
                               const struct ac_surf_nbc_view *nbc_view, const VkImageViewSlicedCreateInfoEXT *sliced_3d)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct util_format_description *desc;
    enum pipe_swizzle swizzle[4];
    unsigned img_format;
@@ -261,8 +263,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima
      desc = util_format_description(PIPE_FORMAT_R8G8B8X8_SRGB);
    }
 
-   img_format =
-      ac_get_gfx10_format_table(&device->physical_device->info)[vk_format_to_pipe_format(vk_format)].img_format;
+   img_format = ac_get_gfx10_format_table(&pdev->info)[vk_format_to_pipe_format(vk_format)].img_format;
 
    radv_compose_swizzle(desc, mapping, swizzle);
 
@@ -271,7 +272,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima
      type = V_008F1C_SQ_RSRC_IMG_3D;
    } else {
      type = radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, image->vk.samples, is_storage_image,
-                         device->physical_device->info.gfx_level == GFX9);
+                         pdev->info.gfx_level == GFX9);
    }
 
    if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
@@ -286,7 +287,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima
    state[0] = 0;
    state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1);
    state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
-              S_00A008_RESOURCE_LEVEL(device->physical_device->info.gfx_level < GFX11);
+              S_00A008_RESOURCE_LEVEL(pdev->info.gfx_level < GFX11);
    state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) | S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
               S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) | S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
               S_00A00C_BASE_LEVEL(image->vk.samples > 1 ? 0 : first_level) |
@@ -332,7 +333,7 @@ gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *ima
      max_mip = nbc_view->num_levels - 1;
 
    unsigned min_lod_clamped = radv_float_to_ufixed(CLAMP(min_lod, 0, 15), 8);
-   if (device->physical_device->info.gfx_level >= GFX11) {
+   if (pdev->info.gfx_level >= GFX11) {
      state[1] |= S_00A004_MAX_MIP(max_mip);
      state[5] |= S_00A014_MIN_LOD_LO(min_lod_clamped);
      state[6] |= S_00A018_MIN_LOD_HI(min_lod_clamped >> 5);
@@ -413,6 +414,7 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
                              unsigned width, unsigned height, unsigned depth, float min_lod, uint32_t *state,
                              uint32_t *fmask_state, VkImageCreateFlags img_create_flags)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct util_format_description *desc;
    enum pipe_swizzle swizzle[4];
    int first_non_void;
@@ -444,21 +446,19 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
    }
 
    /* S8 with either Z16 or Z32 HTILE need a special format. */
-   if (device->physical_device->info.gfx_level == GFX9 && vk_format == VK_FORMAT_S8_UINT &&
-       radv_image_is_tc_compat_htile(image)) {
+   if (pdev->info.gfx_level == GFX9 && vk_format == VK_FORMAT_S8_UINT && radv_image_is_tc_compat_htile(image)) {
      if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT)
        data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
      else if (image->vk.format == VK_FORMAT_D16_UNORM_S8_UINT)
        data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
    }
 
-   if (device->physical_device->info.gfx_level == GFX9 &&
-       img_create_flags & VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT) {
+   if (pdev->info.gfx_level == GFX9 && img_create_flags & VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT) {
      assert(image->vk.image_type == VK_IMAGE_TYPE_3D);
      type = V_008F1C_SQ_RSRC_IMG_3D;
    } else {
      type = radv_tex_dim(image->vk.image_type, view_type, image->vk.array_layers, image->vk.samples, is_storage_image,
-                         device->physical_device->info.gfx_level == GFX9);
+                         pdev->info.gfx_level == GFX9);
    }
 
    if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
@@ -484,7 +484,7 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
    state[6] = 0;
    state[7] = 0;
 
-   if (device->physical_device->info.gfx_level == GFX9) {
+   if (pdev->info.gfx_level == GFX9) {
      unsigned bc_swizzle = gfx9_border_color_swizzle(desc);
 
      /* Depth is the last accessible layer on Gfx9.
@@ -509,7 +509,7 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
    /* The last dword is unused by hw. The shader uses it to clear
    * bits in the first dword of sampler state.
    */
-   if (device->physical_device->info.gfx_level <= GFX7 && image->vk.samples <= 1) {
+   if (pdev->info.gfx_level <= GFX7 && image->vk.samples <= 1) {
      if (first_level == last_level)
        state[7] = C_008F30_MAX_ANISO_RATIO;
      else
@@ -529,7 +529,7 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
 
    va = gpu_address + image->bindings[0].offset + image->planes[0].surface.fmask_offset;
 
-   if (device->physical_device->info.gfx_level == GFX9) {
+   if (pdev->info.gfx_level == GFX9) {
      fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
      switch (image->vk.samples) {
      case 2:
@@ -576,7 +576,7 @@ gfx6_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
    fmask_state[6] = 0;
    fmask_state[7] = 0;
 
-   if (device->physical_device->info.gfx_level == GFX9) {
+   if (pdev->info.gfx_level == GFX9) {
      fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode);
      fmask_state[4] |=
        S_008F20_DEPTH(last_layer) | S_008F20_PITCH(image->planes[0].surface.u.gfx9.color.fmask_epitch);
@@ -615,7 +615,9 @@ radv_make_texture_descriptor(struct radv_device *device, struct radv_image *imag
                              uint32_t *fmask_state, VkImageCreateFlags img_create_flags,
                              const struct ac_surf_nbc_view *nbc_view, const VkImageViewSlicedCreateInfoEXT *sliced_3d)
 {
-   if (device->physical_device->info.gfx_level >= GFX10) {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
+   if (pdev->info.gfx_level >= GFX10) {
      gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping, first_level,
                                    last_level, first_layer, last_layer, width, height, depth, min_lod, state,
                                    fmask_state, img_create_flags, nbc_view, sliced_3d);
@@ -630,12 +632,13 @@ static inline void
 compute_non_block_compressed_view(struct radv_device *device, const struct radv_image_view *iview,
                                   struct ac_surf_nbc_view *nbc_view)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_image *image = iview->image;
    const struct radeon_surf *surf = &image->planes[0].surface;
    struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws);
    struct ac_surf_info surf_info = radv_get_ac_surf_info(device, image);
 
-   ac_surface_compute_nbc_view(addrlib, &device->physical_device->info, surf, &surf_info, iview->vk.base_mip_level,
+   ac_surface_compute_nbc_view(addrlib, &pdev->info, surf, &surf_info, iview->vk.base_mip_level,
                                iview->vk.base_array_layer, nbc_view);
 }
 
@@ -647,6 +650,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic
                                 const struct ac_surf_nbc_view *nbc_view,
                                 const VkImageViewSlicedCreateInfoEXT *sliced_3d, bool force_zero_base_mip)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_image *image = iview->image;
    struct radv_image_plane *plane = &image->planes[plane_id];
    bool is_stencil = iview->vk.aspects == VK_IMAGE_ASPECT_STENCIL_BIT;
@@ -665,7 +669,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic
    assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
    blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);
 
-   if (device->physical_device->info.gfx_level >= GFX9) {
+   if (pdev->info.gfx_level >= GFX9) {
      if (nbc_view->valid) {
        hw_level = nbc_view->level;
        iview->extent.width = nbc_view->width;
@@ -689,7 +693,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_devic
                                img_create_flags, nbc_view, sliced_3d);
 
    const struct legacy_surf_level *base_level_info = NULL;
-   if (device->physical_device->info.gfx_level <= GFX8) {
+   if (pdev->info.gfx_level <= GFX8) {
      if (is_stencil)
        base_level_info = &plane->surface.u.legacy.zs.stencil_level[iview->vk.base_mip_level];
      else
@@ -738,6 +742,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
                      const struct radv_image_view_extra_create_info *extra_create_info)
 {
    RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
    uint32_t plane_count = 1;
    float min_lod = 0.0f;
@@ -755,7 +760,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
    vk_image_view_init(&device->vk, &iview->vk, !from_client, pCreateInfo);
 
    bool force_zero_base_mip = true;
-   if (device->physical_device->info.gfx_level <= GFX8 && min_lod) {
+   if (pdev->info.gfx_level <= GFX8 && min_lod) {
      /* Do not force the base level to zero to workaround a spurious bug with mipmaps and min LOD. */
      force_zero_base_mip = false;
    }
@@ -800,15 +805,15 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
    }
 
    /* when the view format is emulated, redirect the view to the hidden plane 1 */
-   if (radv_is_format_emulated(device->physical_device, iview->vk.format)) {
-      assert(radv_is_format_emulated(device->physical_device, image->vk.format));
+   if (radv_is_format_emulated(pdev, iview->vk.format)) {
+      assert(radv_is_format_emulated(pdev, image->vk.format));
      iview->plane_id = 1;
      iview->vk.view_format = image->planes[iview->plane_id].format;
      iview->vk.format = image->planes[iview->plane_id].format;
      plane_count = 1;
    }
 
-   if (!force_zero_base_mip || device->physical_device->info.gfx_level >= GFX9) {
+   if (!force_zero_base_mip || pdev->info.gfx_level >= GFX9) {
      iview->extent = (VkExtent3D){
        .width = image->vk.extent.width,
        .height = image->vk.extent.height,
@@ -854,7 +859,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
    * block compatible format and the compressed format, so even if we take
    * the plain converted dimensions the physical layout is correct.
    */
-   if (device->physical_device->info.gfx_level >= GFX9 && vk_format_is_block_compressed(plane->format) &&
+   if (pdev->info.gfx_level >= GFX9 && vk_format_is_block_compressed(plane->format) &&
      !vk_format_is_block_compressed(iview->vk.format)) {
      /* If we have multiple levels in the view we should ideally take the last level,
       * but the mip calculation has a max(..., 1) so walking back to the base mip in an
@@ -879,7 +884,7 @@ radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
    * changes the descriptor's base level, and adjusts the address and
    * extents accordingly.
    */
-   if (device->physical_device->info.gfx_level >= GFX10 &&
+   if (pdev->info.gfx_level >= GFX10 &&
      (radv_minify(iview->extent.width, range->baseMipLevel) < lvl_width ||
       radv_minify(iview->extent.height, range->baseMipLevel) < lvl_height) &&
      iview->vk.layer_count == 1) {
@@ -32,7 +32,9 @@
 void
 radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf *cs, unsigned shaders)
 {
-   if (device->physical_device->info.gfx_level >= GFX11) {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
+   if (pdev->info.gfx_level >= GFX11) {
      radeon_set_uconfig_reg(cs, R_036760_SQG_PERFCOUNTER_CTRL, shaders & 0x7f);
    } else {
      radeon_set_uconfig_reg_seq(cs, R_036780_SQ_PERFCOUNTER_CTRL, 2);
@@ -74,12 +76,14 @@ radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf
 void
 radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    radv_emit_windowed_counters(device, cs, family, false);
 
    /* Stop SPM counters. */
    radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
                           S_036020_PERFMON_STATE(V_036020_CP_PERFMON_STATE_DISABLE_AND_RESET) |
-                             S_036020_SPM_PERFMON_STATE(device->physical_device->info.never_stop_sq_perf_counters
+                             S_036020_SPM_PERFMON_STATE(pdev->info.never_stop_sq_perf_counters
                                                            ? V_036020_STRM_PERFMON_STATE_START_COUNTING
                                                            : V_036020_STRM_PERFMON_STATE_STOP_COUNTING));
 }
@@ -466,7 +470,8 @@ radv_emit_instance(struct radv_cmd_buffer *cmd_buffer, int se, int instance)
 static void
 radv_emit_select(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, unsigned *selectors)
 {
-   const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
+   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
    const enum radv_queue_family qf = cmd_buffer->qf;
    struct ac_pc_block_base *regs = block->b->b;
    struct radeon_cmdbuf *cs = cmd_buffer->cs;
@@ -492,6 +497,7 @@ static void
 radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count,
                                  uint64_t va)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    struct ac_pc_block_base *regs = block->b->b;
    struct radeon_cmdbuf *cs = cmd_buffer->cs;
    unsigned reg = regs->counter0_lo;
@@ -510,7 +516,7 @@ radv_pc_emit_block_instance_read(struct radv_cmd_buffer *cmd_buffer, struct ac_p
      radeon_emit(cs, va);
      radeon_emit(cs, va >> 32);
 
-      va += sizeof(uint64_t) * 2 * radv_pc_get_num_instances(cmd_buffer->device->physical_device, block);
+      va += sizeof(uint64_t) * 2 * radv_pc_get_num_instances(pdev, block);
      reg += reg_delta;
    }
 }
@@ -518,9 +524,10 @@ static void
 radv_pc_sample_block(struct radv_cmd_buffer *cmd_buffer, struct ac_pc_block *block, unsigned count, uint64_t va)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    unsigned se_end = 1;
    if (block->b->b->flags & AC_PC_BLOCK_SE)
-      se_end = cmd_buffer->device->physical_device->info.max_se;
+      se_end = pdev->info.max_se;
 
    for (unsigned se = 0; se < se_end; ++se) {
      for (unsigned instance = 0; instance < block->num_instances; ++instance) {
@@ -555,8 +562,8 @@ radv_pc_wait_idle(struct radv_cmd_buffer *cmd_buffer)
 static void
 radv_pc_stop_and_sample(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va, bool end)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    struct radeon_cmdbuf *cs = cmd_buffer->cs;
-   struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
 
    radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
    radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
@@ -621,7 +628,7 @@ void
 radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va)
 {
    struct radeon_cmdbuf *cs = cmd_buffer->cs;
-   struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    ASSERTED unsigned cdw_max;
 
    cmd_buffer->state.uses_perf_counters = true;
@@ -698,6 +705,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
 void
 radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool *pool, uint64_t va)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    struct radeon_cmdbuf *cs = cmd_buffer->cs;
    ASSERTED unsigned cdw_max;
 
@@ -710,9 +718,8 @@ radv_pc_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_pool
    radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->device->perf_counter_bo);
 
    uint64_t perf_ctr_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_FENCE_OFFSET;
-   radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->info.gfx_level, cmd_buffer->qf,
-                                V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, perf_ctr_va,
-                                1, cmd_buffer->gfx9_fence_va);
+   radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0,
+                                EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, perf_ctr_va, 1, cmd_buffer->gfx9_fence_va);
    radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, perf_ctr_va, 1, 0xffffffff);
 
    radv_pc_wait_idle(cmd_buffer);
@@ -365,7 +365,8 @@ static unsigned
 lower_bit_size_callback(const nir_instr *instr, void *_)
 {
    struct radv_device *device = _;
-   enum amd_gfx_level chip = device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   enum amd_gfx_level chip = pdev->info.gfx_level;

    if (instr->type != nir_instr_type_alu)
       return 0;
@@ -437,7 +438,8 @@ opt_vectorize_callback(const nir_instr *instr, const void *_)
       return 0;

    const struct radv_device *device = _;
-   enum amd_gfx_level chip = device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   enum amd_gfx_level chip = pdev->info.gfx_level;
    if (chip < GFX9)
       return 1;

@@ -461,7 +463,8 @@ void
 radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
                      struct radv_shader_stage *stage)
 {
-   enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   enum amd_gfx_level gfx_level = pdev->info.gfx_level;
    bool progress;

    /* Wave and workgroup size should already be filled. */
@@ -548,8 +551,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
       NIR_PASS(_, stage->nir, ac_nir_lower_tex,
                &(ac_nir_lower_tex_options){
                   .gfx_level = gfx_level,
-                  .lower_array_layer_round_even =
-                     !device->physical_device->info.conformant_trunc_coord || device->disable_trunc_coord,
+                  .lower_array_layer_round_even = !pdev->info.conformant_trunc_coord || device->disable_trunc_coord,
                   .fix_derivs_in_divergent_cf = fix_derivs_in_divergent_cf,
                   .max_wqm_vgprs = 64, // TODO: improve spiller and RA support for linear VGPRs
                });
@@ -570,7 +572,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
    nir_move_options sink_opts = nir_move_const_undef | nir_move_copies;

    if (!stage->key.optimisations_disabled) {
-      if (stage->stage != MESA_SHADER_FRAGMENT || !device->physical_device->cache_key.disable_sinking_load_input_fs)
+      if (stage->stage != MESA_SHADER_FRAGMENT || !pdev->cache_key.disable_sinking_load_input_fs)
         sink_opts |= nir_move_load_input;

       NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);
@@ -581,7 +583,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
     * load_input can be reordered, but buffer loads can't.
     */
    if (stage->stage == MESA_SHADER_VERTEX) {
-      NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, stage, gfx_state, &device->physical_device->info);
+      NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, stage, gfx_state, &pdev->info);
    }

    /* Lower I/O intrinsics to memory instructions. */
@@ -598,7 +600,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
                stage->info.outinfo.export_prim_id, false, false, false, stage->info.force_vrs_per_vertex);

       } else {
-         bool emulate_ngg_gs_query_pipeline_stat = device->physical_device->emulate_ngg_gs_query_pipeline_stat;
+         bool emulate_ngg_gs_query_pipeline_stat = pdev->emulate_ngg_gs_query_pipeline_stat;

          ac_nir_gs_output_info gs_out_info = {
             .streams = stage->info.gs.output_streams,
@@ -609,7 +611,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
    } else if (stage->stage == MESA_SHADER_FRAGMENT) {
       ac_nir_lower_ps_options options = {
          .gfx_level = gfx_level,
-         .family = device->physical_device->info.family,
+         .family = pdev->info.family,
          .use_aco = !radv_use_llvm_for_stage(device, stage->stage),
          .uses_discard = true,
          .alpha_func = COMPARE_FUNC_ALWAYS,
@@ -666,7 +668,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
    NIR_PASS(_, stage->nir, ac_nir_lower_global_access);
    NIR_PASS_V(stage->nir, ac_nir_lower_intrinsics_to_args, gfx_level, radv_select_hw_stage(&stage->info, gfx_level),
               &stage->args.ac);
-   NIR_PASS_V(stage->nir, radv_nir_lower_abi, gfx_level, stage, gfx_state, device->physical_device->info.address32_hi);
+   NIR_PASS_V(stage->nir, radv_nir_lower_abi, gfx_level, stage, gfx_state, pdev->info.address32_hi);
    radv_optimize_nir_algebraic(
       stage->nir, io_to_mem || lowered_ngg || stage->stage == MESA_SHADER_COMPUTE || stage->stage == MESA_SHADER_TASK);

@@ -926,7 +928,7 @@ radv_GetPipelineExecutableStatisticsKHR(VkDevice _device, const VkPipelineExecut
    struct radv_shader *shader =
       radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);

-   const struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);

    unsigned lds_increment =
       pdev->info.gfx_level >= GFX11 && stage == MESA_SHADER_FRAGMENT ? 1024 : pdev->info.lds_encode_granularity;

@@ -38,6 +38,8 @@
 static bool
 radv_is_cache_disabled(struct radv_device *device)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    /* The buffer address used for debug printf is hardcoded. */
    if (device->printf.buffer_addr)
       return true;
@@ -45,8 +47,7 @@ radv_is_cache_disabled(struct radv_device *device)
    /* Pipeline caches can be disabled with RADV_DEBUG=nocache, with MESA_GLSL_CACHE_DISABLE=1 and
    * when ACO_DEBUG is used. MESA_GLSL_CACHE_DISABLE is done elsewhere.
    */
-   return (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) ||
-          (device->physical_device->use_llvm ? 0 : aco_get_codegen_flags());
+   return (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) || (pdev->use_llvm ? 0 : aco_get_codegen_flags());
 }

 void
@@ -532,14 +533,15 @@ nir_shader *
 radv_pipeline_cache_lookup_nir(struct radv_device *device, struct vk_pipeline_cache *cache, gl_shader_stage stage,
                                const blake3_hash key)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    if (radv_is_cache_disabled(device))
       return NULL;

    if (!cache)
       cache = device->mem_cache;

-   return vk_pipeline_cache_lookup_nir(cache, key, sizeof(blake3_hash), &device->physical_device->nir_options[stage],
-                                       NULL, NULL);
+   return vk_pipeline_cache_lookup_nir(cache, key, sizeof(blake3_hash), &pdev->nir_options[stage], NULL, NULL);
 }

 void
@@ -570,6 +572,7 @@ radv_pipeline_cache_lookup_nir_handle(struct radv_device *device, struct vk_pipe
 struct nir_shader *
 radv_pipeline_cache_handle_to_nir(struct radv_device *device, struct vk_pipeline_cache_object *object)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct blob_reader blob;
    struct vk_raw_data_cache_object *nir_object = container_of(object, struct vk_raw_data_cache_object, base);
    blob_reader_init(&blob, nir_object->data, nir_object->data_size);
@@ -579,7 +582,7 @@ radv_pipeline_cache_handle_to_nir(struct radv_device *device, struct vk_pipeline
       ralloc_free(nir);
       return NULL;
    }
-   nir->options = &device->physical_device->nir_options[nir->info.stage];
+   nir->options = &pdev->nir_options[nir->info.stage];

    return nir;
 }

@@ -75,6 +75,7 @@ void
 radv_get_compute_pipeline_metadata(const struct radv_device *device, const struct radv_compute_pipeline *pipeline,
                                    struct radv_compute_pipeline_metadata *metadata)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_shader *cs = pipeline->base.shaders[MESA_SHADER_COMPUTE];
    uint32_t upload_sgpr = 0, inline_sgpr = 0;

@@ -84,7 +85,7 @@ radv_get_compute_pipeline_metadata(const struct radv_device *device, const struc
    metadata->rsrc1 = cs->config.rsrc1;
    metadata->rsrc2 = cs->config.rsrc2;
    metadata->rsrc3 = cs->config.rsrc3;
-   metadata->compute_resource_limits = radv_get_compute_resource_limits(device->physical_device, cs);
+   metadata->compute_resource_limits = radv_get_compute_resource_limits(pdev, cs);
    metadata->block_size_x = cs->info.cs.block_size[0];
    metadata->block_size_y = cs->info.cs.block_size[1];
    metadata->block_size_z = cs->info.cs.block_size[2];
@@ -136,7 +137,7 @@ static void
 radv_compute_generate_pm4(const struct radv_device *device, struct radv_compute_pipeline *pipeline,
                           struct radv_shader *shader)
 {
-   struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radeon_cmdbuf *cs = &pipeline->base.cs;

    cs->reserved_dw = cs->max_dw = pdev->info.gfx_level >= GFX10 ? 19 : 16;

@@ -132,12 +132,13 @@ static unsigned
 radv_choose_spi_color_format(const struct radv_device *device, VkFormat vk_format, bool blend_enable,
                              bool blend_need_alpha)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct util_format_description *desc = vk_format_description(vk_format);
-   bool use_rbplus = device->physical_device->info.rbplus_allowed;
+   bool use_rbplus = pdev->info.rbplus_allowed;
    struct ac_spi_color_formats formats = {0};
    unsigned format, ntype, swap;

-   format = ac_get_cb_format(device->physical_device->info.gfx_level, desc->format);
+   format = ac_get_cb_format(pdev->info.gfx_level, desc->format);
    ntype = ac_get_cb_number_type(desc->format);
    swap = radv_translate_colorswap(vk_format, false);

@@ -508,12 +509,13 @@ static uint64_t
 radv_pipeline_needed_dynamic_state(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline,
                                    const struct vk_graphics_pipeline_state *state)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    bool has_color_att = radv_pipeline_has_color_attachments(state->rp);
    bool raster_enabled =
       !state->rs->rasterizer_discard_enable || (pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
    uint64_t states = RADV_DYNAMIC_ALL;

-   if (device->physical_device->info.gfx_level < GFX10_3)
+   if (pdev->info.gfx_level < GFX10_3)
       states &= ~RADV_DYNAMIC_FRAGMENT_SHADING_RATE;

    /* Disable dynamic states that are useless to mesh shading. */
@@ -568,7 +570,7 @@ radv_pipeline_needed_dynamic_state(const struct radv_device *device, const struc
 struct radv_ia_multi_vgt_param_helpers
 radv_compute_ia_multi_vgt_param(const struct radv_device *device, struct radv_shader *const *shaders)
 {
-   const struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0};

    ia_multi_vgt_param.ia_switch_on_eoi = false;
@@ -1295,7 +1297,8 @@ static void
 radv_link_shaders(const struct radv_device *device, struct radv_shader_stage *producer_stage,
                   struct radv_shader_stage *consumer_stage, const struct radv_graphics_state_key *gfx_state)
 {
-   const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
    nir_shader *producer = producer_stage->nir;
    nir_shader *consumer = consumer_stage->nir;
    bool progress;
@@ -1686,6 +1689,7 @@ radv_graphics_shaders_link(const struct radv_device *device, const struct radv_g
 struct radv_ps_epilog_key
 radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ps_epilog_state *state)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0, z_format = 0;
    struct radv_ps_epilog_key key;

@@ -1731,8 +1735,8 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
                                          state->alpha_to_coverage_via_mrtz);

    key.spi_shader_col_format = col_format;
-   key.color_is_int8 = device->physical_device->info.gfx_level < GFX8 ? is_int8 : 0;
-   key.color_is_int10 = device->physical_device->info.gfx_level < GFX8 ? is_int10 : 0;
+   key.color_is_int8 = pdev->info.gfx_level < GFX8 ? is_int8 : 0;
+   key.color_is_int10 = pdev->info.gfx_level < GFX8 ? is_int10 : 0;
    key.enable_mrt_output_nan_fixup = device->instance->drirc.enable_mrt_output_nan_fixup ? is_float32 : 0;
    key.colors_written = state->colors_written;
    key.mrt0_is_dual_src = state->mrt0_is_dual_src;
@@ -1811,7 +1815,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
                                  const struct vk_graphics_pipeline_state *state,
                                  VkGraphicsPipelineLibraryFlagBitsEXT lib_flags)
 {
-   const struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_graphics_state_key key;

    memset(&key, 0, sizeof(key));
@@ -1884,7 +1888,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
       }
    }

-   if (device->physical_device->info.gfx_level >= GFX11 && state->ms) {
+   if (pdev->info.gfx_level >= GFX11 && state->ms) {
       key.ms.alpha_to_coverage_via_mrtz = state->ms->alpha_to_coverage_enable;
    }

@@ -1898,15 +1902,14 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
       key.unknown_rast_prim = true;
    }

-   if (device->physical_device->info.gfx_level >= GFX10 && state->rs) {
+   if (pdev->info.gfx_level >= GFX10 && state->rs) {
       key.rs.provoking_vtx_last = state->rs->provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;
    }

    key.ps.force_vrs_enabled = device->force_vrs_enabled && !radv_is_static_vrs_enabled(pipeline, state);

    if ((radv_is_vrs_enabled(pipeline, state) || key.ps.force_vrs_enabled) &&
-       (device->physical_device->info.family == CHIP_NAVI21 || device->physical_device->info.family == CHIP_NAVI22 ||
-        device->physical_device->info.family == CHIP_VANGOGH))
+       (pdev->info.family == CHIP_NAVI21 || pdev->info.family == CHIP_NAVI22 || pdev->info.family == CHIP_VANGOGH))
       key.adjust_frag_coord_z = true;

    if (radv_pipeline_needs_ps_epilog(pipeline, lib_flags))
@@ -1914,7 +1917,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct

       key.ps.epilog = radv_pipeline_generate_ps_epilog_key(device, state);

-   if (device->physical_device->info.gfx_level >= GFX11) {
+   if (pdev->info.gfx_level >= GFX11) {
       /* On GFX11, alpha to coverage is exported via MRTZ when depth/stencil/samplemask are also
        * exported. Though, when a PS epilog is needed and the MS state is NULL (with dynamic
        * rendering), it's not possible to know the info at compile time and MRTZ needs to be
@@ -1927,7 +1930,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
    key.dynamic_rasterization_samples = !!(pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZATION_SAMPLES) ||
                                        (!!(pipeline->active_stages & VK_SHADER_STAGE_FRAGMENT_BIT) && !state->ms);

-   if (device->physical_device->use_ngg) {
+   if (pdev->use_ngg) {
       VkShaderStageFlags ngg_stage;

       if (pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT) {
@@ -1995,7 +1998,9 @@ static void
 radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage *stages,
                           VkShaderStageFlagBits active_nir_stages)
 {
-   if (!device->physical_device->cache_key.use_ngg)
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
+   if (!pdev->cache_key.use_ngg)
       return;

    if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_VERTEX].info.next_stage != MESA_SHADER_TESS_CTRL) {
@@ -2006,7 +2011,7 @@ radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage *
       stages[MESA_SHADER_MESH].info.is_ngg = true;
    }

-   if (device->physical_device->info.gfx_level >= GFX11) {
+   if (pdev->info.gfx_level >= GFX11) {
       if (stages[MESA_SHADER_GEOMETRY].nir)
          stages[MESA_SHADER_GEOMETRY].info.is_ngg = true;
    } else {
@@ -2164,7 +2169,8 @@ static void
 radv_declare_pipeline_args(struct radv_device *device, struct radv_shader_stage *stages,
                            const struct radv_graphics_state_key *gfx_state, VkShaderStageFlagBits active_nir_stages)
 {
-   enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   enum amd_gfx_level gfx_level = pdev->info.gfx_level;

    if (gfx_level >= GFX9 && stages[MESA_SHADER_TESS_CTRL].nir) {
       radv_declare_shader_args(device, gfx_state, &stages[MESA_SHADER_TESS_CTRL].info, MESA_SHADER_TESS_CTRL,
@@ -2210,15 +2216,16 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache
                            bool keep_executable_info, bool keep_statistic_info,
                            struct radv_shader_binary **gs_copy_binary)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_shader_info *gs_info = &gs_stage->info;
    ac_nir_gs_output_info output_info = {
       .streams = gs_info->gs.output_streams,
       .usage_mask = gs_info->gs.output_usage_mask,
    };
    nir_shader *nir = ac_nir_create_gs_copy_shader(
-      gs_stage->nir, device->physical_device->info.gfx_level,
-      gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask, gs_info->outinfo.vs_output_param_offset,
-      gs_info->outinfo.param_exports, false, false, false, gs_info->force_vrs_per_vertex, &output_info);
+      gs_stage->nir, pdev->info.gfx_level, gs_info->outinfo.clip_dist_mask | gs_info->outinfo.cull_dist_mask,
+      gs_info->outinfo.vs_output_param_offset, gs_info->outinfo.param_exports, false, false, false,
+      gs_info->force_vrs_per_vertex, &output_info);

    nir_validate_shader(nir, "after ac_nir_create_gs_copy_shader");
    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
@@ -2246,10 +2253,8 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache
    gs_copy_stage.info.user_sgprs_locs = gs_copy_stage.args.user_sgprs_locs;
    gs_copy_stage.info.inline_push_constant_mask = gs_copy_stage.args.ac.inline_push_const_mask;

-   NIR_PASS_V(nir, ac_nir_lower_intrinsics_to_args, device->physical_device->info.gfx_level, AC_HW_VERTEX_SHADER,
-              &gs_copy_stage.args.ac);
-   NIR_PASS_V(nir, radv_nir_lower_abi, device->physical_device->info.gfx_level, &gs_copy_stage, gfx_state,
-              device->physical_device->info.address32_hi);
+   NIR_PASS_V(nir, ac_nir_lower_intrinsics_to_args, pdev->info.gfx_level, AC_HW_VERTEX_SHADER, &gs_copy_stage.args.ac);
+   NIR_PASS_V(nir, radv_nir_lower_abi, pdev->info.gfx_level, &gs_copy_stage, gfx_state, pdev->info.address32_hi);

    struct radv_graphics_pipeline_key key = {0};
    bool dump_shader = radv_can_dump_shader(device, nir, true);
@@ -2272,6 +2277,8 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_
                                  struct radv_shader_binary **binaries, struct radv_shader **gs_copy_shader,
                                  struct radv_shader_binary **gs_copy_binary)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    for (int s = MESA_VULKAN_SHADER_STAGES - 1; s >= 0; s--) {
       if (!(active_nir_stages & (1 << s)))
          continue;
@@ -2280,7 +2287,7 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_
       unsigned shader_count = 1;

       /* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. */
-      if (device->physical_device->info.gfx_level >= GFX9 &&
+      if (pdev->info.gfx_level >= GFX9 &&
          ((s == MESA_SHADER_GEOMETRY &&
            (active_nir_stages & (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))) ||
           (s == MESA_SHADER_TESS_CTRL && (active_nir_stages & VK_SHADER_STAGE_VERTEX_BIT)))) {
@@ -2348,6 +2355,7 @@ static void
 radv_pipeline_import_retained_shaders(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
                                       struct radv_graphics_lib_pipeline *lib, struct radv_shader_stage *stages)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_retained_shaders *retained_shaders = &lib->retained_shaders;

    /* Import the stages (SPIR-V only in case of cache hits). */
@@ -2370,7 +2378,7 @@ radv_pipeline_import_retained_shaders(const struct radv_device *device, struct r
       int64_t stage_start = os_time_get_nano();

       /* Deserialize the NIR shader. */
-      const struct nir_shader_compiler_options *options = &device->physical_device->nir_options[s];
+      const struct nir_shader_compiler_options *options = &pdev->nir_options[s];
       struct blob_reader blob_reader;
       blob_reader_init(&blob_reader, retained_shaders->stages[s].serialized_nir,
                        retained_shaders->stages[s].serialized_nir_size);
@@ -2442,6 +2450,7 @@ static bool
 radv_skip_graphics_pipeline_compile(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline,
                                     VkGraphicsPipelineLibraryFlagBitsEXT lib_flags, bool fast_linking_enabled)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    VkShaderStageFlagBits binary_stages = 0;

    /* Do not skip when fast-linking isn't enabled. */
@@ -2462,7 +2471,7 @@ radv_skip_graphics_pipeline_compile(const struct radv_device *device, const stru
          binary_stages |= mesa_to_vk_shader_stage(i);
    }

-   if (device->physical_device->info.gfx_level >= GFX9) {
+   if (pdev->info.gfx_level >= GFX9) {
       /* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. */
       if (binary_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) {
          binary_stages |= VK_SHADER_STAGE_VERTEX_BIT;
@@ -2490,6 +2499,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
                               struct radv_shader **shaders, struct radv_shader_binary **binaries,
                               struct radv_shader **gs_copy_shader, struct radv_shader_binary **gs_copy_binary)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const bool nir_cache = device->instance->perftest_flags & RADV_PERFTEST_NIR_CACHE;
    for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
       if (!stages[s].entrypoint)
@@ -2530,7 +2540,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
          active_nir_stages |= mesa_to_vk_shader_stage(i);
    }

-   if (!device->physical_device->mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir &&
+   if (!pdev->mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir &&
       BITSET_TEST(stages[MESA_SHADER_MESH].nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID)) {
       nir_shader *mesh = stages[MESA_SHADER_MESH].nir;
       nir_shader *task = stages[MESA_SHADER_TASK].nir;
@@ -2848,7 +2858,7 @@ void
 radv_emit_vgt_gs_mode(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
                       const struct radv_shader *last_vgt_api_shader)
 {
-   const struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_shader_info *info = &last_vgt_api_shader->info;
    unsigned vgt_primitiveid_en = 0;
    uint32_t vgt_gs_mode = 0;
@@ -2871,7 +2881,7 @@ static void
 radv_emit_hw_vs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
                 const struct radv_shader *shader)
 {
-   const struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    uint64_t va = radv_shader_get_va(shader);

    radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4);
@@ -2971,7 +2981,7 @@ static void
 radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
                  const struct radv_shader *es, const struct radv_shader *shader)
 {
-   const struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    uint64_t va = radv_shader_get_va(shader);
    gl_shader_stage es_type;
    const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info;
@@ -3128,7 +3138,7 @@ radv_emit_hw_ngg(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
 static void
 radv_emit_hw_hs(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_shader *shader)
 {
-   const struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    uint64_t va = radv_shader_get_va(shader);

    if (pdev->info.gfx_level >= GFX9) {
@@ -3152,6 +3162,8 @@ void
 radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
                         const struct radv_shader *vs, const struct radv_shader *next_stage)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    if (vs->info.merged_shader_compiled_separately) {
       const struct radv_userdata_info *loc = &vs->info.user_sgprs_locs.shader_data[AC_UD_NEXT_STAGE_PC];
       const uint32_t base_reg = vs->info.user_data_0;
@@ -3164,7 +3176,7 @@ radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *
       if (vs->info.next_stage == MESA_SHADER_TESS_CTRL) {
         radv_shader_combine_cfg_vs_tcs(vs, next_stage, &rsrc1, NULL);

-         if (device->physical_device->info.gfx_level >= GFX10) {
+         if (pdev->info.gfx_level >= GFX10) {
            radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, vs->va >> 8);
         } else {
            radeon_set_sh_reg(cs, R_00B410_SPI_SHADER_PGM_LO_LS, vs->va >> 8);
@@ -3174,7 +3186,7 @@ radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *
      } else {
         radv_shader_combine_cfg_vs_gs(vs, next_stage, &rsrc1, &rsrc2);

-         if (device->physical_device->info.gfx_level >= GFX10) {
+         if (pdev->info.gfx_level >= GFX10) {
            radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, vs->va >> 8);
         } else {
            radeon_set_sh_reg(cs, R_00B210_SPI_SHADER_PGM_LO_ES, vs->va >> 8);
@@ -3182,8 +3194,7 @@ radv_emit_vertex_shader(const struct radv_device *device, struct radeon_cmdbuf *

         unsigned lds_size;
         if (next_stage->info.is_ngg) {
-            lds_size = DIV_ROUND_UP(next_stage->info.ngg_info.lds_size,
-                                    device->physical_device->info.lds_encode_granularity);
+            lds_size = DIV_ROUND_UP(next_stage->info.ngg_info.lds_size, pdev->info.lds_encode_granularity);
         } else {
            lds_size = next_stage->info.gs_ring_info.lds_size;
         }
@@ -3225,6 +3236,8 @@ void
 radv_emit_tess_eval_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
                            const struct radv_shader *tes, const struct radv_shader *gs)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    if (tes->info.merged_shader_compiled_separately) {
       const struct radv_userdata_info *loc = &tes->info.user_sgprs_locs.shader_data[AC_UD_NEXT_STAGE_PC];
       const uint32_t base_reg = tes->info.user_data_0;
@@ -3238,7 +3251,7 @@ radv_emit_tess_eval_shader(const struct radv_device *device, struct radeon_cmdbu

      unsigned lds_size;
      if (gs->info.is_ngg) {
-         lds_size = DIV_ROUND_UP(gs->info.ngg_info.lds_size, device->physical_device->info.lds_encode_granularity);
+         lds_size = DIV_ROUND_UP(gs->info.ngg_info.lds_size, pdev->info.lds_encode_granularity);
      } else {
         lds_size = gs->info.gs_ring_info.lds_size;
      }
@@ -3264,7 +3277,7 @@ static void
 radv_emit_hw_gs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
                 const struct radv_shader *gs)
 {
-   const struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_legacy_gs_info *gs_state = &gs->info.gs_ring_info;
    unsigned gs_max_out_vertices;
    const uint8_t *num_components;
@@ -3382,16 +3395,15 @@ void
 radv_emit_mesh_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
                       const struct radv_shader *ms)
 {
-   const struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const uint32_t gs_out = radv_conv_gl_prim_to_gs_out(ms->info.ms.output_prim);

    radv_emit_hw_ngg(device, ctx_cs, cs, NULL, ms);
-   radeon_set_context_reg(
-      ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT,
-      device->physical_device->mesh_fast_launch_2 ? ms->info.ngg_info.max_out_verts : ms->info.workgroup_size);
+   radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT,
+                          pdev->mesh_fast_launch_2 ? ms->info.ngg_info.max_out_verts : ms->info.workgroup_size);
    radeon_set_uconfig_reg_idx(pdev, ctx_cs, R_030908_VGT_PRIMITIVE_TYPE, 1, V_008958_DI_PT_POINTLIST);

-   if (device->physical_device->mesh_fast_launch_2) {
+   if (pdev->mesh_fast_launch_2) {
       radeon_set_sh_reg_seq(cs, R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, 2);
       radeon_emit(cs, S_00B2B0_MESHLET_NUM_THREAD_X(ms->info.cs.block_size[0] - 1) |
                          S_00B2B0_MESHLET_NUM_THREAD_Y(ms->info.cs.block_size[1] - 1) |
@@ -3476,9 +3488,10 @@ void
 radv_emit_ps_inputs(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
                     const struct radv_shader *last_vgt_shader, const struct radv_shader *ps)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_vs_output_info *outinfo = &last_vgt_shader->info.outinfo;
    bool mesh = last_vgt_shader->info.stage == MESA_SHADER_MESH;
-   bool gfx11plus = device->physical_device->info.gfx_level >= GFX11;
+   bool gfx11plus = pdev->info.gfx_level >= GFX11;
    uint32_t ps_input_cntl[32];

    unsigned ps_offset = 0;
@@ -3530,7 +3543,7 @@ void
 radv_emit_fragment_shader(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
                           const struct radv_shader *ps)
 {
-   const struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    bool param_gen;
    uint64_t va;

@@ -3566,7 +3579,7 @@ void
 radv_emit_vgt_reuse(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, const struct radv_shader *tes,
                     const struct radv_vgt_shader_key *key)
 {
-   const struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);

    if (pdev->info.gfx_level == GFX10_3) {
       /* Legacy Tess+GS should disable reuse to prevent hangs on GFX10.3. */
@@ -3635,7 +3648,7 @@ void
 radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs,
                             const struct radv_vgt_shader_key *key)
 {
-   const struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    uint32_t stages = 0;

    if (key->tess) {
@@ -3651,7 +3664,7 @@ radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdb
       stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1);
    } else if (key->mesh) {
       assert(!key->ngg_passthrough);
-      unsigned gs_fast_launch = device->physical_device->mesh_fast_launch_2 ? 2 : 1;
+      unsigned gs_fast_launch = pdev->mesh_fast_launch_2 ? 2 : 1;
       stages |=
          S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(gs_fast_launch) | S_028B54_NGG_WAVE_ID_EN(key->mesh_scratch_ring);
    } else if (key->ngg) {
@@ -3682,7 +3695,7 @@ radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdb
 void
 radv_emit_vgt_gs_out(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, uint32_t vgt_gs_out_prim_type)
 {
-   const struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);

    if (pdev->info.gfx_level >= GFX11) {
       radeon_set_uconfig_reg(ctx_cs, R_030998_VGT_GS_OUT_PRIM_TYPE, vgt_gs_out_prim_type);
@@ -3713,9 +3726,10 @@ gfx103_emit_vgt_draw_payload_cntl(struct radeon_cmdbuf *ctx_cs, const struct rad
 static bool
 gfx103_pipeline_vrs_coarse_shading(const struct radv_device *device, const struct radv_graphics_pipeline *pipeline)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];

-   if (device->physical_device->info.gfx_level != GFX10_3)
+   if (pdev->info.gfx_level != GFX10_3)
       return false;

    if (device->instance->debug_flags & RADV_DEBUG_NO_VRS_FLAT_SHADING)
@@ -3731,7 +3745,7 @@ void
 gfx103_emit_vrs_state(const struct radv_device *device, struct radeon_cmdbuf *ctx_cs, const struct radv_shader *ps,
                       bool enable_vrs, bool enable_vrs_coarse_shading, bool force_vrs_per_vertex)
 {
-   const struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    uint32_t mode = V_028064_SC_VRS_COMB_MODE_PASSTHRU;
    uint8_t rate_x = 0, rate_y = 0;

@@ -3769,7 +3783,7 @@ radv_pipeline_emit_pm4(const struct radv_device *device, struct radv_graphics_pi
                        const struct vk_graphics_pipeline_state *state)

 {
-   const struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_shader *last_vgt_shader = radv_get_last_vgt_shader(pipeline);
    const struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT];
    struct radeon_cmdbuf *ctx_cs = &pipeline->base.ctx_cs;
@@ -3835,7 +3849,7 @@ static void
 radv_pipeline_init_vertex_input_state(const struct radv_device *device, struct radv_graphics_pipeline *pipeline,
                                       const struct vk_graphics_pipeline_state *state)
 {
-   const struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_shader_info *vs_info = &radv_get_shader(pipeline->base.shaders, MESA_SHADER_VERTEX)->info;

    if (state->vi) {
@@ -4022,7 +4036,8 @@ bool
 radv_needs_null_export_workaround(const struct radv_device *device, const struct radv_shader *ps,
                                   unsigned custom_blend_mode)
 {
-   const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;

    if (!ps)
       return false;

@@ -370,6 +370,7 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
                    const struct radv_ray_tracing_stage_info *traversal_stage_info,
                    struct radv_serialized_shader_arena_block *replay_block, struct radv_shader **out_shader)
 {
+   struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_shader_binary *binary;
    bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.base.create_flags);
    bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.base.create_flags);
@@ -405,7 +406,7 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
          .stack_alignment = 16,
          .localized_loads = true,
          .vectorizer_callback = radv_mem_vectorize_callback,
-         .vectorizer_data = &device->physical_device->info.gfx_level,
+         .vectorizer_data = &pdev->info.gfx_level,
       };
       nir_lower_shader_calls(stage->nir, &opts, &resume_shaders, &num_resume_shaders, stage->nir);
    }
@@ -828,6 +829,8 @@ postprocess_rt_config(struct ac_shader_config *config, enum amd_gfx_level gfx_le
 static void
 compile_rt_prolog(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    pipeline->prolog = radv_create_rt_prolog(device);

    /* create combined config */
@@ -839,7 +842,7 @@ compile_rt_prolog(struct radv_device *device, struct radv_ray_tracing_pipeline *
    if (pipeline->base.base.shaders[MESA_SHADER_INTERSECTION])
       combine_config(config, &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]->config);

-   postprocess_rt_config(config, device->physical_device->info.gfx_level, device->physical_device->rt_wave_size);
+   postprocess_rt_config(config, pdev->info.gfx_level, pdev->rt_wave_size);

    pipeline->prolog->max_waves = radv_get_max_waves(device, config, &pipeline->prolog->info);
 }

@@ -18,6 +18,8 @@ static struct hash_table *device_ht = NULL;
 VkResult
 radv_printf_data_init(struct radv_device *device)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    util_dynarray_init(&device->printf.formats, NULL);

    device->printf.buffer_size = debug_get_num_option("RADV_PRINTF_BUFFER_SIZE", 0);
@@ -45,9 +47,9 @@ radv_printf_data_init(struct radv_device *device)
    VkMemoryAllocateInfo alloc_info = {
       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
       .allocationSize = requirements.size,
-      .memoryTypeIndex = radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
-                                                                            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-                                                                            VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
+      .memoryTypeIndex =
+         radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
    };

    result = device->vk.dispatch_table.AllocateMemory(_device, &alloc_info, NULL, &device->printf.memory);

@@ -1134,8 +1134,6 @@ struct radv_device {
    /* Whether to keep shader debug info, for debugging. */
    bool keep_shader_info;

-   struct radv_physical_device *physical_device;
-
    /* Backup in-memory cache to be used if the app doesn't provide one */
    struct vk_pipeline_cache *mem_cache;

@@ -1275,6 +1273,12 @@ struct radv_device {
    uint32_t compute_scratch_waves;
 };

+static inline struct radv_physical_device *
+radv_device_physical(const struct radv_device *dev)
+{
+   return (struct radv_physical_device *)dev->vk.physical;
+}
+
 bool radv_device_set_pstate(struct radv_device *device, bool enable);
 bool radv_device_acquire_performance_counters(struct radv_device *device);
 void radv_device_release_performance_counters(struct radv_device *device);
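
For reference, every call site converted in this commit follows the same pattern around the new accessor above. A minimal sketch (not part of the diff; example_emit is a hypothetical caller):

   /* Hypothetical caller: the physical device is now recovered from the
    * vk_device base object via radv_device_physical() instead of the
    * removed radv_device::physical_device pointer. */
   static void
   example_emit(struct radv_device *device)
   {
      const struct radv_physical_device *pdev = radv_device_physical(device);

      if (pdev->info.gfx_level >= GFX11) {
         /* GFX11+ path */
      }
   }
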
@@ -2155,10 +2159,12 @@ static inline void
 radv_emit_shader_pointer_body(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va,
                               bool use_32bit_pointers)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    radeon_emit(cs, va);

    if (use_32bit_pointers) {
-      assert(va == 0 || (va >> 32) == device->physical_device->info.address32_hi);
+      assert(va == 0 || (va >> 32) == pdev->info.address32_hi);
    } else {
       radeon_emit(cs, va >> 32);
    }
@@ -2798,7 +2804,8 @@ radv_image_has_htile(const struct radv_image *image)
 static inline bool
 radv_image_has_vrs_htile(const struct radv_device *device, const struct radv_image *image)
 {
-   const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;

    /* Any depth buffer can potentially use VRS on GFX10.3. */
    return gfx_level == GFX10_3 && device->vk.enabled_features.attachmentFragmentShadingRate &&
@@ -2830,7 +2837,9 @@ radv_image_is_tc_compat_htile(const struct radv_image *image)
 static inline bool
 radv_image_tile_stencil_disabled(const struct radv_device *device, const struct radv_image *image)
 {
-   if (device->physical_device->info.gfx_level >= GFX9) {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
+   if (pdev->info.gfx_level >= GFX9) {
       return !vk_format_has_stencil(image->vk.format) && !radv_image_has_vrs_htile(device, image);
    } else {
       /* Due to a hw bug, TILE_STENCIL_DISABLE must be set to 0 for
@@ -2938,8 +2947,10 @@ radv_get_htile_initial_value(const struct radv_device *device, const struct radv
 static inline bool
 radv_image_get_iterate256(const struct radv_device *device, struct radv_image *image)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    /* ITERATE_256 is required for depth or stencil MSAA images that are TC-compatible HTILE. */
-   return device->physical_device->info.gfx_level >= GFX10 &&
+   return pdev->info.gfx_level >= GFX10 &&
          (image->vk.usage & (VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&
          radv_image_is_tc_compat_htile(image) && image->vk.samples > 1;
 }
@@ -3744,7 +3755,8 @@ radv_is_streamout_enabled(struct radv_cmd_buffer *cmd_buffer)
 static inline enum amd_ip_type
 radv_queue_ring(const struct radv_queue *queue)
 {
-   return radv_queue_family_to_ring(queue->device->physical_device, queue->state.qf);
+   const struct radv_physical_device *pdev = radv_device_physical(queue->device);
+   return radv_queue_family_to_ring(pdev, queue->state.qf);
 }

 /* radv_video */
@@ -3758,7 +3770,8 @@ void radv_video_get_profile_alignments(struct radv_physical_device *pdev, const
 static inline bool
 radv_use_llvm_for_stage(const struct radv_device *device, UNUSED gl_shader_stage stage)
 {
-   return device->physical_device->use_llvm;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   return pdev->use_llvm;
 }

 static inline bool

@@ -56,7 +56,8 @@ radv_get_pipelinestat_query_size(struct radv_device *device)
    /* GFX10_3 only has 11 valid pipeline statistics queries but in order to emulate mesh/task shader
    * invocations, it's easier to use the same size as GFX11.
    */
-   unsigned num_results = device->physical_device->info.gfx_level >= GFX10_3 ? 14 : 11;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   unsigned num_results = pdev->info.gfx_level >= GFX10_3 ? 14 : 11;
    return num_results * 8;
 }

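As a worked example of what this helper yields (a sketch, not part of the diff, assuming one 64-bit counter per statistic slot, which is what the "* 8" encodes):

   /* Hypothetical sanity check: per-query result size in bytes. */
   unsigned size = radv_get_pipelinestat_query_size(device);
   assert(size == (radv_device_physical(device)->info.gfx_level >= GFX10_3 ? 14 : 11) * 8); /* 112 or 88 */
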
@ -120,6 +121,7 @@ build_occlusion_query_shader(struct radv_device *device)
|
|||
* }
|
||||
* }
|
||||
*/
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "occlusion_query");
|
||||
b.shader->info.workgroup_size[0] = 64;
|
||||
|
||||
|
|
@ -128,8 +130,8 @@ build_occlusion_query_shader(struct radv_device *device)
|
|||
nir_variable *start = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "start");
|
||||
nir_variable *end = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "end");
|
||||
nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available");
|
||||
uint64_t enabled_rb_mask = device->physical_device->info.enabled_rb_mask;
|
||||
unsigned db_count = device->physical_device->info.max_render_backends;
|
||||
uint64_t enabled_rb_mask = pdev->info.enabled_rb_mask;
|
||||
unsigned db_count = pdev->info.max_render_backends;
|
||||
|
||||
nir_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
|
||||
|
||||
|
|
@ -275,6 +277,7 @@ build_pipeline_statistics_query_shader(struct radv_device *device)
|
|||
* }
|
||||
* }
|
||||
*/
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "pipeline_statistics_query");
|
||||
b.shader->info.workgroup_size[0] = 64;
|
||||
|
||||
|
|
@ -301,7 +304,7 @@ build_pipeline_statistics_query_shader(struct radv_device *device)
|
|||
nir_def *available32 = nir_load_ssbo(&b, 1, 32, src_buf, avail_offset);
|
||||
nir_store_var(&b, available, nir_i2b(&b, available32), 0x1);
|
||||
|
||||
if (device->physical_device->emulate_mesh_shader_queries) {
|
||||
if (pdev->emulate_mesh_shader_queries) {
|
||||
nir_push_if(&b, nir_test_mask(&b, stats_mask, VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT));
|
||||
{
|
||||
const uint32_t idx = ffs(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT) - 1;
|
||||
|
|
@ -867,6 +870,7 @@ build_ms_prim_gen_query_shader(struct radv_device *device)
|
|||
static VkResult
|
||||
radv_device_init_meta_query_state_internal(struct radv_device *device)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
VkResult result;
|
||||
nir_shader *occlusion_cs = NULL;
|
||||
nir_shader *pipeline_statistics_cs = NULL;
|
||||
|
|
@ -886,7 +890,7 @@ radv_device_init_meta_query_state_internal(struct radv_device *device)
|
|||
timestamp_cs = build_timestamp_query_shader(device);
|
||||
pg_cs = build_pg_query_shader(device);
|
||||
|
||||
if (device->physical_device->emulate_mesh_shader_queries)
|
||||
if (pdev->emulate_mesh_shader_queries)
|
||||
ms_prim_gen_cs = build_ms_prim_gen_query_shader(device);
|
||||
|
||||
VkDescriptorSetLayoutCreateInfo occlusion_ds_create_info = {
|
||||
|
|
@ -1025,7 +1029,7 @@ radv_device_init_meta_query_state_internal(struct radv_device *device)
|
|||
result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &pg_pipeline_info,
|
||||
NULL, &device->meta_state.query.pg_query_pipeline);
|
||||
|
||||
if (device->physical_device->emulate_mesh_shader_queries) {
|
||||
if (pdev->emulate_mesh_shader_queries) {
|
||||
VkPipelineShaderStageCreateInfo ms_prim_gen_pipeline_shader_stage = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
|
|
@ -1211,6 +1215,7 @@ static VkResult
|
|||
radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *pCreateInfo,
|
||||
const VkAllocationCallbacks *pAllocator, VkQueryPool *pQueryPool)
|
||||
{
|
||||
struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
VkResult result;
|
||||
size_t pool_struct_size = pCreateInfo->queryType == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR
|
||||
? sizeof(struct radv_pc_query_pool)
|
||||
|
|
@ -1232,21 +1237,20 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *
|
|||
* and the legacy GS path but it increments for NGG VS/TES because they are merged with GS. To
|
||||
* avoid this counter to increment, it's also emulated.
|
||||
*/
|
||||
pool->uses_gds =
|
||||
(device->physical_device->emulate_ngg_gs_query_pipeline_stat &&
|
||||
(pool->vk.pipeline_statistics & (VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
|
||||
VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT))) ||
|
||||
(device->physical_device->use_ngg && pCreateInfo->queryType == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) ||
|
||||
(device->physical_device->emulate_mesh_shader_queries &&
|
||||
(pCreateInfo->queryType == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT ||
|
||||
pool->vk.pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_MESH_SHADER_INVOCATIONS_BIT_EXT));
|
||||
pool->uses_gds = (pdev->emulate_ngg_gs_query_pipeline_stat &&
|
||||
(pool->vk.pipeline_statistics & (VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
|
||||
VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT))) ||
|
||||
(pdev->use_ngg && pCreateInfo->queryType == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT) ||
|
||||
(pdev->emulate_mesh_shader_queries &&
|
||||
(pCreateInfo->queryType == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT ||
|
||||
pool->vk.pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_MESH_SHADER_INVOCATIONS_BIT_EXT));
|
||||
|
||||
/* The number of task shader invocations needs to be queried on ACE. */
|
||||
pool->uses_ace = (pool->vk.pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);
|
||||
|
||||
switch (pCreateInfo->queryType) {
|
||||
case VK_QUERY_TYPE_OCCLUSION:
|
||||
pool->stride = 16 * device->physical_device->info.max_render_backends;
|
||||
pool->stride = 16 * pdev->info.max_render_backends;
|
||||
break;
|
||||
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
|
||||
pool->stride = radv_get_pipelinestat_query_size(device) * 2;
|
||||
|
|
@ -1262,7 +1266,7 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *
|
|||
pool->stride = 32;
|
||||
break;
|
||||
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
|
||||
if (pool->uses_gds && device->physical_device->info.gfx_level < GFX11) {
|
||||
if (pool->uses_gds && pdev->info.gfx_level < GFX11) {
|
||||
/* When the hardware can use both the legacy and the NGG paths in the same begin/end pair,
|
||||
* allocate 2x64-bit values for the GDS counters.
|
||||
*/
|
||||
|
|
@ -1272,7 +1276,7 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *
|
|||
}
|
||||
break;
|
||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
|
||||
result = radv_pc_init_query_pool(device->physical_device, pCreateInfo, (struct radv_pc_query_pool *)pool);
|
||||
result = radv_pc_init_query_pool(pdev, pCreateInfo, (struct radv_pc_query_pool *)pool);
|
||||
|
||||
if (result != VK_SUCCESS) {
|
||||
radv_destroy_query_pool(device, pAllocator, pool);
|
||||
|
|
@ -1281,11 +1285,11 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *
|
|||
break;
|
||||
}
|
||||
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
|
||||
if (device->physical_device->info.gfx_level >= GFX11) {
|
||||
if (pdev->info.gfx_level >= GFX11) {
|
||||
/* GFX11 natively supports mesh generated primitives with pipeline statistics. */
|
||||
pool->stride = radv_get_pipelinestat_query_size(device) * 2;
|
||||
} else {
|
||||
assert(device->physical_device->emulate_mesh_shader_queries);
|
||||
assert(pdev->emulate_mesh_shader_queries);
|
||||
pool->stride = 16;
|
||||
}
|
||||
break;
|
||||
|
|
@ -1296,8 +1300,7 @@ radv_create_query_pool(struct radv_device *device, const VkQueryPoolCreateInfo *
|
|||
pool->availability_offset = pool->stride * pCreateInfo->queryCount;
|
||||
pool->size = pool->availability_offset;
|
||||
if (pCreateInfo->queryType == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
|
||||
(pCreateInfo->queryType == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT &&
|
||||
device->physical_device->info.gfx_level >= GFX11))
|
||||
(pCreateInfo->queryType == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT && pdev->info.gfx_level >= GFX11))
|
||||
pool->size += 4 * pCreateInfo->queryCount;
|
||||
|
||||
result = radv_bo_create(device, pool->size, 64, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING,

@ -1365,6 +1368,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
const struct radv_physical_device *pdev = radv_device_physical(device);
char *data = pData;
VkResult result = VK_SUCCESS;

@ -1413,8 +1417,8 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
}
case VK_QUERY_TYPE_OCCLUSION: {
p_atomic_uint64_t const *src64 = (p_atomic_uint64_t const *)src;
uint32_t db_count = device->physical_device->info.max_render_backends;
uint64_t enabled_rb_mask = device->physical_device->info.enabled_rb_mask;
uint32_t db_count = pdev->info.max_render_backends;
uint64_t enabled_rb_mask = pdev->info.enabled_rb_mask;
uint64_t sample_count = 0;
available = 1;
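
For context, the occlusion stride of 16 * max_render_backends above means each query slot holds one 16-byte pair of 64-bit ZPASS counters (begin and end) per render backend. A hedged sketch of how such a layout resolves to a single sample count (simplified; the real loop also derives availability from the counters' high bits and honors the wait flags):

uint64_t resolved_samples = 0;
for (uint32_t rb = 0; rb < db_count; ++rb) {
   if (!(enabled_rb_mask & (1ull << rb)))
      continue; /* harvested render backends never write their slot */
   const uint64_t begin = p_atomic_read(&src64[2 * rb]);
   const uint64_t end = p_atomic_read(&src64[2 * rb + 1]);
   resolved_samples += end - begin;
}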

@ -1460,7 +1464,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
do {
available = p_atomic_read(avail_ptr);

if (pool->uses_ace && device->physical_device->emulate_mesh_shader_queries) {
if (pool->uses_ace && pdev->emulate_mesh_shader_queries) {
const uint32_t task_invoc_offset =
radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);
const uint32_t *avail_ptr_start = (const uint32_t *)(src + task_invoc_offset + 4);

@ -1550,7 +1554,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
break;
}
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
const bool uses_gds_query = pool->uses_gds && device->physical_device->info.gfx_level < GFX11;
const bool uses_gds_query = pool->uses_gds && pdev->info.gfx_level < GFX11;
p_atomic_uint64_t const *src64 = (p_atomic_uint64_t const *)src;
uint64_t primitive_storage_needed;

@ -1615,7 +1619,7 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: {
uint64_t ms_prim_gen;

if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(device);
const uint32_t *avail_ptr = (const uint32_t *)(pool->ptr + pool->availability_offset + 4 * query);

@ -1730,6 +1734,7 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
uint64_t va = radv_buffer_get_va(pool->bo);
uint64_t dest_va = radv_buffer_get_va(dst_buffer->bo);

@ -1780,7 +1785,7 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
/* This waits on the ME. All copies below are done on the ME */
radv_cp_wait_mem(cs, cmd_buffer->qf, WAIT_REG_MEM_EQUAL, avail_va, 1, 0xffffffff);

if (pool->uses_ace && cmd_buffer->device->physical_device->emulate_mesh_shader_queries) {
if (pool->uses_ace && pdev->emulate_mesh_shader_queries) {
const uint64_t src_va = va + query * pool->stride;
const uint64_t start_va = src_va + task_invoc_offset + 4;
const uint64_t stop_va = start_va + pipelinestat_block_size;

@ -1842,7 +1847,7 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo
break;
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
const bool uses_gds_query = pool->uses_gds && cmd_buffer->device->physical_device->info.gfx_level < GFX11;
const bool uses_gds_query = pool->uses_gds && pdev->info.gfx_level < GFX11;

for (unsigned i = 0; i < queryCount; i++) {
unsigned query = firstQuery + i;

@ -1863,11 +1868,10 @@ radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPoo

radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pg_query_pipeline, pool->bo, dst_buffer->bo,
firstQuery * pool->stride, dst_buffer->offset + dstOffset, pool->stride, stride, dst_size,
queryCount, flags, 0, 0,
pool->uses_gds && cmd_buffer->device->physical_device->info.gfx_level < GFX11);
queryCount, flags, 0, 0, pool->uses_gds && pdev->info.gfx_level < GFX11);
break;
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
for (unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
unsigned query = firstQuery + i;

@ -1928,6 +1932,7 @@ radv_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uin
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
uint32_t value = query_clear_value(pool->vk.query_type);
uint32_t flush_bits = 0;

@ -1941,8 +1946,7 @@ radv_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uin
queryCount * pool->stride, value);

if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
(pool->vk.query_type == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT &&
cmd_buffer->device->physical_device->info.gfx_level >= GFX11)) {
(pool->vk.query_type == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT && pdev->info.gfx_level >= GFX11)) {
flush_bits |=
radv_fill_buffer(cmd_buffer, NULL, pool->bo,
radv_buffer_get_va(pool->bo) + pool->availability_offset + firstQuery * 4, queryCount * 4, 0);

@ -1960,6 +1964,7 @@ radv_ResetQueryPool(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
const struct radv_physical_device *pdev = radv_device_physical(device);

uint32_t value = query_clear_value(pool->vk.query_type);
uint32_t *data = (uint32_t *)(pool->ptr + firstQuery * pool->stride);

@ -1969,8 +1974,7 @@ radv_ResetQueryPool(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery
*p = value;

if (pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
(pool->vk.query_type == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT &&
device->physical_device->info.gfx_level >= GFX11)) {
(pool->vk.query_type == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT && pdev->info.gfx_level >= GFX11)) {
memset(pool->ptr + pool->availability_offset + firstQuery * 4, 0, queryCount * 4);
}
}

@ -2055,6 +2059,7 @@ static void
emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va, VkQueryType query_type,
VkQueryControlFlags flags, uint32_t index)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
switch (query_type) {
case VK_QUERY_TYPE_OCCLUSION:

@ -2082,12 +2087,11 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
}
}

if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11 &&
cmd_buffer->device->physical_device->info.pfp_fw_version >= EVENT_WRITE_ZPASS_PFP_VERSION) {
if (pdev->info.gfx_level >= GFX11 && pdev->info.pfp_fw_version >= EVENT_WRITE_ZPASS_PFP_VERSION) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_ZPASS, 1, 0));
} else {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
radeon_emit(cs, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_DUMP) | EVENT_INDEX(1));
} else {
radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));

@ -2149,7 +2153,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
uint32_t task_invoc_offset =
radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);

if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
va += task_invoc_offset;

radeon_check_space(cmd_buffer->device->ws, cmd_buffer->gang.cs, 4);

@ -2176,7 +2180,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
break;
}
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
if (pdev->use_ngg_streamout) {
/* generated prim counter */
gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
radv_cs_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);

@ -2201,7 +2205,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
}
break;
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
/* On GFX11+, primitives generated query always use GDS. */
gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va);
radv_cs_write_data_imm(cs, V_370_ME, va + 4, 0x80000000);

@ -2251,7 +2255,7 @@ emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *poo
break;
}
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: {
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
radeon_check_space(cmd_buffer->device->ws, cs, 4);

++cmd_buffer->state.active_pipeline_queries;

@ -2285,6 +2289,7 @@ static void
emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va, uint64_t avail_va,
VkQueryType query_type, uint32_t index)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
switch (query_type) {
case VK_QUERY_TYPE_OCCLUSION:

@ -2300,12 +2305,11 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_OCCLUSION_QUERY;
}

if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11 &&
cmd_buffer->device->physical_device->info.pfp_fw_version >= EVENT_WRITE_ZPASS_PFP_VERSION) {
if (pdev->info.gfx_level >= GFX11 && pdev->info.pfp_fw_version >= EVENT_WRITE_ZPASS_PFP_VERSION) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_ZPASS, 1, 0));
} else {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
radeon_emit(cs, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_DUMP) | EVENT_INDEX(1));
} else {
radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));

@ -2369,7 +2373,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
uint32_t task_invoc_offset =
radv_get_pipelinestat_query_offset(VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT);

if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
va += task_invoc_offset;

radeon_check_space(cmd_buffer->device->ws, cmd_buffer->gang.cs, 4);

@ -2391,13 +2395,12 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
}
}

radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->info.gfx_level, cmd_buffer->qf,
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, avail_va,
1, cmd_buffer->gfx9_eop_bug_va);
radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0,
EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, avail_va, 1, cmd_buffer->gfx9_eop_bug_va);
break;
}
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
if (pdev->use_ngg_streamout) {
/* generated prim counter */
gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
radv_cs_write_data_imm(cs, V_370_ME, va + 20, 0x80000000);

@ -2419,7 +2422,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
}
break;
case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
/* On GFX11+, primitives generated query always use GDS. */
gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_PRIM_GEN_OFFSET(index), va + 16);
radv_cs_write_data_imm(cs, V_370_ME, va + 20, 0x80000000);

@ -2463,7 +2466,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
break;
}
case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT: {
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
unsigned pipelinestat_block_size = radv_get_pipelinestat_query_size(cmd_buffer->device);

radeon_check_space(cmd_buffer->device->ws, cs, 16);

@ -2479,9 +2482,9 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);

radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->info.gfx_level, cmd_buffer->qf,
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT,
avail_va, 1, cmd_buffer->gfx9_eop_bug_va);
radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0,
EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, avail_va, 1,
cmd_buffer->gfx9_eop_bug_va);
} else {
gfx10_copy_gds_query_gfx(cmd_buffer, RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET, va + 8);
radv_cs_write_data_imm(cs, V_370_ME, va + 12, 0x80000000);

@ -2499,7 +2502,7 @@ emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool,

cmd_buffer->active_query_flush_bits |=
RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE;
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
}
}

@ -2564,6 +2567,7 @@ radv_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
void
radv_write_timestamp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkPipelineStageFlags2 stage)
{
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
struct radeon_cmdbuf *cs = cmd_buffer->cs;

if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT) {

@ -2575,9 +2579,8 @@ radv_write_timestamp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, VkPipeline
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
} else {
radv_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->info.gfx_level, cmd_buffer->qf,
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_TIMESTAMP, va, 0,
cmd_buffer->gfx9_eop_bug_va);
radv_cs_emit_write_event_eop(cs, pdev->info.gfx_level, cmd_buffer->qf, V_028A90_BOTTOM_OF_PIPE_TS, 0,
EOP_DST_SEL_MEM, EOP_DATA_SEL_TIMESTAMP, va, 0, cmd_buffer->gfx9_eop_bug_va);
}
}

@ -2587,6 +2590,7 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
const unsigned num_queries = MAX2(util_bitcount(cmd_buffer->state.render.view_mask), 1);
struct radeon_cmdbuf *cs = cmd_buffer->cs;
const uint64_t va = radv_buffer_get_va(pool->bo);

@ -2625,7 +2629,7 @@ radv_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 sta

cmd_buffer->active_query_flush_bits |=
RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE;
if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
}
|
|||
|
|

@ -126,6 +126,7 @@ static VkResult
radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMemoryBindInfo *bind)
{
RADV_FROM_HANDLE(radv_image, image, bind->image);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_surf *surface = &image->planes[0].surface;
uint32_t bs = vk_format_get_blocksize(image->vk.format);
VkResult result;

@ -149,7 +150,7 @@ radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMem
if (bind->pBinds[i].memory != VK_NULL_HANDLE)
mem = radv_device_memory_from_handle(bind->pBinds[i].memory);

if (device->physical_device->info.gfx_level >= GFX9) {
if (pdev->info.gfx_level >= GFX9) {
offset = surface->u.gfx9.surf_slice_size * layer + surface->u.gfx9.prt_level_offset[level];
pitch = surface->u.gfx9.prt_level_pitch[level];
depth_pitch = surface->u.gfx9.surf_slice_size;

@ -243,11 +244,13 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
struct radeon_winsys_bo *task_rings_bo, struct radeon_winsys_bo *mesh_scratch_ring_bo,
uint32_t attr_ring_size, struct radeon_winsys_bo *attr_ring_bo)
{
const struct radv_physical_device *pdev = radv_device_physical(device);

if (scratch_bo) {
uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);

if (device->physical_device->info.gfx_level >= GFX11)
if (pdev->info.gfx_level >= GFX11)
rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
else
rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX6(1);

@ -270,17 +273,17 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
S_008F0C_INDEX_STRIDE(3) | S_008F0C_ADD_TID_ENABLE(1);

if (device->physical_device->info.gfx_level >= GFX11)
if (pdev->info.gfx_level >= GFX11)
desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
else
desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX6(1);

if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else if (device->physical_device->info.gfx_level >= GFX10) {
} else if (pdev->info.gfx_level >= GFX10) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
} else if (device->physical_device->info.gfx_level >= GFX8) {
} else if (pdev->info.gfx_level >= GFX8) {
/* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */
desc[3] |=
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1);

@ -298,9 +301,9 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else if (device->physical_device->info.gfx_level >= GFX10) {
} else if (pdev->info.gfx_level >= GFX10) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
} else {

@ -323,9 +326,9 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else if (device->physical_device->info.gfx_level >= GFX10) {
} else if (pdev->info.gfx_level >= GFX10) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
} else {

@ -343,17 +346,17 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
S_008F0C_INDEX_STRIDE(1) | S_008F0C_ADD_TID_ENABLE(true);

if (device->physical_device->info.gfx_level >= GFX11)
if (pdev->info.gfx_level >= GFX11)
desc[5] |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
else
desc[5] |= S_008F04_SWIZZLE_ENABLE_GFX6(1);

if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else if (device->physical_device->info.gfx_level >= GFX10) {
} else if (pdev->info.gfx_level >= GFX10) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
} else if (device->physical_device->info.gfx_level >= GFX8) {
} else if (pdev->info.gfx_level >= GFX8) {
/* DATA_FORMAT is STRIDE[14:17] for MUBUF with ADD_TID_ENABLE=1 */
desc[7] |=
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(0) | S_008F0C_ELEMENT_SIZE(1);

@ -367,17 +370,17 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon

if (tess_rings_bo) {
uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
uint64_t tess_offchip_va = tess_va + device->physical_device->hs.tess_offchip_ring_offset;
uint64_t tess_offchip_va = tess_va + pdev->hs.tess_offchip_ring_offset;

desc[0] = tess_va;
desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
desc[2] = device->physical_device->hs.tess_factor_ring_size;
desc[2] = pdev->hs.tess_factor_ring_size;
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (device->physical_device->info.gfx_level >= GFX10) {
} else if (pdev->info.gfx_level >= GFX10) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
S_008F0C_RESOURCE_LEVEL(1);
} else {

@ -387,13 +390,13 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon

desc[4] = tess_offchip_va;
desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
desc[6] = device->physical_device->hs.tess_offchip_ring_size;
desc[6] = pdev->hs.tess_offchip_ring_size;
desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
} else if (device->physical_device->info.gfx_level >= GFX10) {
} else if (pdev->info.gfx_level >= GFX10) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
S_008F0C_RESOURCE_LEVEL(1);
} else {

@ -406,33 +409,33 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon

if (task_rings_bo) {
uint64_t task_va = radv_buffer_get_va(task_rings_bo);
uint64_t task_draw_ring_va = task_va + device->physical_device->task_info.draw_ring_offset;
uint64_t task_payload_ring_va = task_va + device->physical_device->task_info.payload_ring_offset;
uint64_t task_draw_ring_va = task_va + pdev->task_info.draw_ring_offset;
uint64_t task_payload_ring_va = task_va + pdev->task_info.payload_ring_offset;

desc[0] = task_draw_ring_va;
desc[1] = S_008F04_BASE_ADDRESS_HI(task_draw_ring_va >> 32);
desc[2] = device->physical_device->task_info.num_entries * AC_TASK_DRAW_ENTRY_BYTES;
desc[2] = pdev->task_info.num_entries * AC_TASK_DRAW_ENTRY_BYTES;
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else {
assert(device->physical_device->info.gfx_level >= GFX10_3);
assert(pdev->info.gfx_level >= GFX10_3);
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
S_008F0C_RESOURCE_LEVEL(1);
}

desc[4] = task_payload_ring_va;
desc[5] = S_008F04_BASE_ADDRESS_HI(task_payload_ring_va >> 32);
desc[6] = device->physical_device->task_info.num_entries * AC_TASK_PAYLOAD_ENTRY_BYTES;
desc[6] = pdev->task_info.num_entries * AC_TASK_PAYLOAD_ENTRY_BYTES;
desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else {
assert(device->physical_device->info.gfx_level >= GFX10_3);
assert(pdev->info.gfx_level >= GFX10_3);
desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
S_008F0C_RESOURCE_LEVEL(1);
}

@ -449,10 +452,10 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
} else {
assert(device->physical_device->info.gfx_level >= GFX10_3);
assert(pdev->info.gfx_level >= GFX10_3);
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) | S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
S_008F0C_RESOURCE_LEVEL(1);
}

@ -461,7 +464,7 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *desc, struct radeon
desc += 4;

if (attr_ring_bo) {
assert(device->physical_device->info.gfx_level >= GFX11);
assert(pdev->info.gfx_level >= GFX11);

uint64_t va = radv_buffer_get_va(attr_ring_bo);

@ -489,6 +492,8 @@ static void
radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *esgs_ring_bo,
uint32_t esgs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size)
{
const struct radv_physical_device *pdev = radv_device_physical(device);

if (!esgs_ring_bo && !gsvs_ring_bo)
return;

@ -498,7 +503,7 @@ radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, st
if (gsvs_ring_bo)
radv_cs_add_buffer(device->ws, cs, gsvs_ring_bo);

if (device->physical_device->info.gfx_level >= GFX7) {
if (pdev->info.gfx_level >= GFX7) {
radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
radeon_emit(cs, esgs_ring_size >> 8);
radeon_emit(cs, gsvs_ring_size >> 8);

@ -512,49 +517,51 @@ radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs, st
static void
radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *tess_rings_bo)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t tf_va;
uint32_t tf_ring_size;
if (!tess_rings_bo)
return;

tf_ring_size = device->physical_device->hs.tess_factor_ring_size / 4;
tf_ring_size = pdev->hs.tess_factor_ring_size / 4;
tf_va = radv_buffer_get_va(tess_rings_bo);

radv_cs_add_buffer(device->ws, cs, tess_rings_bo);

if (device->physical_device->info.gfx_level >= GFX7) {
if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX7) {
if (pdev->info.gfx_level >= GFX11) {
/* TF_RING_SIZE is per SE on GFX11. */
tf_ring_size /= device->physical_device->info.max_se;
tf_ring_size /= pdev->info.max_se;
}

radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size));
radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, tf_va >> 8);

if (device->physical_device->info.gfx_level >= GFX10) {
if (pdev->info.gfx_level >= GFX10) {
radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI, S_030984_BASE_HI(tf_va >> 40));
} else if (device->physical_device->info.gfx_level == GFX9) {
} else if (pdev->info.gfx_level == GFX9) {
radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(tf_va >> 40));
}

radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, device->physical_device->hs.hs_offchip_param);
radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, pdev->hs.hs_offchip_param);
} else {
radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size));
radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8);
radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, device->physical_device->hs.hs_offchip_param);
radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, pdev->hs.hs_offchip_param);
}
}
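
To make the per-SE division above concrete with hypothetical numbers (not taken from any real ASIC): a total tess factor ring of 0x20000 dwords on a 4-SE GFX11 part would program S_030938_SIZE(0x8000), since VGT_TF_RING_SIZE is per shader engine there, while earlier generations program the full 0x20000 into the single global register.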
|
||||
|
||||
static VkResult
|
||||
radv_initialise_task_control_buffer(struct radv_device *device, struct radeon_winsys_bo *task_rings_bo)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
uint32_t *ptr = (uint32_t *)radv_buffer_map(device->ws, task_rings_bo);
|
||||
if (!ptr)
|
||||
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
|
||||
const uint32_t num_entries = device->physical_device->task_info.num_entries;
|
||||
const uint32_t num_entries = pdev->task_info.num_entries;
|
||||
const uint64_t task_va = radv_buffer_get_va(task_rings_bo);
|
||||
const uint64_t task_draw_ring_va = task_va + device->physical_device->task_info.draw_ring_offset;
|
||||
const uint64_t task_draw_ring_va = task_va + pdev->task_info.draw_ring_offset;
|
||||
assert((task_draw_ring_va & 0xFFFFFF00) == (task_draw_ring_va & 0xFFFFFFFF));
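
The assert above is a compact way of checking that the low 8 bits of task_draw_ring_va are zero: masking off bits 0-7 can only change the value if one of those bits was set, so the equality holds exactly when the draw ring is 256-byte aligned, matching the alignment the ring BO is created with.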

/* 64-bit write_ptr */

@ -599,7 +606,8 @@ static void
radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves,
struct radeon_winsys_bo *scratch_bo)
{
const struct radeon_info *gpu_info = &device->physical_device->info;
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;

if (!scratch_bo)
return;

@ -626,7 +634,8 @@ static void
radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves,
struct radeon_winsys_bo *compute_scratch_bo)
{
const struct radeon_info *gpu_info = &device->physical_device->info;
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radeon_info *gpu_info = &pdev->info;
uint64_t scratch_va;
uint32_t rsrc1;

@ -680,6 +689,7 @@ static void
radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmdbuf *cs,
struct radeon_winsys_bo *descriptor_bo)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t va;

if (!descriptor_bo)

@ -689,21 +699,21 @@ radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmd

radv_cs_add_buffer(device->ws, cs, descriptor_bo);

if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B420_SPI_SHADER_PGM_LO_HS,
R_00B220_SPI_SHADER_PGM_LO_GS};

for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
radv_emit_shader_pointer(device, cs, regs[i], va, true);
}
} else if (device->physical_device->info.gfx_level >= GFX10) {
} else if (pdev->info.gfx_level >= GFX10) {
uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};

for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
radv_emit_shader_pointer(device, cs, regs[i], va, true);
}
} else if (device->physical_device->info.gfx_level == GFX9) {
} else if (pdev->info.gfx_level == GFX9) {
uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS, R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};

@ -725,7 +735,7 @@ static void
radv_emit_attribute_ring(struct radv_device *device, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *attr_ring_bo,
uint32_t attr_ring_size)
{
const struct radv_physical_device *pdev = device->physical_device;
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t va;

if (!attr_ring_bo)

@ -792,6 +802,7 @@ static VkResult
radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *device,
const struct radv_queue_ring_info *needs)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_winsys *ws = device->ws;
struct radeon_winsys_bo *scratch_bo = queue->scratch_bo;
struct radeon_winsys_bo *descriptor_bo = queue->descriptor_bo;

@ -848,8 +859,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
}

if (!queue->ring_info.tess_rings && needs->tess_rings) {
uint64_t tess_rings_size =
device->physical_device->hs.tess_offchip_ring_offset + device->physical_device->hs.tess_offchip_ring_size;
uint64_t tess_rings_size = pdev->hs.tess_offchip_ring_offset + pdev->hs.tess_offchip_ring_size;
result = radv_bo_create(device, tess_rings_size, 256, RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH,
0, true, &tess_rings_bo);
if (result != VK_SUCCESS)

@ -858,7 +868,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
}

if (!queue->ring_info.task_rings && needs->task_rings) {
assert(device->physical_device->info.gfx_level >= GFX10_3);
assert(pdev->info.gfx_level >= GFX10_3);

/* We write the control buffer from the CPU, so need to grant CPU access to the BO.
* The draw ring needs to be zero-initialized otherwise the ready bits will be incorrect.

@ -866,12 +876,11 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
uint32_t task_rings_bo_flags =
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM;

result = radv_bo_create(device, device->physical_device->task_info.bo_size_bytes, 256, RADEON_DOMAIN_VRAM,
task_rings_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, true, &task_rings_bo);
result = radv_bo_create(device, pdev->task_info.bo_size_bytes, 256, RADEON_DOMAIN_VRAM, task_rings_bo_flags,
RADV_BO_PRIORITY_SCRATCH, 0, true, &task_rings_bo);
if (result != VK_SUCCESS)
goto fail;
radv_rmv_log_command_buffer_bo_create(device, task_rings_bo, 0, 0,
device->physical_device->task_info.bo_size_bytes);
radv_rmv_log_command_buffer_bo_create(device, task_rings_bo, 0, 0, pdev->task_info.bo_size_bytes);

result = radv_initialise_task_control_buffer(device, task_rings_bo);
if (result != VK_SUCCESS)

@ -879,7 +888,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
}

if (!queue->ring_info.mesh_scratch_ring && needs->mesh_scratch_ring) {
assert(device->physical_device->info.gfx_level >= GFX10_3);
assert(pdev->info.gfx_level >= GFX10_3);
result = radv_bo_create(device, RADV_MESH_SCRATCH_NUM_ENTRIES * RADV_MESH_SCRATCH_ENTRY_BYTES, 256,
RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, true,
&mesh_scratch_ring_bo);

@ -891,7 +900,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
}

if (needs->attr_ring_size > queue->ring_info.attr_ring_size) {
assert(device->physical_device->info.gfx_level >= GFX11);
assert(pdev->info.gfx_level >= GFX11);
result = radv_bo_create(device, needs->attr_ring_size, 2 * 1024 * 1024 /* 2MiB */, RADEON_DOMAIN_VRAM,
RADEON_FLAG_32BIT | RADEON_FLAG_DISCARDABLE | ring_bo_flags, RADV_BO_PRIORITY_SCRATCH,
0, true, &attr_ring_bo);

@ -901,7 +910,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
}

if (!queue->ring_info.gds && needs->gds) {
assert(device->physical_device->info.gfx_level >= GFX10);
assert(pdev->info.gfx_level >= GFX10);

/* 4 streamout GDS counters.
* We need 256B (64 dw) of GDS, otherwise streamout hangs.

@ -920,7 +929,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
}

if (!queue->ring_info.gds_oa && needs->gds_oa) {
assert(device->physical_device->info.gfx_level >= GFX10);
assert(pdev->info.gfx_level >= GFX10);

result =
radv_bo_create(device, 1, 1, RADEON_DOMAIN_OA, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, true, &gds_oa_bo);

@ -972,7 +981,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi
for (int i = 0; i < 3; ++i) {
enum rgp_flush_bits sqtt_flush_bits = 0;
struct radeon_cmdbuf *cs = NULL;
cs = ws->cs_create(ws, radv_queue_family_to_ring(device->physical_device, queue->qf), false);
cs = ws->cs_create(ws, radv_queue_family_to_ring(pdev, queue->qf), false);
if (!cs) {
result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
goto fail;

@ -1027,7 +1036,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi

if (i < 2) {
/* The two initial preambles have a cache flush at the beginning. */
const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
enum radv_cmd_flush_bits flush_bits = RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE |
RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2 |
RADV_CMD_FLAG_START_PIPELINE_STATS;

@ -1143,6 +1152,7 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device
struct vk_command_buffer *const *cmd_buffers, uint32_t cmd_buffer_count, bool *use_perf_counters,
bool *has_follower)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
bool has_indirect_pipeline_binds = false;

if (queue->qf != RADV_QUEUE_GENERAL && queue->qf != RADV_QUEUE_COMPUTE) {

@ -1203,9 +1213,8 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device
? MIN2(needs.compute_scratch_waves, UINT32_MAX / needs.compute_scratch_size_per_wave)
: 0;

if (device->physical_device->info.gfx_level >= GFX11 && queue->qf == RADV_QUEUE_GENERAL) {
needs.attr_ring_size =
device->physical_device->info.attribute_ring_size_per_se * device->physical_device->info.max_se;
if (pdev->info.gfx_level >= GFX11 && queue->qf == RADV_QUEUE_GENERAL) {
needs.attr_ring_size = pdev->info.attribute_ring_size_per_se * pdev->info.max_se;
}

/* Return early if we already match these needs.

@ -1230,13 +1239,15 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device
static VkResult
radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
{
const struct radv_physical_device *pdev = radv_device_physical(queue->device);

if (queue->gang_sem_bo)
return VK_SUCCESS;

VkResult r = VK_SUCCESS;
struct radv_device *device = queue->device;
struct radeon_winsys *ws = device->ws;
const enum amd_ip_type leader_ip = radv_queue_family_to_ring(device->physical_device, queue->state.qf);
const enum amd_ip_type leader_ip = radv_queue_family_to_ring(pdev, queue->state.qf);
struct radeon_winsys_bo *gang_sem_bo = NULL;

/* Gang semaphores BO.

@ -1291,9 +1302,8 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue)
*/
radv_cp_wait_mem(leader_post_cs, queue->state.qf, WAIT_REG_MEM_GREATER_OR_EQUAL, leader_wait_va, 1, 0xffffffff);
radv_cs_write_data(device, leader_post_cs, queue->state.qf, V_370_ME, leader_wait_va, 1, &zero, false);
radv_cs_emit_write_event_eop(ace_post_cs, device->physical_device->info.gfx_level, RADV_QUEUE_COMPUTE,
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT,
leader_wait_va, 1, 0);
radv_cs_emit_write_event_eop(ace_post_cs, pdev->info.gfx_level, RADV_QUEUE_COMPUTE, V_028A90_BOTTOM_OF_PIPE_TS, 0,
EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, leader_wait_va, 1, 0);

r = ws->cs_finalize(leader_pre_cs);
if (r != VK_SUCCESS)

@ -1681,13 +1691,14 @@ fail:
static void
radv_report_gpuvm_fault(struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_winsys_gpuvm_fault_info fault_info = {0};

if (!radv_vm_fault_occurred(device, &fault_info))
return;

fprintf(stderr, "radv: GPUVM fault detected at address 0x%08" PRIx64 ".\n", fault_info.addr);
ac_print_gpuvm_fault_status(stderr, device->physical_device->info.gfx_level, fault_info.status);
ac_print_gpuvm_fault_status(stderr, pdev->info.gfx_level, fault_info.status);
}

static VkResult

@ -1735,9 +1746,10 @@ static VkResult
radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission)
{
struct radv_queue *queue = (struct radv_queue *)vqueue;
const struct radv_physical_device *pdev = radv_device_physical(queue->device);
VkResult result;

if (!radv_sparse_queue_enabled(queue->device->physical_device)) {
if (!radv_sparse_queue_enabled(pdev)) {
result = radv_queue_submit_bind_sparse_memory(queue->device, submission);
if (result != VK_SUCCESS)
goto fail;

@ -1792,10 +1804,12 @@ radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
const VkDeviceQueueCreateInfo *create_info,
const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority)
{
const struct radv_physical_device *pdev = radv_device_physical(device);

queue->device = device;
queue->priority = radv_get_queue_global_priority(global_priority);
queue->hw_ctx = device->hw_ctx[queue->priority];
queue->state.qf = vk_queue_to_radv(device->physical_device, create_info->queueFamilyIndex);
queue->state.qf = vk_queue_to_radv(pdev, create_info->queueFamilyIndex);
queue->gang_sem_bo = NULL;

VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);

@ -173,6 +173,8 @@ static void
evaluate_trace_event(struct radv_device *device, uint64_t timestamp, struct util_dynarray *tokens,
struct trace_event_amdgpu_vm_update_ptes *event)
{
const struct radv_physical_device *pdev = radv_device_physical(device);

if (event->common.pid != getpid() && event->pid != getpid()) {
return;
}

@ -180,8 +182,8 @@ evaluate_trace_event(struct radv_device *device, uint64_t timestamp, struct util
struct trace_event_address_array *array = (struct trace_event_address_array *)(event + 1);

for (uint32_t i = 0; i < event->num_ptes; ++i)
emit_page_table_update_event(&device->vk.memory_trace_data, !device->physical_device->info.has_dedicated_vram,
timestamp, event, (uint64_t *)array->data, i);
emit_page_table_update_event(&device->vk.memory_trace_data, !pdev->info.has_dedicated_vram, timestamp, event,
(uint64_t *)array->data, i);
}

static void

@ -480,6 +482,8 @@ void
radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool is_internal,
VkMemoryAllocateFlags alloc_flags)
{
const struct radv_physical_device *pdev = radv_device_physical(device);

if (!device->vk.memory_trace_data.is_enabled)
return;

@ -495,7 +499,7 @@ radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool i
token.is_driver_internal = is_internal;
token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)heap);
token.type = VK_RMV_RESOURCE_TYPE_HEAP;
token.heap.alignment = device->physical_device->info.max_alignment;
token.heap.alignment = pdev->info.max_alignment;
token.heap.size = memory->alloc_size;
token.heap.heap_index = memory->heap_index;
token.heap.alloc_flags = alloc_flags;

@ -508,6 +512,8 @@ radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool i
void
radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo, bool is_internal)
{
const struct radv_physical_device *pdev = radv_device_physical(device);

if (!device->vk.memory_trace_data.is_enabled)
return;

@ -518,7 +524,7 @@ radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo
struct vk_rmv_virtual_allocate_token token = {0};
token.address = bo->va;
/* If all VRAM is visible, no bo will be in invisible memory. */
token.is_in_invisible_vram = bo->vram_no_cpu_access && !device->physical_device->info.all_vram_visible;
token.is_in_invisible_vram = bo->vram_no_cpu_access && !pdev->info.all_vram_visible;
token.preferred_domains = (enum vk_rmv_kernel_memory_domain)bo->initial_domain;
token.is_driver_internal = is_internal;
token.page_count = DIV_ROUND_UP(bo->size, 4096);

@ -898,15 +898,17 @@ exit:
VkResult
radv_rra_trace_init(struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);

device->rra_trace.validate_as = debug_get_bool_option("RADV_RRA_TRACE_VALIDATE", false);
device->rra_trace.copy_after_build = debug_get_bool_option("RADV_RRA_TRACE_COPY_AFTER_BUILD", false);
device->rra_trace.accel_structs = _mesa_pointer_hash_table_create(NULL);
device->rra_trace.accel_struct_vas = _mesa_hash_table_u64_create(NULL);
simple_mtx_init(&device->rra_trace.data_mtx, mtx_plain);

device->rra_trace.copy_memory_index = radv_find_memory_index(
device->physical_device,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
device->rra_trace.copy_memory_index =
radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT);

util_dynarray_init(&device->rra_trace.ray_history, NULL);

@ -939,9 +941,9 @@ radv_rra_trace_init(struct radv_device *device)
VkMemoryAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = requirements.size,
.memoryTypeIndex = radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
.memoryTypeIndex =
radv_find_memory_index(pdev, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
};

result = radv_AllocateMemory(_device, &alloc_info, NULL, &device->rra_trace.ray_history_memory);

@ -1316,6 +1318,7 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename)
{
RADV_FROM_HANDLE(radv_queue, queue, vk_queue);
struct radv_device *device = queue->device;
const struct radv_physical_device *pdev = radv_device_physical(device);
VkDevice vk_device = radv_device_to_handle(device);

VkResult result = vk_common_DeviceWaitIdle(vk_device);

@ -1365,7 +1368,7 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename)
fwrite(&api, sizeof(uint64_t), 1, file);

uint64_t asic_info_offset = (uint64_t)ftell(file);
rra_dump_asic_info(&device->physical_device->info, file);
rra_dump_asic_info(&pdev->info, file);

uint64_t written_accel_struct_count = 0;

@ -196,14 +196,14 @@ radv_unregister_border_color(struct radv_device *device, uint32_t slot)
static void
radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler, const VkSamplerCreateInfo *pCreateInfo)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
bool compat_mode =
device->physical_device->info.gfx_level == GFX8 || device->physical_device->info.gfx_level == GFX9;
bool compat_mode = pdev->info.gfx_level == GFX8 || pdev->info.gfx_level == GFX9;
unsigned filter_mode = radv_tex_filter_mode(sampler->vk.reduction_mode);
unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
bool trunc_coord = ((pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST) ||
device->physical_device->info.conformant_trunc_coord) &&
pdev->info.conformant_trunc_coord) &&
!device->disable_trunc_coord;
bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||

@ -246,18 +246,17 @@ radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler, cons
S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)));
sampler->state[3] = S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color));

if (device->physical_device->info.gfx_level >= GFX10) {
if (pdev->info.gfx_level >= GFX10) {
sampler->state[2] |= S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -32, 31), 8)) |
S_008F38_ANISO_OVERRIDE_GFX10(device->instance->drirc.disable_aniso_single_level);
} else {
sampler->state[2] |= S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
S_008F38_DISABLE_LSB_CEIL(device->physical_device->info.gfx_level <= GFX8) |
S_008F38_FILTER_PREC_FIX(1) |
S_008F38_DISABLE_LSB_CEIL(pdev->info.gfx_level <= GFX8) | S_008F38_FILTER_PREC_FIX(1) |
S_008F38_ANISO_OVERRIDE_GFX8(device->instance->drirc.disable_aniso_single_level &&
device->physical_device->info.gfx_level >= GFX8);
pdev->info.gfx_level >= GFX8);
}

if (device->physical_device->info.gfx_level >= GFX11) {
if (pdev->info.gfx_level >= GFX11) {
sampler->state[3] |= S_008F3C_BORDER_COLOR_PTR_GFX11(border_color_ptr);
} else {
sampler->state[3] |= S_008F3C_BORDER_COLOR_PTR_GFX6(border_color_ptr);

@ -57,7 +57,9 @@ static const VkExtent3D radv_sdma_t2t_alignment_3d[] = {
ALWAYS_INLINE static unsigned
radv_sdma_pitch_alignment(const struct radv_device *device, const unsigned bpp)
{
if (device->physical_device->info.sdma_ip_version >= SDMA_5_0)
const struct radv_physical_device *pdev = radv_device_physical(device);

if (pdev->info.sdma_ip_version >= SDMA_5_0)
return MAX2(1, 4 / bpp);

return 4;
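
Worked through with C's integer division, the helper above requires a pitch alignment of 4 texels at 1 byte per pixel, 2 texels at 2 bpp, and 1 texel at 4 bpp and wider on SDMA 5.0+, whereas older SDMA engines always need an alignment of 4.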

@ -82,7 +84,9 @@ radv_sdma_check_pitches(const unsigned pitch, const unsigned slice_pitch, const
ALWAYS_INLINE static enum gfx9_resource_type
radv_sdma_surface_resource_type(const struct radv_device *const device, const struct radeon_surf *const surf)
{
if (device->physical_device->info.sdma_ip_version >= SDMA_5_0) {
const struct radv_physical_device *pdev = radv_device_physical(device);

if (pdev->info.sdma_ip_version >= SDMA_5_0) {
/* Use the 2D resource type for rotated or Z swizzles. */
if ((surf->u.gfx9.resource_type == RADEON_RESOURCE_1D || surf->u.gfx9.resource_type == RADEON_RESOURCE_3D) &&
(surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER || surf->micro_tile_mode == RADEON_MICRO_MODE_DEPTH))

@ -195,7 +199,9 @@ radv_sdma_get_metadata_config(const struct radv_device *const device, const stru
const struct radeon_surf *const surf, const VkImageSubresourceLayers subresource,
const VkImageAspectFlags aspect_mask)
{
if (!device->physical_device->info.sdma_supports_compression ||
const struct radv_physical_device *pdev = radv_device_physical(device);

if (!pdev->info.sdma_supports_compression ||
!(radv_dcc_enabled(image, subresource.mipLevel) || radv_image_has_htile(image))) {
return 0;
}

@ -203,8 +209,7 @@ radv_sdma_get_metadata_config(const struct radv_device *const device, const stru
const VkFormat format = vk_format_get_aspect_format(image->vk.format, aspect_mask);
const struct util_format_description *desc = vk_format_description(format);

const uint32_t data_format =
ac_get_cb_format(device->physical_device->info.gfx_level, vk_format_to_pipe_format(format));
const uint32_t data_format = ac_get_cb_format(pdev->info.gfx_level, vk_format_to_pipe_format(format));
const uint32_t alpha_is_on_msb = vi_alpha_is_on_msb(device, format);
const uint32_t number_type = radv_translate_buffer_numformat(desc, vk_format_get_first_non_void_channel(format));
const uint32_t surface_type = radv_sdma_surface_type_from_aspect_mask(aspect_mask);

@ -220,11 +225,12 @@ static uint32_t
radv_sdma_get_tiled_info_dword(const struct radv_device *const device, const struct radv_image *const image,
const struct radeon_surf *const surf, const VkImageSubresourceLayers subresource)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const uint32_t element_size = util_logbase2(surf->bpe);
const uint32_t swizzle_mode = surf->has_stencil ? surf->u.gfx9.zs.stencil_swizzle_mode : surf->u.gfx9.swizzle_mode;
const enum gfx9_resource_type dimension = radv_sdma_surface_resource_type(device, surf);
const uint32_t info = element_size | swizzle_mode << 3 | dimension << 9;
const enum sdma_version ver = device->physical_device->info.sdma_ip_version;
const enum sdma_version ver = pdev->info.sdma_ip_version;

if (ver >= SDMA_5_0) {
const uint32_t mip_max = MAX2(image->vk.mip_levels, 1);

@ -242,7 +248,8 @@ static uint32_t
radv_sdma_get_tiled_header_dword(const struct radv_device *const device, const struct radv_image *const image,
const VkImageSubresourceLayers subresource)
{
const enum sdma_version ver = device->physical_device->info.sdma_ip_version;
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum sdma_version ver = pdev->info.sdma_ip_version;

if (ver >= SDMA_5_0) {
return 0;

@ -262,6 +269,7 @@ radv_sdma_get_surf(const struct radv_device *const device, const struct radv_ima
{
assert(util_bitcount(aspect_mask) == 1);

const struct radv_physical_device *pdev = radv_device_physical(device);
const unsigned plane_idx = radv_plane_from_aspect(aspect_mask);
const unsigned binding_idx = image->disjoint ? plane_idx : 0;
const struct radv_image_binding *binding = &image->bindings[binding_idx];

@ -301,7 +309,7 @@ radv_sdma_get_surf(const struct radv_device *const device, const struct radv_ima
info.info_dword = radv_sdma_get_tiled_info_dword(device, image, surf, subresource);
info.header_dword = radv_sdma_get_tiled_header_dword(device, image, subresource);
|
||||
|
||||
if (device->physical_device->info.sdma_supports_compression &&
|
||||
if (pdev->info.sdma_supports_compression &&
|
||||
(radv_dcc_enabled(image, subresource.mipLevel) || radv_image_has_htile(image))) {
|
||||
info.meta_va = binding->bo->va + binding->offset + surf->meta_offset;
|
||||
info.meta_config = radv_sdma_get_metadata_config(device, image, surf, subresource, aspect_mask);
|
||||
|
|
@ -326,7 +334,8 @@ radv_sdma_copy_buffer(const struct radv_device *device, struct radeon_cmdbuf *cs
|
|||
if (size == 0)
|
||||
return;
|
||||
|
||||
const enum sdma_version ver = device->physical_device->info.sdma_ip_version;
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const enum sdma_version ver = pdev->info.sdma_ip_version;
|
||||
const unsigned max_size_per_packet = ver >= SDMA_5_2 ? SDMA_V5_2_COPY_MAX_BYTES : SDMA_V2_0_COPY_MAX_BYTES;
|
||||
|
||||
unsigned align = ~0u;
|
||||
|
|
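For context on the hunk above: radv_sdma_copy_buffer has to split a large copy because one SDMA copy packet can move at most max_size_per_packet bytes (SDMA_V5_2_COPY_MAX_BYTES on v5.2+, SDMA_V2_0_COPY_MAX_BYTES before that). A minimal sketch of that splitting arithmetic, with emit_copy_packet() as a hypothetical stand-in for the real packet emission:

   /* Hypothetical illustration of chunking a copy to the per-packet
    * SDMA limit; only the arithmetic is taken from the hunk above,
    * emit_copy_packet() is a stand-in. */
   static void
   sdma_copy_in_chunks(uint64_t src_va, uint64_t dst_va, uint64_t size,
                       unsigned max_size_per_packet)
   {
      while (size) {
         const uint64_t chunk = MIN2(size, (uint64_t)max_size_per_packet);
         emit_copy_packet(src_va, dst_va, chunk); /* one copy packet */
         src_va += chunk;
         dst_va += chunk;
         size -= chunk;
      }
   }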
@@ -367,11 +376,13 @@ void
 radv_sdma_fill_buffer(const struct radv_device *device, struct radeon_cmdbuf *cs, const uint64_t va,
                       const uint64_t size, const uint32_t value)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    const uint32_t fill_size = 2; /* This means that the count is in dwords. */
    const uint32_t constant_fill_header = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0) | (fill_size & 0x3) << 30;

    /* This packet is the same since SDMA v2.4, haven't bothered to check older versions. */
-   const enum sdma_version ver = device->physical_device->info.sdma_ip_version;
+   const enum sdma_version ver = pdev->info.sdma_ip_version;
    assert(ver >= SDMA_2_4);

    /* Maximum allowed fill size depends on the GPU.

@@ -450,7 +461,9 @@ radv_sdma_emit_copy_tiled_sub_window(const struct radv_device *device, struct ra
                                      const struct radv_sdma_surf *const linear, const VkExtent3D pix_extent,
                                      const bool detile)
 {
-   if (!device->physical_device->info.sdma_supports_compression) {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
+   if (!pdev->info.sdma_supports_compression) {
       assert(!tiled->meta_va);
    }

@@ -499,14 +512,15 @@ radv_sdma_emit_copy_t2t_sub_window(const struct radv_device *device, struct rade
                                    const struct radv_sdma_surf *const src, const struct radv_sdma_surf *const dst,
                                    const VkExtent3D px_extent)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    /* We currently only support the SDMA v4+ versions of this packet. */
-   assert(device->physical_device->info.sdma_ip_version >= SDMA_4_0);
+   assert(pdev->info.sdma_ip_version >= SDMA_4_0);

    /* On GFX10+ this supports DCC, but cannot copy a compressed surface to another compressed surface. */
    assert(!src->meta_va || !dst->meta_va);

-   if (device->physical_device->info.sdma_ip_version >= SDMA_4_0 &&
-       device->physical_device->info.sdma_ip_version < SDMA_5_0) {
+   if (pdev->info.sdma_ip_version >= SDMA_4_0 && pdev->info.sdma_ip_version < SDMA_5_0) {
       /* SDMA v4 doesn't support mip_id selection in the T2T copy packet. */
       assert(src->header_dword >> 24 == 0);
       assert(dst->header_dword >> 24 == 0);

@@ -696,7 +710,8 @@ radv_sdma_use_t2t_scanline_copy(const struct radv_device *device, const struct r
    /* SDMA can't do format conversion. */
    assert(src->bpp == dst->bpp);

-   const enum sdma_version ver = device->physical_device->info.sdma_ip_version;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const enum sdma_version ver = pdev->info.sdma_ip_version;
    if (ver < SDMA_5_0) {
      /* SDMA v4.x and older doesn't support proper mip level selection. */
      if (src->mip_levels > 1 || dst->mip_levels > 1)
@@ -304,6 +304,7 @@ nir_shader *
 radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_stage *stage,
                          const struct radv_spirv_to_nir_options *options, bool is_internal)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    unsigned subgroup_size = 64, ballot_bit_size = 64;
    const unsigned required_subgroup_size = stage->key.subgroup_required_size * 32;
    if (required_subgroup_size) {

@@ -340,7 +341,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
       .device = device,
       .object = stage->spirv.object,
    };
-   const bool has_fragment_shader_interlock = radv_has_pops(device->physical_device);
+   const bool has_fragment_shader_interlock = radv_has_pops(pdev);
    const struct spirv_to_nir_options spirv_options = {
       .caps =
          {

@@ -359,7 +360,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
             .device_group = true,
             .draw_parameters = true,
             .float_controls = true,
-            .float16 = device->physical_device->info.has_packed_math_16bit,
+            .float16 = pdev->info.has_packed_math_16bit,
             .float32_atomic_add = true,
             .float32_atomic_min_max = true,
             .float64 = true,

@@ -411,7 +412,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
             .variable_pointers = true,
             .vk_memory_model = true,
             .vk_memory_model_device_scope = true,
-            .fragment_shading_rate = device->physical_device->info.gfx_level >= GFX10_3,
+            .fragment_shading_rate = pdev->info.gfx_level >= GFX10_3,
             .workgroup_memory_explicit_layout = true,
             .cooperative_matrix = true,
          },

@@ -426,11 +427,11 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
          .func = radv_spirv_nir_debug,
          .private_data = &spirv_debug_data,
       },
-      .force_tex_non_uniform = device->physical_device->cache_key.tex_non_uniform,
-      .force_ssbo_non_uniform = device->physical_device->cache_key.ssbo_non_uniform,
+      .force_tex_non_uniform = pdev->cache_key.tex_non_uniform,
+      .force_ssbo_non_uniform = pdev->cache_key.ssbo_non_uniform,
    };
    nir = spirv_to_nir(spirv, stage->spirv.size / 4, spec_entries, num_spec_entries, stage->stage, stage->entrypoint,
-                      &spirv_options, &device->physical_device->nir_options[stage->stage]);
+                      &spirv_options, &pdev->nir_options[stage->stage]);
    nir->info.internal |= is_internal;
    assert(nir->info.stage == stage->stage);
    nir_validate_shader(nir, "after spirv_to_nir");

@@ -507,7 +508,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st

    NIR_PASS(_, nir, nir_lower_vars_to_ssa);

-   NIR_PASS(_, nir, nir_propagate_invariant, device->physical_device->cache_key.invariant_geom);
+   NIR_PASS(_, nir, nir_propagate_invariant, pdev->cache_key.invariant_geom);

    NIR_PASS(_, nir, nir_lower_clip_cull_distance_arrays);

@@ -515,11 +516,11 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
        nir->info.stage == MESA_SHADER_GEOMETRY)
       NIR_PASS_V(nir, nir_shader_gather_xfb_info);

-   NIR_PASS(_, nir, nir_lower_discard_or_demote, device->physical_device->cache_key.lower_discard_to_demote);
+   NIR_PASS(_, nir, nir_lower_discard_or_demote, pdev->cache_key.lower_discard_to_demote);

    nir_lower_doubles_options lower_doubles = nir->options->lower_doubles_options;

-   if (device->physical_device->info.gfx_level == GFX6) {
+   if (pdev->info.gfx_level == GFX6) {
       /* GFX6 doesn't support v_floor_f64 and the precision
        * of v_fract_f64 which is used to implement 64-bit
        * floor is less than what Vulkan requires.

@@ -537,7 +538,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
      /* Mesh shaders run as NGG which can implement local_invocation_index from
       * the wave ID in merged_wave_info, but they don't have local_invocation_ids on GFX10.3.
       */
-      .lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH && !device->physical_device->mesh_fast_launch_2,
+      .lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH && !pdev->mesh_fast_launch_2,
      .lower_local_invocation_index = nir->info.stage == MESA_SHADER_COMPUTE &&
                                      ((nir->info.workgroup_size[0] == 1) + (nir->info.workgroup_size[1] == 1) +
                                       (nir->info.workgroup_size[2] == 1)) == 2,

@@ -569,10 +570,10 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
       .lower_txf_offset = true,
       .lower_tg4_offsets = true,
       .lower_txs_cube_array = true,
-      .lower_to_fragment_fetch_amd = device->physical_device->use_fmask,
+      .lower_to_fragment_fetch_amd = pdev->use_fmask,
       .lower_lod_zero_width = true,
       .lower_invalid_implicit_lod = true,
-      .lower_1d = device->physical_device->info.gfx_level == GFX9,
+      .lower_1d = pdev->info.gfx_level == GFX9,
    };

    NIR_PASS(_, nir, nir_lower_tex, &tex_options);

@@ -597,7 +598,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
    NIR_PASS(_, nir, nir_lower_global_vars_to_local);
    NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);

-   bool gfx7minus = device->physical_device->info.gfx_level <= GFX7;
+   bool gfx7minus = pdev->info.gfx_level <= GFX7;
    bool has_inverse_ballot = true;
 #if LLVM_AVAILABLE
    has_inverse_ballot = !radv_use_llvm_for_stage(device, nir->info.stage) || LLVM_VERSION_MAJOR >= 17;

@@ -690,7 +691,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
        nir->info.stage == MESA_SHADER_MESH) &&
       nir->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE)) {
       /* Lower primitive shading rate to match HW requirements. */
-      NIR_PASS(_, nir, radv_nir_lower_primitive_shading_rate, device->physical_device->info.gfx_level);
+      NIR_PASS(_, nir, radv_nir_lower_primitive_shading_rate, pdev->info.gfx_level);
    }

    /* Indirect lowering must be called after the radv_optimize_nir() loop

@@ -698,8 +699,8 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st
     * bloat the instruction count of the loop and cause it to be
     * considered too large for unrolling.
     */
-   if (ac_nir_lower_indirect_derefs(nir, device->physical_device->info.gfx_level) &&
-       !stage->key.optimisations_disabled && nir->info.stage != MESA_SHADER_COMPUTE) {
+   if (ac_nir_lower_indirect_derefs(nir, pdev->info.gfx_level) && !stage->key.optimisations_disabled &&
+       nir->info.stage != MESA_SHADER_COMPUTE) {
       /* Optimize the lowered code before the linking optimizations. */
       radv_optimize_nir(nir, false);
    }
@@ -775,6 +776,7 @@ void
 radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
                const struct radv_graphics_state_key *gfx_state)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_shader_info *info = &ngg_stage->info;
    nir_shader *nir = ngg_stage->nir;

@@ -818,19 +820,19 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
    nir->info.shared_size = info->ngg_info.lds_size;

    ac_nir_lower_ngg_options options = {0};
-   options.family = device->physical_device->info.family;
-   options.gfx_level = device->physical_device->info.gfx_level;
+   options.family = pdev->info.family;
+   options.gfx_level = pdev->info.gfx_level;
    options.max_workgroup_size = info->workgroup_size;
    options.wave_size = info->wave_size;
    options.clip_cull_dist_mask = info->outinfo.clip_dist_mask | info->outinfo.cull_dist_mask;
    options.vs_output_param_offset = info->outinfo.vs_output_param_offset;
    options.has_param_exports = info->outinfo.param_exports || info->outinfo.prim_param_exports;
    options.can_cull = nir->info.stage != MESA_SHADER_GEOMETRY && info->has_ngg_culling;
-   options.disable_streamout = !device->physical_device->use_ngg_streamout;
+   options.disable_streamout = !pdev->use_ngg_streamout;
    options.has_gen_prim_query = info->has_prim_query;
    options.has_xfb_prim_query = info->has_xfb_query;
-   options.has_gs_invocations_query = device->physical_device->info.gfx_level < GFX11;
-   options.has_gs_primitives_query = device->physical_device->info.gfx_level < GFX11;
+   options.has_gs_invocations_query = pdev->info.gfx_level < GFX11;
+   options.has_gs_primitives_query = pdev->info.gfx_level < GFX11;
    options.force_vrs = info->force_vrs_per_vertex;

    if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) {

@@ -862,8 +864,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
       bool scratch_ring = false;
       NIR_PASS_V(nir, ac_nir_lower_ngg_ms, options.gfx_level, options.clip_cull_dist_mask,
                  options.vs_output_param_offset, options.has_param_exports, &scratch_ring, info->wave_size,
-                 hw_workgroup_size, gfx_state->has_multiview_view_index, info->ms.has_query,
-                 device->physical_device->mesh_fast_launch_2);
+                 hw_workgroup_size, gfx_state->has_multiview_view_index, info->ms.has_query, pdev->mesh_fast_launch_2);
       ngg_stage->info.ms.needs_ms_scratch_ring = scratch_ring;
    } else {
       unreachable("invalid SW stage passed to radv_lower_ngg");

@@ -933,6 +934,7 @@ static struct radv_shader_arena *
 radv_create_shader_arena(struct radv_device *device, struct radv_shader_free_list *free_list, unsigned min_size,
                          unsigned arena_size, bool replayable, uint64_t replay_va)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    union radv_shader_arena_block *alloc = NULL;
    struct radv_shader_arena *arena = calloc(1, sizeof(struct radv_shader_arena));
    if (!arena)

@@ -948,7 +950,7 @@ radv_create_shader_arena(struct radv_device *device, struct radv_shader_free_lis
    if (device->shader_use_invisible_vram)
       flags |= RADEON_FLAG_NO_CPU_ACCESS;
    else
-      flags |= (device->physical_device->info.cpdma_prefetch_writes_memory ? 0 : RADEON_FLAG_READ_ONLY);
+      flags |= (pdev->info.cpdma_prefetch_writes_memory ? 0 : RADEON_FLAG_READ_ONLY);

    if (replayable)
       flags |= RADEON_FLAG_REPLAYABLE;

@@ -1079,7 +1081,9 @@ insert_block(struct radv_device *device, union radv_shader_arena_block *hole, ui
 union radv_shader_arena_block *
 radv_alloc_shader_memory(struct radv_device *device, uint32_t size, bool replayable, void *ptr)
 {
-   size = ac_align_shader_binary_for_prefetch(&device->physical_device->info, size);
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
+   size = ac_align_shader_binary_for_prefetch(&pdev->info, size);
    size = align(size, RADV_SHADER_ALLOC_ALIGNMENT);

    mtx_lock(&device->shader_arena_mutex);

@@ -1402,7 +1406,8 @@ radv_destroy_shader_upload_queue(struct radv_device *device)
 static bool
 radv_should_use_wgp_mode(const struct radv_device *device, gl_shader_stage stage, const struct radv_shader_info *info)
 {
-   enum amd_gfx_level chip = device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   enum amd_gfx_level chip = pdev->info.gfx_level;
    switch (stage) {
    case MESA_SHADER_COMPUTE:
    case MESA_SHADER_TESS_CTRL:

@@ -1422,13 +1427,13 @@ static bool
 radv_open_rtld_binary(struct radv_device *device, const struct radv_shader_binary *binary,
                       struct ac_rtld_binary *rtld_binary)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const char *elf_data = (const char *)((struct radv_shader_binary_rtld *)binary)->data;
    size_t elf_size = ((struct radv_shader_binary_rtld *)binary)->elf_size;
    struct ac_rtld_symbol lds_symbols[3];
    unsigned num_lds_symbols = 0;

-   if (device->physical_device->info.gfx_level >= GFX9 &&
-       (binary->info.stage == MESA_SHADER_GEOMETRY || binary->info.is_ngg)) {
+   if (pdev->info.gfx_level >= GFX9 && (binary->info.stage == MESA_SHADER_GEOMETRY || binary->info.is_ngg)) {
       struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
       sym->name = "esgs_ring";
       sym->size = binary->info.ngg_info.esgs_ring_size;

@@ -1448,7 +1453,7 @@ radv_open_rtld_binary(struct radv_device *device, const struct radv_shader_binar
    }

    struct ac_rtld_open_info open_info = {
-      .info = &device->physical_device->info,
+      .info = &pdev->info,
       .shader_type = binary->info.stage,
       .wave_size = binary->info.wave_size,
       .num_parts = 1,

@@ -1466,6 +1471,7 @@ static bool
 radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_binary *binary,
                                const struct radv_shader_args *args)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct ac_shader_config *config = &binary->config;

    if (binary->type == RADV_BINARY_TYPE_RTLD) {

@@ -1478,13 +1484,13 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi
       return false;
    }

-   if (!ac_rtld_read_config(&device->physical_device->info, &rtld_binary, config)) {
+   if (!ac_rtld_read_config(&pdev->info, &rtld_binary, config)) {
       ac_rtld_close(&rtld_binary);
       return false;
    }

    if (rtld_binary.lds_size > 0) {
-      unsigned encode_granularity = device->physical_device->info.lds_encode_granularity;
+      unsigned encode_granularity = pdev->info.lds_encode_granularity;
       config->lds_size = DIV_ROUND_UP(rtld_binary.lds_size, encode_granularity);
    }
    if (!config->lds_size && binary->info.stage == MESA_SHADER_TESS_CTRL) {

@@ -1499,7 +1505,6 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi

    const struct radv_shader_info *info = &binary->info;
    gl_shader_stage stage = binary->info.stage;
-   const struct radv_physical_device *pdev = device->physical_device;
    bool scratch_enabled = config->scratch_bytes_per_wave > 0;
    bool trap_enabled = !!device->trap_handler_shader;
    unsigned vgpr_comp_cnt = 0;

@@ -2064,7 +2069,8 @@ unsigned
 radv_get_max_waves(const struct radv_device *device, const struct ac_shader_config *conf,
                    const struct radv_shader_info *info)
 {
-   const struct radeon_info *gpu_info = &device->physical_device->info;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const struct radeon_info *gpu_info = &pdev->info;
    const enum amd_gfx_level gfx_level = gpu_info->gfx_level;
    const uint8_t wave_size = info->wave_size;
    gl_shader_stage stage = info->stage;

@@ -2109,7 +2115,8 @@ radv_get_max_waves(const struct radv_device *device, const struct ac_shader_conf
 unsigned
 radv_get_max_scratch_waves(const struct radv_device *device, struct radv_shader *shader)
 {
-   const unsigned num_cu = device->physical_device->info.num_cu;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const unsigned num_cu = pdev->info.num_cu;

    return MIN2(device->scratch_waves, 4 * num_cu * shader->max_waves);
 }
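As a worked example of the cap in radv_get_max_scratch_waves above, using assumed numbers: on a GPU with num_cu = 48 and a shader whose register usage allows max_waves = 8, the hardware-side bound is 4 * 48 * 8 = 1536 scratch waves, so a device configured with scratch_waves = 1024 is the binding limit and MIN2() returns 1024.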
@@ -2423,10 +2430,12 @@ radv_fill_nir_compiler_options(struct radv_nir_compiler_options *options, struct
                                bool can_dump_shader, bool is_meta_shader, bool keep_shader_info,
                                bool keep_statistic_info)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    /* robust_buffer_access_llvm here used by LLVM only, pipeline robustness is not exposed there. */
    options->robust_buffer_access_llvm = device->buffer_robustness >= RADV_BUFFER_ROBUSTNESS_1;
    options->wgp_mode = should_use_wgp;
-   options->info = &device->physical_device->info;
+   options->info = &pdev->info;
    options->dump_shader = can_dump_shader;
    options->dump_preoptir = options->dump_shader && device->instance->debug_flags & RADV_DEBUG_PREOPTIR;
    options->record_ir = keep_shader_info;

@@ -2607,6 +2616,7 @@ radv_aco_build_shader_part(void **bin, uint32_t num_sgprs, uint32_t num_vgprs, c
 struct radv_shader *
 radv_create_rt_prolog(struct radv_device *device)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_shader *prolog;
    struct radv_shader_args in_args = {0};
    struct radv_shader_args out_args = {0};

@@ -2618,13 +2628,13 @@ radv_create_rt_prolog(struct radv_device *device)
    info.stage = MESA_SHADER_COMPUTE;
    info.loads_push_constants = true;
    info.desc_set_used_mask = -1; /* just to force indirection */
-   info.wave_size = device->physical_device->rt_wave_size;
+   info.wave_size = pdev->rt_wave_size;
    info.workgroup_size = info.wave_size;
    info.user_data_0 = R_00B900_COMPUTE_USER_DATA_0;
    info.cs.is_rt_shader = true;
    info.cs.uses_dynamic_rt_callable_stack = true;
    info.cs.block_size[0] = 8;
-   info.cs.block_size[1] = device->physical_device->rt_wave_size == 64 ? 8 : 4;
+   info.cs.block_size[1] = pdev->rt_wave_size == 64 ? 8 : 4;
    info.cs.block_size[2] = 1;
    info.cs.uses_thread_id[0] = true;
    info.cs.uses_thread_id[1] = true;
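Note the arithmetic the hunk above preserves: with a 64-wide RT wave the prolog's block is 8 x 8 x 1 = 64 threads, and with a 32-wide wave it is 8 x 4 x 1 = 32, so in both cases the workgroup is exactly one wave, consistent with info.workgroup_size = info.wave_size.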
@@ -2739,6 +2749,7 @@ struct radv_shader_part *
 radv_create_ps_epilog(struct radv_device *device, const struct radv_ps_epilog_key *key,
                       struct radv_shader_part_binary **binary_out)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_shader_part *epilog;
    struct radv_shader_args args = {0};
    struct radv_nir_compiler_options options = {0};

@@ -2748,7 +2759,7 @@ radv_create_ps_epilog(struct radv_device *device, const struct radv_ps_epilog_ke

    struct radv_shader_info info = {0};
    info.stage = MESA_SHADER_FRAGMENT;
-   info.wave_size = device->physical_device->ps_wave_size;
+   info.wave_size = pdev->ps_wave_size;
    info.workgroup_size = 64;

    radv_declare_ps_epilog_args(device, key, &args);
@@ -258,7 +258,9 @@ declare_ms_input_sgprs(const struct radv_shader_info *info, struct radv_shader_a
 static void
 declare_ms_input_vgprs(const struct radv_device *device, struct radv_shader_args *args)
 {
-   if (device->physical_device->mesh_fast_launch_2) {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
+   if (pdev->mesh_fast_launch_2) {
       ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids);
    } else {
       ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);

@@ -510,7 +512,8 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics
                     const struct radv_shader_info *info, gl_shader_stage stage, gl_shader_stage previous_stage,
                     struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info)
 {
-   const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
    bool has_shader_query = info->has_prim_query || info->has_xfb_query ||
                            (stage == MESA_SHADER_GEOMETRY && info->gs.has_pipeline_stat_query) ||
                            (stage == MESA_SHADER_MESH && info->ms.has_query) ||

@@ -784,7 +787,7 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics
          declare_ngg_sgprs(info, args, has_ngg_provoking_vtx);
       }

-      if (previous_stage != MESA_SHADER_MESH || !device->physical_device->mesh_fast_launch_2) {
+      if (previous_stage != MESA_SHADER_MESH || !pdev->mesh_fast_launch_2) {
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
         ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);

@@ -871,7 +874,8 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_gra
    if (info->loads_push_constants)
       num_user_sgprs++;

-   const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
    uint32_t available_sgprs = gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16;
    uint32_t remaining_sgprs = available_sgprs - num_user_sgprs;
@@ -342,6 +342,8 @@ static uint8_t
 radv_get_wave_size(struct radv_device *device, gl_shader_stage stage, const struct radv_shader_info *info,
                    const struct radv_shader_stage_key *stage_key)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    if (stage_key->subgroup_required_size)
       return stage_key->subgroup_required_size * 32;

@@ -350,11 +352,11 @@ radv_get_wave_size(struct radv_device *device, gl_shader_stage stage, const stru
    else if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_TASK)
       return info->wave_size;
    else if (stage == MESA_SHADER_FRAGMENT)
-      return device->physical_device->ps_wave_size;
+      return pdev->ps_wave_size;
    else if (gl_shader_stage_is_rt(stage))
-      return device->physical_device->rt_wave_size;
+      return pdev->rt_wave_size;
    else
-      return device->physical_device->ge_wave_size;
+      return pdev->ge_wave_size;
 }

 static uint8_t
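Condensing the wave-size policy that the two hunks above spread across radv_get_wave_size, a hedged sketch (signatures simplified, and the branches elided between the two hunks are omitted; stage_key stores the required subgroup size divided by 32, as the multiplication implies):

   /* Sketch of the selection order: an app-required subgroup size wins,
    * compute/task use the size already gathered into info, and the
    * remaining stages fall back to per-GPU defaults. */
   static uint8_t
   wave_size_for(const struct radv_physical_device *pdev, gl_shader_stage stage,
                 uint8_t subgroup_required_size, uint8_t info_wave_size)
   {
      if (subgroup_required_size)
         return subgroup_required_size * 32; /* forced 32 or 64 */
      if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_TASK)
         return info_wave_size;
      if (stage == MESA_SHADER_FRAGMENT)
         return pdev->ps_wave_size;
      if (gl_shader_stage_is_rt(stage))
         return pdev->rt_wave_size;
      return pdev->ge_wave_size;
   }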
@@ -370,6 +372,7 @@ radv_get_ballot_bit_size(struct radv_device *device, gl_shader_stage stage, cons
 static uint32_t
 radv_compute_esgs_itemsize(const struct radv_device *device, uint32_t num_varyings)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    uint32_t esgs_itemsize;

    esgs_itemsize = num_varyings * 16;

@@ -377,7 +380,7 @@ radv_compute_esgs_itemsize(const struct radv_device *device, uint32_t num_varyin
    /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
     * conflicts, i.e. each vertex will start on a different bank.
     */
-   if (device->physical_device->info.gfx_level >= GFX9 && esgs_itemsize)
+   if (pdev->info.gfx_level >= GFX9 && esgs_itemsize)
       esgs_itemsize += 4;

    return esgs_itemsize;
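Worked example for radv_compute_esgs_itemsize above: with 4 linked varyings the base size is 4 * 16 = 64 bytes; on GFX9+ one extra dword (4 bytes) is added so that consecutive vertices start on different LDS banks, giving a 68-byte item.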
@@ -562,6 +565,8 @@ static void
 gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,
                        const struct radv_graphics_state_key *gfx_state, struct radv_shader_info *info)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
    info->tcs.tes_inputs_read = ~0ULL;
    info->tcs.tes_patch_inputs_read = ~0ULL;

@@ -571,15 +576,14 @@ gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,

    if (gfx_state->ts.patch_control_points) {
       /* Number of tessellation patches per workgroup processed by the current pipeline. */
-      info->num_tess_patches =
-         get_tcs_num_patches(gfx_state->ts.patch_control_points, nir->info.tess.tcs_vertices_out,
-                             info->tcs.num_linked_inputs, info->tcs.num_linked_outputs,
-                             info->tcs.num_linked_patch_outputs, device->physical_device->hs.tess_offchip_block_dw_size,
-                             device->physical_device->info.gfx_level, device->physical_device->info.family);
+      info->num_tess_patches = get_tcs_num_patches(
+         gfx_state->ts.patch_control_points, nir->info.tess.tcs_vertices_out, info->tcs.num_linked_inputs,
+         info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs, pdev->hs.tess_offchip_block_dw_size,
+         pdev->info.gfx_level, pdev->info.family);

       /* LDS size used by VS+TCS for storing TCS inputs and outputs. */
       info->tcs.num_lds_blocks =
-         calculate_tess_lds_size(device->physical_device->info.gfx_level, gfx_state->ts.patch_control_points,
+         calculate_tess_lds_size(pdev->info.gfx_level, gfx_state->ts.patch_control_points,
                                  nir->info.tess.tcs_vertices_out, info->tcs.num_linked_inputs, info->num_tess_patches,
                                  info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs);
    }

@@ -616,7 +620,7 @@ gather_shader_info_tes(struct radv_device *device, const nir_shader *nir, struct
 static void
 radv_init_legacy_gs_ring_info(const struct radv_device *device, struct radv_shader_info *gs_info)
 {
-   const struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_legacy_gs_info *gs_ring_info = &gs_info->gs_ring_info;
    unsigned num_se = pdev->info.max_se;
    unsigned wave_size = 64;

@@ -650,6 +654,7 @@ radv_init_legacy_gs_ring_info(const struct radv_device *device, struct radv_shad
 static void
 radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *gs_info)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_legacy_gs_info *out = &gs_info->gs_ring_info;
    const unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1);
    const bool uses_adjacency =

@@ -734,7 +739,7 @@ radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_inf
    const uint32_t gs_prims_per_subgroup = gs_prims;
    const uint32_t gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations;
    const uint32_t max_prims_per_subgroup = gs_inst_prims_in_subgroup * gs_info->gs.vertices_out;
-   const uint32_t lds_granularity = device->physical_device->info.lds_encode_granularity;
+   const uint32_t lds_granularity = pdev->info.lds_encode_granularity;
    const uint32_t total_lds_bytes = align(esgs_lds_size * 4, lds_granularity);
    out->lds_size = total_lds_bytes / lds_granularity;
    out->vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) |

@@ -750,6 +755,7 @@ static void
 gather_shader_info_gs(struct radv_device *device, const nir_shader *nir, struct radv_shader_info *info)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    unsigned add_clip = nir->info.clip_distance_array_size + nir->info.cull_distance_array_size > 4;
    info->gs.gsvs_vertex_size = (util_bitcount64(nir->info.outputs_written) + add_clip) * 16;
    info->gs.max_gsvs_emit_size = info->gs.gsvs_vertex_size * nir->info.gs.vertices_out;

@@ -770,7 +776,7 @@ gather_shader_info_gs(struct radv_device *device, const nir_shader *nir, struct
          info->gs.num_stream_output_components[stream] += num_components;
    }

-   info->gs.has_pipeline_stat_query = device->physical_device->emulate_ngg_gs_query_pipeline_stat;
+   info->gs.has_pipeline_stat_query = pdev->emulate_ngg_gs_query_pipeline_stat;

    gather_info_unlinked_input(info, nir);

@@ -830,9 +836,10 @@ gather_shader_info_mesh(struct radv_device *device, const nir_shader *nir,
 static void
 calc_mesh_workgroup_size(const struct radv_device *device, const nir_shader *nir, struct radv_shader_info *info)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    unsigned api_workgroup_size = ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX);

-   if (device->physical_device->mesh_fast_launch_2) {
+   if (pdev->mesh_fast_launch_2) {
       /* Use multi-row export. It is also necessary to use the API workgroup size for non-emulated queries. */
       info->workgroup_size = api_workgroup_size;
    } else {

@@ -848,6 +855,7 @@ static void
 gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir,
                       const struct radv_graphics_state_key *gfx_state, struct radv_shader_info *info)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    uint64_t per_primitive_input_mask = nir->info.inputs_read & nir->info.per_primitive_inputs;
    unsigned num_per_primitive_inputs = util_bitcount64(per_primitive_input_mask);
    assert(num_per_primitive_inputs <= nir->num_inputs);

@@ -855,7 +863,7 @@ gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir,
    info->ps.num_interp = nir->num_inputs;
    info->ps.num_prim_interp = 0;

-   if (device->physical_device->info.gfx_level == GFX10_3) {
+   if (pdev->info.gfx_level == GFX10_3) {
       /* GFX10.3 distinguishes NUM_INTERP and NUM_PRIM_INTERP, but
        * these are counted together in NUM_INTERP on GFX11.
        */

@@ -972,7 +980,7 @@ gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir,
     */
    info->ps.force_sample_iter_shading_rate =
       (info->ps.reads_sample_mask_in && !info->ps.needs_poly_line_smooth) ||
-      (device->physical_device->info.gfx_level == GFX10_3 &&
+      (pdev->info.gfx_level == GFX10_3 &&
       (nir->info.fs.sample_interlock_ordered || nir->info.fs.sample_interlock_unordered ||
        nir->info.fs.pixel_interlock_ordered || nir->info.fs.pixel_interlock_unordered));

@@ -992,8 +1000,7 @@ gather_shader_info_fs(const struct radv_device *device, const nir_shader *nir,
     */
    const bool mask_export_enable = info->ps.writes_sample_mask;

-   const bool disable_rbplus =
-      device->physical_device->info.has_rbplus && !device->physical_device->info.rbplus_allowed;
+   const bool disable_rbplus = pdev->info.has_rbplus && !pdev->info.rbplus_allowed;

    info->ps.db_shader_control =
       S_02880C_Z_EXPORT_ENABLE(info->ps.writes_z) | S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(info->ps.writes_stencil) |

@@ -1020,9 +1027,10 @@ static void
 gather_shader_info_cs(struct radv_device *device, const nir_shader *nir, const struct radv_shader_stage_key *stage_key,
                       struct radv_shader_info *info)
 {
-   unsigned default_wave_size = device->physical_device->cs_wave_size;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   unsigned default_wave_size = pdev->cs_wave_size;
    if (info->cs.uses_rt)
-      default_wave_size = device->physical_device->rt_wave_size;
+      default_wave_size = pdev->rt_wave_size;

    unsigned local_size = nir->info.workgroup_size[0] * nir->info.workgroup_size[1] * nir->info.workgroup_size[2];

@@ -1040,14 +1048,14 @@ gather_shader_info_cs(struct radv_device *device, const nir_shader *nir, const s
       info->wave_size = required_subgroup_size;
    } else if (require_full_subgroups) {
       info->wave_size = RADV_SUBGROUP_SIZE;
-   } else if (device->physical_device->info.gfx_level >= GFX10 && local_size <= 32) {
+   } else if (pdev->info.gfx_level >= GFX10 && local_size <= 32) {
       /* Use wave32 for small workgroups. */
       info->wave_size = 32;
    } else {
       info->wave_size = default_wave_size;
    }

-   if (device->physical_device->info.has_cs_regalloc_hang_bug) {
+   if (pdev->info.has_cs_regalloc_hang_bug) {
       info->cs.regalloc_hang_bug = info->cs.block_size[0] * info->cs.block_size[1] * info->cs.block_size[2] > 256;
    }
 }
@@ -1083,7 +1091,8 @@ gather_shader_info_task(struct radv_device *device, const nir_shader *nir,
 static uint32_t
 radv_get_user_data_0(const struct radv_device *device, struct radv_shader_info *info)
 {
-   const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;

    switch (info->stage) {
    case MESA_SHADER_VERTEX:

@@ -1139,7 +1148,8 @@ radv_get_user_data_0(const struct radv_device *device, struct radv_shader_info *
 static bool
 radv_is_merged_shader_compiled_separately(const struct radv_device *device, const struct radv_shader_info *info)
 {
-   const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;

    if (gfx_level >= GFX9) {
       switch (info->stage) {

@@ -1180,6 +1190,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
                           const struct radv_graphics_state_key *gfx_state, const enum radv_pipeline_type pipeline_type,
                           bool consider_force_vrs, struct radv_shader_info *info)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct nir_function *func = (struct nir_function *)exec_list_get_head_const(&nir->functions);

    if (layout->use_dynamic_descriptors) {

@@ -1257,7 +1268,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
       /* The HW always assumes that there is at least 1 per-vertex param.
        * so if there aren't any, we have to offset per-primitive params by 1.
        */
-      const unsigned extra_offset = !!(total_param_exports == 0 && device->physical_device->info.gfx_level >= GFX11);
+      const unsigned extra_offset = !!(total_param_exports == 0 && pdev->info.gfx_level >= GFX11);

       /* Per-primitive outputs: the HW needs these to be last. */
       assign_outinfo_params(outinfo, per_prim_mask, &total_param_exports, extra_offset);

@@ -1274,7 +1285,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n

    /* Used by compute and mesh shaders. Mesh shaders must always declare this before GFX11. */
    info->cs.uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS) ||
-                             (nir->info.stage == MESA_SHADER_MESH && device->physical_device->info.gfx_level < GFX11);
+                             (nir->info.stage == MESA_SHADER_MESH && pdev->info.gfx_level < GFX11);
    info->cs.uses_local_invocation_idx = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) |
                                         BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) |
                                         BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS) |

@@ -1348,9 +1359,9 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
       break;
    case MESA_SHADER_TESS_CTRL:
       if (gfx_state->ts.patch_control_points) {
-         info->workgroup_size = ac_compute_lshs_workgroup_size(
-            device->physical_device->info.gfx_level, MESA_SHADER_TESS_CTRL, info->num_tess_patches,
-            gfx_state->ts.patch_control_points, info->tcs.tcs_vertices_out);
+         info->workgroup_size =
+            ac_compute_lshs_workgroup_size(pdev->info.gfx_level, MESA_SHADER_TESS_CTRL, info->num_tess_patches,
+                                           gfx_state->ts.patch_control_points, info->tcs.tcs_vertices_out);
       } else {
          /* Set the maximum possible value when the workgroup size can't be determined. */
          info->workgroup_size = 256;

@@ -1371,7 +1382,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
          unsigned es_verts_per_subgroup = G_028A44_ES_VERTS_PER_SUBGRP(info->gs_ring_info.vgt_gs_onchip_cntl);
          unsigned gs_inst_prims_in_subgroup = G_028A44_GS_INST_PRIMS_IN_SUBGRP(info->gs_ring_info.vgt_gs_onchip_cntl);

-         info->workgroup_size = ac_compute_esgs_workgroup_size(device->physical_device->info.gfx_level, info->wave_size,
+         info->workgroup_size = ac_compute_esgs_workgroup_size(pdev->info.gfx_level, info->wave_size,
                                                                es_verts_per_subgroup, gs_inst_prims_in_subgroup);
       } else {
          /* Set the maximum possible value by default, this will be optimized during linking if

@@ -1441,6 +1452,7 @@ static unsigned
 gfx10_get_ngg_scratch_lds_base(const struct radv_device *device, const struct radv_shader_info *es_info,
                                const struct radv_shader_info *gs_info, const struct gfx10_ngg_info *ngg_info)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    uint32_t scratch_lds_base;

    if (gs_info) {

@@ -1451,7 +1463,7 @@ gfx10_get_ngg_scratch_lds_base(const struct radv_device *device, const struct ra
    } else {
       const bool uses_instanceid = es_info->vs.needs_instance_id;
       const bool uses_primitive_id = es_info->uses_prim_id;
-      const bool streamout_enabled = es_info->so.num_outputs && device->physical_device->use_ngg_streamout;
+      const bool streamout_enabled = es_info->so.num_outputs && pdev->use_ngg_streamout;
       const uint32_t num_outputs =
          es_info->stage == MESA_SHADER_VERTEX ? es_info->vs.num_outputs : es_info->tes.num_outputs;
       unsigned pervertex_lds_bytes = ac_ngg_nogs_get_pervertex_lds_size(

@@ -1471,7 +1483,8 @@ void
 gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info,
                    struct gfx10_ngg_info *out)
 {
-   const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
    const unsigned max_verts_per_prim = radv_get_num_input_vertices(es_info, gs_info);
    const unsigned min_verts_per_prim = gs_info ? max_verts_per_prim : 1;

@@ -1683,9 +1696,8 @@ gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es

    /* Get scratch LDS usage. */
    const struct radv_shader_info *info = gs_info ? gs_info : es_info;
-   const unsigned scratch_lds_size =
-      ac_ngg_get_scratch_lds_size(info->stage, info->workgroup_size, info->wave_size,
-                                  device->physical_device->use_ngg_streamout, info->has_ngg_culling);
+   const unsigned scratch_lds_size = ac_ngg_get_scratch_lds_size(info->stage, info->workgroup_size, info->wave_size,
+                                                                 pdev->use_ngg_streamout, info->has_ngg_culling);
    out->lds_size = out->scratch_lds_base + scratch_lds_size;

    unsigned workgroup_size =

@@ -1700,6 +1712,8 @@ static void
 radv_determine_ngg_settings(struct radv_device *device, struct radv_shader_stage *es_stage,
                             struct radv_shader_stage *fs_stage, const struct radv_graphics_state_key *gfx_state)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    assert(es_stage->stage == MESA_SHADER_VERTEX || es_stage->stage == MESA_SHADER_TESS_EVAL);
    assert(!fs_stage || fs_stage->stage == MESA_SHADER_FRAGMENT);

@@ -1715,8 +1729,8 @@ radv_determine_ngg_settings(struct radv_device *device, struct radv_shader_stage
                                   : 3;
    }

-   es_stage->info.has_ngg_culling = radv_consider_culling(device->physical_device, es_stage->nir, ps_inputs_read,
-                                                          num_vertices_per_prim, &es_stage->info);
+   es_stage->info.has_ngg_culling =
+      radv_consider_culling(pdev, es_stage->nir, ps_inputs_read, num_vertices_per_prim, &es_stage->info);

    nir_function_impl *impl = nir_shader_get_entrypoint(es_stage->nir);
    es_stage->info.has_ngg_early_prim_export = exec_list_is_singular(&impl->body);

@@ -1732,6 +1746,8 @@ static void
 radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *producer,
                        struct radv_shader_stage *consumer, const struct radv_graphics_state_key *gfx_state)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    /* Export primitive ID and clip/cull distances if read by the FS, or export unconditionally when
     * the next stage is unknown (with graphics pipeline library).
     */

@@ -1782,9 +1798,9 @@ radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *pro
       struct radv_shader_stage *tcs_stage = consumer;

       if (gfx_state->ts.patch_control_points) {
-         vs_stage->info.workgroup_size = ac_compute_lshs_workgroup_size(
-            device->physical_device->info.gfx_level, MESA_SHADER_VERTEX, tcs_stage->info.num_tess_patches,
-            gfx_state->ts.patch_control_points, tcs_stage->info.tcs.tcs_vertices_out);
+         vs_stage->info.workgroup_size =
+            ac_compute_lshs_workgroup_size(pdev->info.gfx_level, MESA_SHADER_VERTEX, tcs_stage->info.num_tess_patches,
+                                           gfx_state->ts.patch_control_points, tcs_stage->info.tcs.tcs_vertices_out);

         if (!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) {
            /* When the number of TCS input and output vertices are the same (typically 3):

@@ -1797,7 +1813,7 @@ radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *pro
             * instruction dominating another with a different mode.
             */
            vs_stage->info.vs.tcs_in_out_eq =
-              device->physical_device->info.gfx_level >= GFX9 &&
+              pdev->info.gfx_level >= GFX9 &&
              gfx_state->ts.patch_control_points == tcs_stage->info.tcs.tcs_vertices_out &&
              vs_stage->nir->info.float_controls_execution_mode == tcs_stage->nir->info.float_controls_execution_mode;

@@ -1865,6 +1881,8 @@ void
 radv_nir_shader_info_link(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
                           struct radv_shader_stage *stages)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
    /* Walk backwards to link */
    struct radv_shader_stage *next_stage = stages[MESA_SHADER_FRAGMENT].nir ? &stages[MESA_SHADER_FRAGMENT] : NULL;

@@ -1877,7 +1895,7 @@ radv_nir_shader_info_link(struct radv_device *device, const struct radv_graphics
       next_stage = &stages[s];
    }

-   if (device->physical_device->info.gfx_level >= GFX9) {
+   if (pdev->info.gfx_level >= GFX9) {
      /* Merge shader info for VS+TCS. */
      if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_TESS_CTRL].nir) {
         radv_nir_shader_info_merge(&stages[MESA_SHADER_VERTEX], &stages[MESA_SHADER_TESS_CTRL]);
@@ -128,6 +128,7 @@ static VkResult
 radv_shader_object_init_graphics(struct radv_shader_object *shader_obj, struct radv_device *device,
                                  const VkShaderCreateInfoEXT *pCreateInfo)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    gl_shader_stage stage = vk_to_mesa_shader_stage(pCreateInfo->stage);
    struct radv_shader_stage stages[MESA_VULKAN_SHADER_STAGES];

@@ -149,7 +150,7 @@ radv_shader_object_init_graphics(struct radv_shader_object *shader_obj, struct r
    gfx_state.dynamic_provoking_vtx_mode = true;
    gfx_state.dynamic_line_rast_mode = true;

-   if (device->physical_device->info.gfx_level >= GFX11)
+   if (pdev->info.gfx_level >= GFX11)
       gfx_state.ps.exports_mrtz_via_epilog = true;

    struct radv_shader *shader = NULL;

@@ -297,6 +298,7 @@ static VkResult
 radv_shader_object_init(struct radv_shader_object *shader_obj, struct radv_device *device,
                         const VkShaderCreateInfoEXT *pCreateInfo)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_shader_layout layout;
    VkResult result;

@@ -317,7 +319,7 @@ radv_shader_object_init(struct radv_shader_object *shader_obj, struct radv_devic

       const uint8_t *cache_uuid = blob_read_bytes(&blob, VK_UUID_SIZE);

-      if (memcmp(cache_uuid, device->physical_device->cache_uuid, VK_UUID_SIZE))
+      if (memcmp(cache_uuid, pdev->cache_uuid, VK_UUID_SIZE))
         return VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT;

       const bool has_main_binary = blob_read_uint32(&blob);

@@ -407,6 +409,7 @@ radv_shader_object_create_linked(VkDevice _device, uint32_t createInfoCount, con
                                  const VkAllocationCallbacks *pAllocator, VkShaderEXT *pShaders)
 {
    RADV_FROM_HANDLE(radv_device, device, _device);
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_shader_stage stages[MESA_VULKAN_SHADER_STAGES];

    for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {

@@ -425,7 +428,7 @@ radv_shader_object_create_linked(VkDevice _device, uint32_t createInfoCount, con
    gfx_state.dynamic_provoking_vtx_mode = true;
    gfx_state.dynamic_line_rast_mode = true;

-   if (device->physical_device->info.gfx_level >= GFX11)
+   if (pdev->info.gfx_level >= GFX11)
       gfx_state.ps.exports_mrtz_via_epilog = true;

    for (unsigned i = 0; i < createInfoCount; i++) {

@@ -621,6 +624,7 @@ radv_GetShaderBinaryDataEXT(VkDevice _device, VkShaderEXT shader, size_t *pDataS
 {
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_shader_object, shader_obj, shader);
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const size_t size = radv_get_shader_object_size(shader_obj);

    if (!pData) {

@@ -635,7 +639,7 @@ radv_GetShaderBinaryDataEXT(VkDevice _device, VkShaderEXT shader, size_t *pDataS

    struct blob blob;
    blob_init_fixed(&blob, pData, *pDataSize);
-   blob_write_bytes(&blob, device->physical_device->cache_uuid, VK_UUID_SIZE);
+   blob_write_bytes(&blob, pdev->cache_uuid, VK_UUID_SIZE);

    radv_write_shader_binary(&blob, shader_obj->binary);

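The write side above mirrors the read side in radv_shader_object_init earlier in this diff: the serialized VK_EXT_shader_object binary leads with the driver's cache UUID, so a blob produced by a different driver build fails the memcmp() and is rejected with VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT before any shader data is parsed. The layout implied by the two hunks (offsets are an inference from the paired read/write calls, not a spec):

   /* blob layout, inferred from the paired read/write hunks:
    *   cache_uuid[VK_UUID_SIZE]   -- driver build identity
    *   u32 has_main_binary        -- whether a linked main binary follows
    *   ...                        -- serialized radv shader binaries
    */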
@@ -62,7 +62,8 @@ radv_spm_init_bo(struct radv_device *device)
 static void
 radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
 {
-   const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
    struct ac_spm *spm = &device->spm;

    if (gfx_level >= GFX11) {

@@ -142,7 +143,8 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu
 void
 radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
 {
-   const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
    struct ac_spm *spm = &device->spm;
    uint64_t va = radv_buffer_get_va(spm->bo);
    uint64_t ring_size = spm->buffer_size;

@@ -170,7 +172,7 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum r

    radeon_set_uconfig_reg(cs, R_03726C_RLC_SPM_ACCUM_MODE, 0);

-   if (device->physical_device->info.gfx_level >= GFX11) {
+   if (pdev->info.gfx_level >= GFX11) {
      radeon_set_uconfig_reg(cs, R_03721C_RLC_SPM_PERFMON_SEGMENT_SIZE,
                             S_03721C_TOTAL_NUM_SEGMENT(total_muxsel_lines) |
                                S_03721C_GLOBAL_NUM_SEGMENT(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]) |

@@ -238,8 +240,9 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum r
 bool
 radv_spm_init(struct radv_device *device)
 {
-   const struct radeon_info *gpu_info = &device->physical_device->info;
-   struct ac_perfcounters *pc = &device->physical_device->ac_perfcounters;
+   struct radv_physical_device *pdev = radv_device_physical(device);
+   const struct radeon_info *gpu_info = &pdev->info;
+   struct ac_perfcounters *pc = &pdev->ac_perfcounters;

    /* We failed to initialize the performance counters. */
    if (!pc->blocks)
@@ -54,15 +54,16 @@ gfx11_get_sqtt_ctrl(const struct radv_device *device, bool enable)
 static uint32_t
 gfx10_get_sqtt_ctrl(const struct radv_device *device, bool enable)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    uint32_t sqtt_ctrl = S_008D1C_MODE(enable) | S_008D1C_HIWATER(5) | S_008D1C_UTIL_TIMER(1) |
                         S_008D1C_RT_FREQ(2) | /* 4096 clk */
                         S_008D1C_DRAW_EVENT_EN(1) | S_008D1C_REG_STALL_EN(1) | S_008D1C_SPI_STALL_EN(1) |
                         S_008D1C_SQ_STALL_EN(1) | S_008D1C_REG_DROP_ON_STALL(0);

-   if (device->physical_device->info.gfx_level == GFX10_3)
+   if (pdev->info.gfx_level == GFX10_3)
      sqtt_ctrl |= S_008D1C_LOWATER_OFFSET(4);

-   if (device->physical_device->info.has_sqtt_auto_flush_mode_bug)
+   if (pdev->info.has_sqtt_auto_flush_mode_bug)
      sqtt_ctrl |= S_008D1C_AUTO_FLUSH_MODE(1);

    return sqtt_ctrl;

@@ -86,10 +87,11 @@ radv_ip_to_queue_family(enum amd_ip_type t)
 static void
 radv_emit_wait_for_idle(const struct radv_device *device, struct radeon_cmdbuf *cs, int family)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const enum radv_queue_family qf = radv_ip_to_queue_family(family);
    enum rgp_flush_bits sqtt_flush_bits = 0;
    radv_cs_emit_cache_flush(
-      device->ws, cs, device->physical_device->info.gfx_level, NULL, 0, qf,
+      device->ws, cs, pdev->info.gfx_level, NULL, 0, qf,
       (family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH
                                     : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
          RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2,

@@ -99,9 +101,10 @@ radv_emit_wait_for_idle(const struct radv_device *device, struct radeon_cmdbuf *
 static void
 radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
 {
-   const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
    uint32_t shifted_size = device->sqtt.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
-   const struct radeon_info *gpu_info = &device->physical_device->info;
+   const struct radeon_info *gpu_info = &pdev->info;
    const unsigned shader_mask = ac_sqtt_get_shader_mask(gpu_info);
    unsigned max_se = gpu_info->max_se;

@@ -111,7 +114,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
       uint64_t va = radv_buffer_get_va(device->sqtt.bo);
       uint64_t data_va = ac_sqtt_get_data_va(gpu_info, &device->sqtt, va, se);
       uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
-      int active_cu = ac_sqtt_get_active_cu(&device->physical_device->info, se);
+      int active_cu = ac_sqtt_get_active_cu(&pdev->info, se);

       if (ac_sqtt_se_is_disabled(gpu_info, se))
          continue;

@@ -120,7 +123,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
       radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
                              S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1));

-      if (device->physical_device->info.gfx_level >= GFX11) {
+      if (pdev->info.gfx_level >= GFX11) {
         /* Order seems important for the following 2 registers. */
         radeon_set_perfctr_reg(gfx_level, qf, cs, R_0367A4_SQ_THREAD_TRACE_BUF0_SIZE,
                                S_0367A4_SIZE(shifted_size) | S_0367A4_BASE_HI(shifted_va >> 32));

@@ -151,7 +154,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
         /* Should be emitted last (it enables thread traces). */
         radeon_set_perfctr_reg(gfx_level, qf, cs, R_0367B0_SQ_THREAD_TRACE_CTRL, gfx11_get_sqtt_ctrl(device, true));

-      } else if (device->physical_device->info.gfx_level >= GFX10) {
+      } else if (pdev->info.gfx_level >= GFX10) {
         /* Order seems important for the following 2 registers. */
         radeon_set_privileged_config_reg(cs, R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
                                          S_008D04_SIZE(shifted_size) | S_008D04_BASE_HI(shifted_va >> 32));

@@ -196,7 +199,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
                              S_030CC8_VM_ID_MASK(0) | S_030CC8_REG_STALL_EN(1) | S_030CC8_SPI_STALL_EN(1) |
                              S_030CC8_SQ_STALL_EN(1);

-         if (device->physical_device->info.gfx_level < GFX9) {
+         if (pdev->info.gfx_level < GFX9) {
            sqtt_mask |= S_030CC8_RANDOM_SEED(0xffff);
         }

@@ -214,7 +217,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
         radeon_set_uconfig_reg(cs, R_030CEC_SQ_THREAD_TRACE_HIWATER, S_030CEC_HIWATER(4));

-         if (device->physical_device->info.gfx_level == GFX9) {
+         if (pdev->info.gfx_level == GFX9) {
            /* Reset thread trace status errors. */
            radeon_set_uconfig_reg(cs, R_030CE8_SQ_THREAD_TRACE_STATUS, S_030CE8_UTC_ERROR(0));
         }

@@ -225,7 +228,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
                              S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */
                              S_030CD8_MODE(1);

-         if (device->physical_device->info.gfx_level == GFX9) {
+         if (pdev->info.gfx_level == GFX9) {
            /* Count SQTT traffic in TCC perf counters. */
            sqtt_mode |= S_030CD8_TC_PERF_EN(1);
         }

@@ -274,17 +277,17 @@ static const uint32_t gfx11_sqtt_info_regs[] = {
 static void
 radv_copy_sqtt_info_regs(const struct radv_device *device, struct radeon_cmdbuf *cs, unsigned se_index)
 {
-   const struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    const uint32_t *sqtt_info_regs = NULL;

-   if (device->physical_device->info.gfx_level >= GFX11) {
+   if (pdev->info.gfx_level >= GFX11) {
      sqtt_info_regs = gfx11_sqtt_info_regs;
-   } else if (device->physical_device->info.gfx_level >= GFX10) {
+   } else if (pdev->info.gfx_level >= GFX10) {
      sqtt_info_regs = gfx10_sqtt_info_regs;
-   } else if (device->physical_device->info.gfx_level == GFX9) {
+   } else if (pdev->info.gfx_level == GFX9) {
      sqtt_info_regs = gfx9_sqtt_info_regs;
    } else {
-      assert(device->physical_device->info.gfx_level == GFX8);
+      assert(pdev->info.gfx_level == GFX8);
      sqtt_info_regs = gfx8_sqtt_info_regs;
    }

@@ -330,8 +333,9 @@ radv_copy_sqtt_info_regs(const struct radv_device *device, struct radeon_cmdbuf
 static void
 radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
 {
-   const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
-   unsigned max_se = device->physical_device->info.max_se;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
+   unsigned max_se = pdev->info.max_se;

    radeon_check_space(device->ws, cs, 8 + max_se * 64);

@@ -346,20 +350,20 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs,
    radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
    radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0));

-   if (device->physical_device->info.has_sqtt_rb_harvest_bug) {
+   if (pdev->info.has_sqtt_rb_harvest_bug) {
      /* Some chips with disabled RBs should wait for idle because FINISH_DONE doesn't work. */
      radv_emit_wait_for_idle(device, cs, qf);
    }

    for (unsigned se = 0; se < max_se; se++) {
-      if (ac_sqtt_se_is_disabled(&device->physical_device->info, se))
+      if (ac_sqtt_se_is_disabled(&pdev->info, se))
         continue;

      /* Target SEi and SH0. */
      radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
                             S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1));

-      if (device->physical_device->info.gfx_level >= GFX11) {
+      if (pdev->info.gfx_level >= GFX11) {
         /* Make sure to wait for the trace buffer. */
         radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
         radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */

@@ -380,8 +384,8 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs,
         radeon_emit(cs, 0); /* reference value */
         radeon_emit(cs, ~C_0367D0_BUSY); /* mask */
         radeon_emit(cs, 4); /* poll interval */
-      } else if (device->physical_device->info.gfx_level >= GFX10) {
-         if (!device->physical_device->info.has_sqtt_rb_harvest_bug) {
+      } else if (pdev->info.gfx_level >= GFX10) {
+         if (!pdev->info.has_sqtt_rb_harvest_bug) {
            /* Make sure to wait for the trace buffer. */
            radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
            radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is equal to the reference value */

@@ -429,7 +433,8 @@ radv_emit_sqtt_stop(const struct radv_device *device, struct radeon_cmdbuf *cs,
 void
 radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *data, uint32_t num_dwords)
 {
-   const enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
+   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
    const enum radv_queue_family qf = cmd_buffer->qf;
    struct radv_device *device = cmd_buffer->device;
    struct radeon_cmdbuf *cs = cmd_buffer->cs;

@@ -446,7 +451,7 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da
      /* Without the perfctr bit the CP might not always pass the
       * write on correctly. */
-      if (device->physical_device->info.gfx_level >= GFX10)
+      if (pdev->info.gfx_level >= GFX10)
         radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
      else
         radeon_set_uconfig_reg_seq(cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);

@@ -460,11 +465,13 @@ radv_emit_sqtt_userdata(const struct radv_cmd_buffer *cmd_buffer, const void *da
 void
 radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf *cs, bool enable)
 {
-   if (device->physical_device->info.gfx_level >= GFX9) {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
+   if (pdev->info.gfx_level >= GFX9) {
      uint32_t spi_config_cntl = S_031100_GPR_WRITE_PRIORITY(0x2c688) | S_031100_EXP_PRIORITY_ORDER(3) |
                                 S_031100_ENABLE_SQG_TOP_EVENTS(enable) | S_031100_ENABLE_SQG_BOP_EVENTS(enable);

-      if (device->physical_device->info.gfx_level >= GFX10)
+      if (pdev->info.gfx_level >= GFX10)
         spi_config_cntl |= S_031100_PS_PKR_PRIORITY_CNTL(3);

      radeon_set_uconfig_reg(cs, R_031100_SPI_CONFIG_CNTL, spi_config_cntl);

@@ -478,12 +485,14 @@ radv_emit_spi_config_cntl(const struct radv_device *device, struct radeon_cmdbuf
 void
 radv_emit_inhibit_clockgating(const struct radv_device *device, struct radeon_cmdbuf *cs, bool inhibit)
 {
-   if (device->physical_device->info.gfx_level >= GFX11)
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
+   if (pdev->info.gfx_level >= GFX11)
      return; /* not needed */

-   if (device->physical_device->info.gfx_level >= GFX10) {
+   if (pdev->info.gfx_level >= GFX10) {
      radeon_set_uconfig_reg(cs, R_037390_RLC_PERFMON_CLK_CNTL, S_037390_PERFMON_CLOCK_STATE(inhibit));
-   } else if (device->physical_device->info.gfx_level >= GFX8) {
+   } else if (pdev->info.gfx_level >= GFX8) {
      radeon_set_uconfig_reg(cs, R_0372FC_RLC_PERFMON_CLK_CNTL, S_0372FC_PERFMON_CLOCK_STATE(inhibit));
    }
 }

@@ -620,7 +629,8 @@ radv_sqtt_finish_queue_event(struct radv_device *device)
 static bool
 radv_sqtt_init_bo(struct radv_device *device)
 {
-   unsigned max_se = device->physical_device->info.max_se;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   unsigned max_se = pdev->info.max_se;
    struct radeon_winsys *ws = device->ws;
    VkResult result;
    uint64_t size;

@@ -801,6 +811,7 @@ bool
 radv_begin_sqtt(struct radv_queue *queue)
 {
    struct radv_device *device = queue->device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    enum radv_queue_family family = queue->state.qf;
    struct radeon_winsys *ws = device->ws;
    struct radeon_cmdbuf *cs;

@@ -846,7 +857,7 @@ radv_begin_sqtt(struct radv_queue *queue)
    if (device->spm.bo) {
      /* Enable all shader stages by default. */
-      radv_perfcounter_emit_shaders(device, cs, ac_sqtt_get_shader_mask(&device->physical_device->info));
+      radv_perfcounter_emit_shaders(device, cs, ac_sqtt_get_shader_mask(&pdev->info));

      radv_emit_spm_setup(device, cs, family);
    }

@@ -936,7 +947,8 @@ bool
 radv_get_sqtt_trace(struct radv_queue *queue, struct ac_sqtt_trace *sqtt_trace)
 {
    struct radv_device *device = queue->device;
-   const struct radeon_info *gpu_info = &device->physical_device->info;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const struct radeon_info *gpu_info = &pdev->info;

    if (!ac_sqtt_get_trace(&device->sqtt, gpu_info, sqtt_trace)) {
       if (!radv_sqtt_resize_bo(device))
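The radv_emit_sqtt_userdata() hunks above only show the register-write step. Below is a minimal sketch of the surrounding loop, assuming the payload is streamed two dwords at a time through the USERDATA_2/USERDATA_3 pair and using Mesa's radeon_emit_array() helper; the chunk size and loop shape are inferred, not taken from this excerpt:

   const uint32_t *dwords = (const uint32_t *)data;

   while (num_dwords > 0) {
      const uint32_t count = MIN2(num_dwords, 2); /* assumed: USERDATA_2..3 */

      /* Without the perfctr bit the CP might not always pass the write on. */
      if (pdev->info.gfx_level >= GFX10)
         radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
      else
         radeon_set_uconfig_reg_seq(cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
      radeon_emit_array(cs, dwords, count); /* the payload dwords land in the trace */

      dwords += count;
      num_dwords -= count;
   }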
@@ -311,7 +311,7 @@ calc_ctx_size_h265_main10(struct radv_video_session *vid)
 static unsigned
 calc_ctx_size_av1(struct radv_device *device, struct radv_video_session *vid)
 {
-   struct radv_physical_device *pdev = device->physical_device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    unsigned frame_ctxt_size = pdev->av1_version == RDECODE_AV1_VER_0
                                  ? align(sizeof(rvcn_av1_frame_context_t), 2048)
                                  : align(sizeof(rvcn_av1_vcn4_frame_context_t), 2048);

@@ -345,6 +345,7 @@ radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR *
                            const VkAllocationCallbacks *pAllocator, VkVideoSessionKHR *pVideoSession)
 {
    RADV_FROM_HANDLE(radv_device, device, _device);
+   struct radv_physical_device *pdev = radv_device_physical(device);

    struct radv_video_session *vid =
       vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*vid), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

@@ -365,12 +366,12 @@ radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR *
    switch (vid->vk.op) {
    case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
      vid->stream_type = RDECODE_CODEC_H264_PERF;
-      if (radv_enable_tier2(device->physical_device))
+      if (radv_enable_tier2(pdev))
         vid->dpb_type = DPB_DYNAMIC_TIER_2;
      break;
    case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
      vid->stream_type = RDECODE_CODEC_H265;
-      if (radv_enable_tier2(device->physical_device))
+      if (radv_enable_tier2(pdev))
         vid->dpb_type = DPB_DYNAMIC_TIER_2;
      break;
    case VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR:

@@ -381,10 +382,10 @@ radv_CreateVideoSessionKHR(VkDevice _device, const VkVideoSessionCreateInfoKHR *
      return VK_ERROR_FEATURE_NOT_PRESENT;
    }

-   vid->stream_handle = radv_vid_alloc_stream_handle(device->physical_device);
+   vid->stream_handle = radv_vid_alloc_stream_handle(pdev);
    vid->dbg_frame_cnt = 0;
    vid->db_alignment = radv_video_get_db_alignment(
-      device->physical_device, vid->vk.max_coded.width,
+      pdev, vid->vk.max_coded.width,
       (vid->stream_type == RDECODE_CODEC_AV1 ||
        (vid->stream_type == RDECODE_CODEC_H265 && vid->vk.h265.profile_idc == STD_VIDEO_H265_PROFILE_IDC_MAIN_10)));

@@ -656,11 +657,13 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR vi
 {
    RADV_FROM_HANDLE(radv_device, device, _device);
    RADV_FROM_HANDLE(radv_video_session, vid, videoSession);
-   uint32_t memory_type_bits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+
+   uint32_t memory_type_bits = (1u << pdev->memory_properties.memoryTypeCount) - 1;

    VK_OUTARRAY_MAKE_TYPED(VkVideoSessionMemoryRequirementsKHR, out, pMemoryRequirements, pMemoryRequirementsCount);
    /* 1 buffer for session context */
-   if (device->physical_device->info.family >= CHIP_POLARIS10) {
+   if (pdev->info.family >= CHIP_POLARIS10) {
      vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
      {
         m->memoryBindIndex = RADV_BIND_SESSION_CTX;

@@ -670,7 +673,7 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR vi
      }
    }

-   if (vid->stream_type == RDECODE_CODEC_H264_PERF && device->physical_device->info.family >= CHIP_POLARIS10) {
+   if (vid->stream_type == RDECODE_CODEC_H264_PERF && pdev->info.family >= CHIP_POLARIS10) {
      vk_outarray_append_typed(VkVideoSessionMemoryRequirementsKHR, &out, m)
      {
         m->memoryBindIndex = RADV_BIND_DECODER_CTX;

@@ -701,9 +704,8 @@ radv_GetVideoSessionMemoryRequirementsKHR(VkDevice _device, VkVideoSessionKHR vi
         m->memoryRequirements.size = align(calc_ctx_size_av1(device, vid), 4096);
         m->memoryRequirements.alignment = 0;
         m->memoryRequirements.memoryTypeBits = 0;
-         for (unsigned i = 0; i < device->physical_device->memory_properties.memoryTypeCount; i++)
-            if (device->physical_device->memory_properties.memoryTypes[i].propertyFlags &
-                VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+         for (unsigned i = 0; i < pdev->memory_properties.memoryTypeCount; i++)
+            if (pdev->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
               m->memoryRequirements.memoryTypeBits |= (1 << i);
      }
    }

@@ -761,14 +763,15 @@ set_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, uint32_t val)
 static void
 send_cmd(struct radv_cmd_buffer *cmd_buffer, unsigned cmd, struct radeon_winsys_bo *bo, uint32_t offset)
 {
-   struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
+   struct radv_device *device = cmd_buffer->device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    uint64_t addr;

    radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
    addr = radv_buffer_get_va(bo);
    addr += offset;

-   if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
+   if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
      radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 6);
      set_reg(cmd_buffer, pdev->vid_dec_reg.data0, addr);
      set_reg(cmd_buffer, pdev->vid_dec_reg.data1, addr >> 32);

@@ -1037,6 +1040,7 @@ get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct
              uint32_t *height_in_samples,
              void *it_ptr)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    rvcn_dec_message_hevc_t result;
    int i, j;
    const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =

@@ -1059,7 +1063,7 @@ get_h265_msg(struct radv_device *device, struct radv_video_session *vid, struct
    result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7;
    result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8;

-   if (device->physical_device->info.family == CHIP_CARRIZO)
+   if (pdev->info.family == CHIP_CARRIZO)
      result.sps_info_flags |= 1 << 9;

    if (!h265_pic_info->pStdPictureInfo->flags.short_term_ref_pic_set_sps_flag) {

@@ -2097,6 +2101,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
                         const struct VkVideoDecodeInfoKHR *frame_info)
 {
    struct radv_device *device = cmd_buffer->device;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    rvcn_dec_message_header_t *header;
    rvcn_dec_message_index_t *index_codec;
    rvcn_dec_message_decode_t *decode;

@@ -2182,7 +2187,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
    decode->dt_tiling_mode = 0;
    decode->dt_swizzle_mode = luma->surface.u.gfx9.swizzle_mode;
-   decode->dt_array_mode = device->physical_device->vid_addr_gfx_mode;
+   decode->dt_array_mode = pdev->vid_addr_gfx_mode;
    decode->dt_field_mode = vid->interlaced ? 1 : 0;
    decode->dt_surf_tile_config = 0;
    decode->dt_uv_surf_tile_config = 0;

@@ -2254,7 +2259,7 @@ rvcn_dec_message_decode(struct radv_cmd_buffer *cmd_buffer, struct radv_video_se
    decode->db_pitch = dpb->planes[0].surface.u.gfx9.surf_pitch;
    decode->db_aligned_height = dpb->planes[0].surface.u.gfx9.surf_height;
    decode->db_swizzle_mode = dpb->planes[0].surface.u.gfx9.swizzle_mode;
-   decode->db_array_mode = device->physical_device->vid_addr_gfx_mode;
+   decode->db_array_mode = pdev->vid_addr_gfx_mode;

    decode->hw_ctxt_size = vid->ctx.size;

@@ -2427,6 +2432,7 @@ get_uvd_h265_msg(struct radv_device *device, struct radv_video_session *vid, str
                  const struct VkVideoDecodeInfoKHR *frame_info, uint32_t *width_in_samples,
                  uint32_t *height_in_samples, void *it_ptr)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct ruvd_h265 result;
    int i, j;
    const struct VkVideoDecodeH265PictureInfoKHR *h265_pic_info =

@@ -2450,7 +2456,7 @@ get_uvd_h265_msg(struct radv_device *device, struct radv_video_session *vid, str
    result.sps_info_flags |= sps->flags.strong_intra_smoothing_enabled_flag << 7;
    result.sps_info_flags |= sps->flags.separate_colour_plane_flag << 8;

-   if (device->physical_device->info.family == CHIP_CARRIZO)
+   if (pdev->info.family == CHIP_CARRIZO)
      result.sps_info_flags |= 1 << 9;

    *width_in_samples = sps->pic_width_in_luma_samples;

@@ -2592,6 +2598,7 @@ ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *v
                         struct radv_video_session_params *params, void *ptr, void *it_ptr, uint32_t *slice_offset,
                         const struct VkVideoDecodeInfoKHR *frame_info)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct ruvd_msg *msg = ptr;
    struct radv_image_view *dst_iv = radv_image_view_from_handle(frame_info->dstPictureResource.imageViewBinding);
    struct radv_image *img = dst_iv->image;

@@ -2616,7 +2623,7 @@ ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *v
    msg->body.decode.bsd_size = frame_info->srcBufferRange;
    msg->body.decode.db_pitch = align(frame_info->dstPictureResource.codedExtent.width, vid->db_alignment);

-   if (vid->stream_type == RUVD_CODEC_H264_PERF && device->physical_device->info.family >= CHIP_POLARIS10)
+   if (vid->stream_type == RUVD_CODEC_H264_PERF && pdev->info.family >= CHIP_POLARIS10)
      msg->body.decode.dpb_reserved = vid->ctx.size;

    *slice_offset = 0;

@@ -2643,7 +2650,7 @@ ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *v
    msg->body.decode.dt_field_mode = false;

-   if (device->physical_device->info.gfx_level >= GFX9) {
+   if (pdev->info.gfx_level >= GFX9) {
      msg->body.decode.dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
      msg->body.decode.dt_tiling_mode = RUVD_TILE_LINEAR;
      msg->body.decode.dt_array_mode = RUVD_ARRAY_MODE_LINEAR;

@@ -2703,7 +2710,7 @@ ruvd_dec_message_decode(struct radv_device *device, struct radv_video_session *v
                               RUVD_MACRO_TILE_ASPECT_RATIO(util_logbase2(luma->surface.u.legacy.mtilea));
    }

-   if (device->physical_device->info.family >= CHIP_STONEY)
+   if (pdev->info.family >= CHIP_STONEY)
      msg->body.decode.dt_wa_chroma_top_offset = msg->body.decode.dt_pitch / 2;

    msg->body.decode.db_surf_tile_config = msg->body.decode.dt_surf_tile_config;

@@ -2740,8 +2747,8 @@ radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoBeginCod
 static void
 radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    struct radv_video_session *vid = cmd_buffer->video.vid;
-   struct radv_physical_device *pdev = cmd_buffer->device->physical_device;
    uint32_t size = sizeof(rvcn_dec_message_header_t) + sizeof(rvcn_dec_message_create_t);

    void *ptr;

@@ -2771,7 +2778,7 @@ radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
    }
    radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);

-   if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED)
+   if (pdev->vid_decode_ip == AMD_IP_VCN_UNIFIED)
      radv_vcn_sq_start(cmd_buffer);

    rvcn_dec_message_create(vid, ptr, size);

@@ -2779,7 +2786,7 @@ radv_vcn_cmd_reset(struct radv_cmd_buffer *cmd_buffer)
    send_cmd(cmd_buffer, RDECODE_CMD_MSG_BUFFER, cmd_buffer->upload.upload_bo, out_offset);
    /* pad out the IB to the 16 dword boundary - otherwise the fw seems to be unhappy */

-   if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
+   if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
      radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 8);
      for (unsigned i = 0; i < 8; i++)
         radeon_emit(cmd_buffer->cs, 0x81ff);

@@ -2812,8 +2819,10 @@ VKAPI_ATTR void VKAPI_CALL
 radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoCodingControlInfoKHR *pCodingControlInfo)
 {
    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+   struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);

    if (pCodingControlInfo->flags & VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR) {
-      if (radv_has_uvd(cmd_buffer->device->physical_device))
+      if (radv_has_uvd(pdev))
         radv_uvd_cmd_reset(cmd_buffer);
      else
         radv_vcn_cmd_reset(cmd_buffer);

@@ -2829,14 +2838,14 @@ static void
 radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info)
 {
    RADV_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer);
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    struct radv_video_session *vid = cmd_buffer->video.vid;
    struct radv_video_session_params *params = cmd_buffer->video.params;
    unsigned size = sizeof(struct ruvd_msg);
    void *ptr, *fb_ptr, *it_probs_ptr = NULL;
    uint32_t out_offset, fb_offset, it_probs_offset = 0;
    struct radeon_winsys_bo *msg_bo, *fb_bo, *it_probs_bo = NULL;
-   unsigned fb_size =
-      (cmd_buffer->device->physical_device->info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE;
+   unsigned fb_size = (pdev->info.family == CHIP_TONGA) ? FB_BUFFER_SIZE_TONGA : FB_BUFFER_SIZE;

    radv_vid_buffer_upload_alloc(cmd_buffer, fb_size, &fb_offset, &fb_ptr);
    fb_bo = cmd_buffer->upload.upload_bo;

@@ -2876,13 +2885,14 @@ radv_uvd_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf
      send_cmd(cmd_buffer, RDECODE_CMD_IT_SCALING_TABLE_BUFFER, it_probs_bo, it_probs_offset);

    radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 2);
-   set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1);
+   set_reg(cmd_buffer, pdev->vid_dec_reg.cntl, 1);
 }

 static void
 radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInfoKHR *frame_info)
 {
    RADV_FROM_HANDLE(radv_buffer, src_buffer, frame_info->srcBuffer);
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    struct radv_video_session *vid = cmd_buffer->video.vid;
    struct radv_video_session_params *params = cmd_buffer->video.params;
    unsigned size = 0;

@@ -2924,7 +2934,7 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf
    radv_vid_buffer_upload_alloc(cmd_buffer, size, &out_offset, &ptr);
    msg_bo = cmd_buffer->upload.upload_bo;

-   if (cmd_buffer->device->physical_device->vid_decode_ip == AMD_IP_VCN_UNIFIED)
+   if (pdev->vid_decode_ip == AMD_IP_VCN_UNIFIED)
      radv_vcn_sq_start(cmd_buffer);

    uint32_t slice_offset;

@@ -2955,9 +2965,9 @@ radv_vcn_decode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoDecodeInf
    else if (have_probs(vid))
      send_cmd(cmd_buffer, RDECODE_CMD_PROB_TBL_BUFFER, it_probs_bo, it_probs_offset);

-   if (cmd_buffer->device->physical_device->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
+   if (pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED) {
      radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 2);
-      set_reg(cmd_buffer, cmd_buffer->device->physical_device->vid_dec_reg.cntl, 1);
+      set_reg(cmd_buffer, pdev->vid_dec_reg.cntl, 1);
    } else
      radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
 }

@@ -2966,8 +2976,9 @@ VKAPI_ATTR void VKAPI_CALL
 radv_CmdDecodeVideoKHR(VkCommandBuffer commandBuffer, const VkVideoDecodeInfoKHR *frame_info)
 {
    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+   struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);

-   if (radv_has_uvd(cmd_buffer->device->physical_device))
+   if (radv_has_uvd(pdev))
      radv_uvd_decode_video(cmd_buffer, frame_info);
    else
      radv_vcn_decode_video(cmd_buffer, frame_info);
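The AV1 context-buffer hunk in radv_GetVideoSessionMemoryRequirementsKHR() builds memoryTypeBits by testing each memory type for host visibility, presumably because the CPU initializes that buffer. The same computation as a self-contained sketch against the standard Vulkan structs; the helper name is illustrative, not driver API:

#include <vulkan/vulkan.h>

/* Mask of all host-visible memory types in a device's memory properties. */
static uint32_t
host_visible_memory_type_bits(const VkPhysicalDeviceMemoryProperties *props)
{
   uint32_t bits = 0;
   for (uint32_t i = 0; i < props->memoryTypeCount; i++) {
      if (props->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
         bits |= 1u << i;
   }
   return bits;
}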
@@ -54,17 +54,17 @@ static VkQueue
 radv_wsi_get_prime_blit_queue(VkDevice _device)
 {
    RADV_FROM_HANDLE(radv_device, device, _device);
+   struct radv_physical_device *pdev = radv_device_physical(device);

    if (device->private_sdma_queue != VK_NULL_HANDLE)
      return vk_queue_to_handle(&device->private_sdma_queue->vk);

-   if (device->physical_device->info.gfx_level >= GFX9 &&
-       !(device->physical_device->instance->debug_flags & RADV_DEBUG_NO_DMA_BLIT)) {
+   if (pdev->info.gfx_level >= GFX9 && !(pdev->instance->debug_flags & RADV_DEBUG_NO_DMA_BLIT)) {

-      device->physical_device->vk_queue_to_radv[device->physical_device->num_queues++] = RADV_QUEUE_TRANSFER;
+      pdev->vk_queue_to_radv[pdev->num_queues++] = RADV_QUEUE_TRANSFER;
      const VkDeviceQueueCreateInfo queue_create = {
         .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
-         .queueFamilyIndex = device->physical_device->num_queues - 1,
+         .queueFamilyIndex = pdev->num_queues - 1,
         .queueCount = 1,
      };
@@ -73,14 +73,15 @@ radv_write_harvested_raster_configs(struct radv_physical_device *pdev, struct ra
 void
 radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
 {
-   const struct radeon_info *gpu_info = &device->physical_device->info;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const struct radeon_info *gpu_info = &pdev->info;

    radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
    radeon_emit(cs, 0);
    radeon_emit(cs, 0);
    radeon_emit(cs, 0);

-   radeon_set_sh_reg(cs, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(device->physical_device->info.address32_hi >> 8));
+   radeon_set_sh_reg(cs, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(pdev->info.address32_hi >> 8));

    radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2);
    /* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1,

@@ -90,7 +91,7 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
      radeon_emit(cs, S_00B8AC_SA0_CU_EN(cu_mask) | S_00B8AC_SA1_CU_EN(cu_mask));
    }

-   if (device->physical_device->info.gfx_level >= GFX7) {
+   if (pdev->info.gfx_level >= GFX7) {
      /* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */
      radeon_set_sh_reg_seq(cs, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2);
      for (unsigned i = 2; i < 4; ++i) {

@@ -107,12 +108,11 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
      }
    }

-   if (device->physical_device->info.gfx_level >= GFX9 && device->physical_device->info.gfx_level < GFX11) {
-      radeon_set_uconfig_reg(cs, R_0301EC_CP_COHER_START_DELAY,
-                             device->physical_device->info.gfx_level >= GFX10 ? 0x20 : 0);
+   if (pdev->info.gfx_level >= GFX9 && pdev->info.gfx_level < GFX11) {
+      radeon_set_uconfig_reg(cs, R_0301EC_CP_COHER_START_DELAY, pdev->info.gfx_level >= GFX10 ? 0x20 : 0);
    }

-   if (device->physical_device->info.gfx_level >= GFX10) {
+   if (pdev->info.gfx_level >= GFX10) {
      radeon_set_sh_reg_seq(cs, R_00B890_COMPUTE_USER_ACCUM_0, 4);
      radeon_emit(cs, 0); /* R_00B890_COMPUTE_USER_ACCUM_0 */
      radeon_emit(cs, 0); /* R_00B894_COMPUTE_USER_ACCUM_1 */

@@ -122,7 +122,7 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
      radeon_set_sh_reg(cs, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0);
    }

-   if (device->physical_device->info.gfx_level == GFX6) {
+   if (pdev->info.gfx_level == GFX6) {
      if (device->border_color_data.bo) {
         uint64_t bc_va = radv_buffer_get_va(device->border_color_data.bo);
         radeon_set_config_reg(cs, R_00950C_TA_CS_BC_BASE_ADDR, bc_va >> 8);

@@ -132,7 +132,7 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
    if (device->tma_bo) {
      uint64_t tba_va, tma_va;

-      assert(device->physical_device->info.gfx_level == GFX8);
+      assert(pdev->info.gfx_level == GFX8);

      tba_va = radv_shader_get_va(device->trap_handler_shader);
      tma_va = radv_buffer_get_va(device->tma_bo);

@@ -144,7 +144,7 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
      radeon_emit(cs, tma_va >> 40);
    }

-   if (device->physical_device->info.gfx_level >= GFX11) {
+   if (pdev->info.gfx_level >= GFX11) {
      radeon_set_sh_reg_seq(cs, R_00B8AC_COMPUTE_STATIC_THREAD_MGMT_SE4, 4);
      /* SE4-SE7 */
      for (unsigned i = 4; i < 8; ++i) {

@@ -187,7 +187,7 @@ radv_set_raster_config(struct radv_physical_device *pdev, struct radeon_cmdbuf *
 void
 radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
 {
-   struct radv_physical_device *pdev = device->physical_device;
+   struct radv_physical_device *pdev = radv_device_physical(device);

    bool has_clear_state = pdev->info.has_clear_state;
    int i;

@@ -300,26 +300,19 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
      radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
    }

-   if (device->physical_device->info.gfx_level >= GFX10) {
-      radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS,
-                        S_00B524_MEM_BASE(device->physical_device->info.address32_hi >> 8));
-      radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES,
-                        S_00B324_MEM_BASE(device->physical_device->info.address32_hi >> 8));
-   } else if (device->physical_device->info.gfx_level == GFX9) {
-      radeon_set_sh_reg(cs, R_00B414_SPI_SHADER_PGM_HI_LS,
-                        S_00B414_MEM_BASE(device->physical_device->info.address32_hi >> 8));
-      radeon_set_sh_reg(cs, R_00B214_SPI_SHADER_PGM_HI_ES,
-                        S_00B214_MEM_BASE(device->physical_device->info.address32_hi >> 8));
+   if (pdev->info.gfx_level >= GFX10) {
+      radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS, S_00B524_MEM_BASE(pdev->info.address32_hi >> 8));
+      radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(pdev->info.address32_hi >> 8));
+   } else if (pdev->info.gfx_level == GFX9) {
+      radeon_set_sh_reg(cs, R_00B414_SPI_SHADER_PGM_HI_LS, S_00B414_MEM_BASE(pdev->info.address32_hi >> 8));
+      radeon_set_sh_reg(cs, R_00B214_SPI_SHADER_PGM_HI_ES, S_00B214_MEM_BASE(pdev->info.address32_hi >> 8));
    } else {
-      radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS,
-                        S_00B524_MEM_BASE(device->physical_device->info.address32_hi >> 8));
-      radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES,
-                        S_00B324_MEM_BASE(device->physical_device->info.address32_hi >> 8));
+      radeon_set_sh_reg(cs, R_00B524_SPI_SHADER_PGM_HI_LS, S_00B524_MEM_BASE(pdev->info.address32_hi >> 8));
+      radeon_set_sh_reg(cs, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(pdev->info.address32_hi >> 8));
    }

-   if (device->physical_device->info.gfx_level < GFX11)
-      radeon_set_sh_reg(cs, R_00B124_SPI_SHADER_PGM_HI_VS,
-                        S_00B124_MEM_BASE(device->physical_device->info.address32_hi >> 8));
+   if (pdev->info.gfx_level < GFX11)
+      radeon_set_sh_reg(cs, R_00B124_SPI_SHADER_PGM_HI_VS, S_00B124_MEM_BASE(pdev->info.address32_hi >> 8));

    unsigned cu_mask_ps = 0xffffffff;

@@ -400,8 +393,7 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
    /* Enable CMASK/FMASK/HTILE/DCC caching in L2 for small chips. */
    unsigned meta_write_policy, meta_read_policy;
-   unsigned no_alloc =
-      device->physical_device->info.gfx_level >= GFX11 ? V_02807C_CACHE_NOA_GFX11 : V_02807C_CACHE_NOA_GFX10;
+   unsigned no_alloc = pdev->info.gfx_level >= GFX11 ? V_02807C_CACHE_NOA_GFX11 : V_02807C_CACHE_NOA_GFX10;

    /* TODO: investigate whether LRU improves performance on other chips too */
    if (pdev->info.max_render_backends <= 4) {

@@ -419,7 +411,7 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
                          S_02807C_S_RD_POLICY(no_alloc) | S_02807C_HTILE_RD_POLICY(meta_read_policy));

    uint32_t gl2_cc;
-   if (device->physical_device->info.gfx_level >= GFX11) {
+   if (pdev->info.gfx_level >= GFX11) {
      gl2_cc = S_028410_DCC_WR_POLICY_GFX11(meta_write_policy) |
               S_028410_COLOR_WR_POLICY_GFX11(V_028410_CACHE_STREAM) |
               S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_GFX11);

@@ -569,7 +561,7 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
    if (device->tma_bo) {
      uint64_t tba_va, tma_va;

-      assert(device->physical_device->info.gfx_level == GFX8);
+      assert(pdev->info.gfx_level == GFX8);

      tba_va = radv_shader_get_va(device->trap_handler_shader);
      tma_va = radv_buffer_get_va(device->tma_bo);

@@ -630,6 +622,7 @@ radv_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
 void
 radv_create_gfx_config(struct radv_device *device)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radeon_cmdbuf *cs = device->ws->cs_create(device->ws, AMD_IP_GFX, false);
    if (!cs)
      return;

@@ -639,7 +632,7 @@ radv_create_gfx_config(struct radv_device *device)
    radv_emit_graphics(device, cs);

    while (cs->cdw & 7) {
-      if (device->physical_device->info.gfx_ib_pad_with_type2)
+      if (pdev->info.gfx_ib_pad_with_type2)
         radeon_emit(cs, PKT2_NOP_PAD);
      else
         radeon_emit(cs, PKT3_NOP_PAD);

@@ -817,7 +810,8 @@ radv_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_d
                             bool count_from_stream_output, uint32_t draw_vertex_count, unsigned topology,
                             bool prim_restart_enable, unsigned patch_control_points, unsigned num_tess_patches)
 {
-   const struct radeon_info *gpu_info = &cmd_buffer->device->physical_device->info;
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
+   const struct radeon_info *gpu_info = &pdev->info;
    const unsigned max_primgroup_in_wave = 2;
    /* SWITCH_ON_EOP(0) is always preferable. */
    bool wd_switch_on_eop = false;

@@ -839,7 +833,7 @@ radv_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_d
    /* GS requirement. */
    if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_GEOMETRY) && gpu_info->gfx_level <= GFX8) {
-      unsigned gs_table_depth = cmd_buffer->device->physical_device->gs_table_depth;
+      unsigned gs_table_depth = pdev->gs_table_depth;
      if (SI_GS_PER_ES / primgroup_size >= gs_table_depth - 3)
         partial_es_wave = true;
    }

@@ -1495,6 +1489,7 @@ radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enu
 void
 radv_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    bool is_compute = cmd_buffer->qf == RADV_QUEUE_COMPUTE;

    if (is_compute)

@@ -1509,10 +1504,10 @@ radv_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
      return;
    }

-   radv_cs_emit_cache_flush(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->device->physical_device->info.gfx_level,
-                            &cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va,
-                            radv_cmd_buffer_uses_mec(cmd_buffer), cmd_buffer->state.flush_bits,
-                            &cmd_buffer->state.sqtt_flush_bits, cmd_buffer->gfx9_eop_bug_va);
+   radv_cs_emit_cache_flush(cmd_buffer->device->ws, cmd_buffer->cs, pdev->info.gfx_level, &cmd_buffer->gfx9_fence_idx,
+                            cmd_buffer->gfx9_fence_va, radv_cmd_buffer_uses_mec(cmd_buffer),
+                            cmd_buffer->state.flush_bits, &cmd_buffer->state.sqtt_flush_bits,
+                            cmd_buffer->gfx9_eop_bug_va);

    if (radv_device_fault_detection_enabled(cmd_buffer->device))
      radv_cmd_buffer_trace_emit(cmd_buffer);

@@ -1539,6 +1534,7 @@ radv_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 void
 radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible, unsigned pred_op, uint64_t va)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
    uint32_t op = 0;

    radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4);

@@ -1554,7 +1550,7 @@ radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_vi
       */
      op |= draw_visible ? PREDICATION_DRAW_VISIBLE : PREDICATION_DRAW_NOT_VISIBLE;
    }
-   if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) {
+   if (pdev->info.gfx_level >= GFX9) {
      radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
      radeon_emit(cmd_buffer->cs, op);
      radeon_emit(cmd_buffer->cs, va);

@@ -1569,7 +1565,8 @@ radv_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_vi
 void
 radv_emit_cond_exec(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, uint32_t count)
 {
-   const enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(device);
+   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;

    if (gfx_level >= GFX7) {
      radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));

@@ -1619,12 +1616,13 @@ static void
 radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool predicating, uint64_t dst_va,
                     uint64_t src_va, unsigned size, unsigned flags)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    uint32_t header = 0, command = 0;

-   assert(size <= cp_dma_max_byte_count(device->physical_device->info.gfx_level));
+   assert(size <= cp_dma_max_byte_count(pdev->info.gfx_level));

    radeon_check_space(device->ws, cs, 9);
-   if (device->physical_device->info.gfx_level >= GFX9)
+   if (pdev->info.gfx_level >= GFX9)
      command |= S_415_BYTE_COUNT_GFX9(size);
    else
      command |= S_415_BYTE_COUNT_GFX6(size);

@@ -1637,7 +1635,7 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool p
      command |= S_415_RAW_WAIT(1);

    /* Src and dst flags. */
-   if (device->physical_device->info.gfx_level >= GFX9 && !(flags & CP_DMA_CLEAR) && src_va == dst_va)
+   if (pdev->info.gfx_level >= GFX9 && !(flags & CP_DMA_CLEAR) && src_va == dst_va)
      header |= S_411_DST_SEL(V_411_NOWHERE); /* prefetch only */
    else if (flags & CP_DMA_USE_L2)
      header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);

@@ -1647,7 +1645,7 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool p
    else if (flags & CP_DMA_USE_L2)
      header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);

-   if (device->physical_device->info.gfx_level >= GFX7) {
+   if (pdev->info.gfx_level >= GFX7) {
      radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, predicating));
      radeon_emit(cs, header);
      radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */

@@ -1699,8 +1697,9 @@ void
 radv_cs_cp_dma_prefetch(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, unsigned size,
                         bool predicating)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radeon_winsys *ws = device->ws;
-   enum amd_gfx_level gfx_level = device->physical_device->info.gfx_level;
+   enum amd_gfx_level gfx_level = pdev->info.gfx_level;
    uint32_t header = 0, command = 0;

    if (gfx_level >= GFX11)

@@ -1784,15 +1783,15 @@ radv_cp_dma_realign_engine(struct radv_cmd_buffer *cmd_buffer, unsigned size)
 void
 radv_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va, uint64_t size)
 {
-   enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level;
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
+   enum amd_gfx_level gfx_level = pdev->info.gfx_level;
    uint64_t main_src_va, main_dest_va;
    uint64_t skipped_size = 0, realign_size = 0;

    /* Assume that we are not going to sync after the last DMA operation. */
    cmd_buffer->state.dma_is_busy = true;

-   if (cmd_buffer->device->physical_device->info.family <= CHIP_CARRIZO ||
-       cmd_buffer->device->physical_device->info.family == CHIP_STONEY) {
+   if (pdev->info.family <= CHIP_CARRIZO || pdev->info.family == CHIP_STONEY) {
      /* If the size is not aligned, we must add a dummy copy at the end
       * just to align the internal counter. Otherwise, the DMA engine
       * would slow down by an order of magnitude for following copies.

@@ -1818,7 +1817,7 @@ radv_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uin
      unsigned dma_flags = 0;
      unsigned byte_count = MIN2(size, cp_dma_max_byte_count(gfx_level));

-      if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) {
+      if (pdev->info.gfx_level >= GFX9) {
         /* DMA operations via L2 are coherent and faster.
          * TODO: GFX7-GFX8 should also support this but it
          * requires tests/benchmarks.

@@ -1858,12 +1857,14 @@ radv_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uin
 void
 radv_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, unsigned value)
 {
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
+
    if (!size)
      return;

    assert(va % 4 == 0 && size % 4 == 0);

-   enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->info.gfx_level;
+   enum amd_gfx_level gfx_level = pdev->info.gfx_level;

    /* Assume that we are not going to sync after the last DMA operation. */
    cmd_buffer->state.dma_is_busy = true;

@@ -1872,7 +1873,7 @@ radv_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64
      unsigned byte_count = MIN2(size, cp_dma_max_byte_count(gfx_level));
      unsigned dma_flags = CP_DMA_CLEAR;

-      if (cmd_buffer->device->physical_device->info.gfx_level >= GFX9) {
+      if (pdev->info.gfx_level >= GFX9) {
         /* DMA operations via L2 are coherent and faster.
          * TODO: GFX7-GFX8 should also support this but it
          * requires tests/benchmarks.

@@ -1895,7 +1896,9 @@ void
 radv_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer)
 {
-   if (cmd_buffer->device->physical_device->info.gfx_level < GFX7)
+   const struct radv_physical_device *pdev = radv_device_physical(cmd_buffer->device);
+
+   if (pdev->info.gfx_level < GFX7)
      return;

    if (!cmd_buffer->state.dma_is_busy)