Mirror of https://gitlab.freedesktop.org/mesa/mesa.git (synced 2025-12-29 03:40:10 +01:00)
Compare commits
134 commits
main...mesa-25.3.
Commit SHA1s (author and date columns lost):
944ec88ca5, 7d4557bae8, 4482281292, 1dadb38d7b, ef9457d119, 9696921018, 30678337dd, eb2668aad7, e23c722170, 2ad12150e8,
3109237d7c, 775652e08b, 51285c6715, c70fd7f766, d527eedb15, 7c18540961, 90b6c3a8ac, 3dab73159b, 55a37838b9, 9bad1beb98,
3086692bcd, ce6c6a7a57, 629a0a4dcc, 12c82aaa82, 1e885e7a88, 3ddddf78b4, 86313f9571, a46307a732, 0a0d08dfe0, 182877f3c8,
9aeac1e0a7, 46f0422165, f69d1abfcf, 770e095766, 205fe1a245, 093c7d9d8e, 2c67b0fac6, e082f6b6c0, a12369eb3d, 6670d0742b,
a7a020dde6, 7e15070ee1, 0edb1852a7, 3ce875a2d0, fd777ce645, 315b688976, 3a71d94735, 8a2bf930bb, ac492d42be, 2e17fd0cb2,
9311f170c4, 3e227a04b1, f63a5df30b, 9ba765e3e3, 8010d0cd39, f1f32d557e, 05e5db1a4d, 5ae8474029, b3470359bf, 5e1a88cea0,
040453857b, 28e172e956, 74880f8954, f02f5e217f, d9636807f7, b768139858, 498a25cfb8, 9728bbf7b0, f142fdc273, 1c52a94428,
2cfd3c52b2, 606ebb042e, 424f37b348, 7f75931019, ba107091c2, b74000dbce, fb2273df78, 65eb3aed4b, a9653fa019, 159d397437,
6a7effe059, 2a0a2cc5b0, 3f9f4d79d3, cd253df92a, bfd09d9891, dcecd8fd1e, 1648f759c1, d5f7261ce5, 2c1c52a8c8, fe3a3b08c9,
d9812eaea8, be191ceff7, 49bfddbd11, 0182cde848, 94ec7c686d, 4202ea6c7f, 10475e8ac1, c1cf6e75ae, 2b8675fd86, e967da84a8,
2a8f2ff397, 7a30a71c45, 9c57c0a194, 425c49ebf2, 7b7cb63a14, 1941ada4a6, e982234bb6, dbbadebe13, 0d100cc078, f656d062e3,
847ad886d6, 5dcc65643c, ab7bda0a1b, a02d8d5767, 13fa1460dd, 14544ef278, 602b4a2924, 717e8a8caf, 40ff53c5b8, bf9e1f2e37,
c3cf272a04, 30ba8880b4, 42ab1c6f3c, 674e2a702a, 756618ee3b, ca7d2daf5f, 45aafef631, 8711394383, 289c768e88, 84655b4b5d,
fd6b9c70b6, 9bb7bf9c66, f510e6a1bd, 40f7bef16c
147 changed files with 9140 additions and 1853 deletions
.pick_status.json (new file, 7682 lines)
File diff suppressed because it is too large
VERSION (2 changed lines)
@@ -1 +1 @@
-25.3.0-devel
+25.3.0-rc4
@@ -122,9 +122,8 @@ Enable the site and restart nginx:
   # Second download should be cached.
   wget http://localhost/cache/?uri=https://s3.freedesktop.org/mesa-tracie-public/itoral-gl-terrain-demo/demo-v2.trace

Now, set ``download-url`` in your ``traces-*.yml`` entry to something like
``http://caching-proxy/cache/?uri=https://s3.freedesktop.org/mesa-tracie-public``
and you should have cached downloads for traces. Add it to
``FDO_HTTP_CACHE_URI=`` in your ``config.toml`` runner environment lines and you
can use it for cached artifact downloads instead of going all the way to
freedesktop.org on each job.
The trace runner script automatically sets the caching proxy, so there's no
need to modify anything in the Mesa CI YAML files.
Add ``LAVA_HTTP_CACHE_URI=http://localhost/cache/?uri=`` to your ``config.toml``
runner environment lines and you can use it for cached artifact downloads
instead of going all the way to freedesktop.org on each job.
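For illustration, with the proxy above in place a trace fetch goes through the rewritten URL (trace path hypothetical, following the pattern documented above):

   wget http://caching-proxy/cache/?uri=https://s3.freedesktop.org/mesa-tracie-public/some-app/some-demo.trace

so repeat downloads are served from the nginx cache rather than from s3.freedesktop.org.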
@@ -1489,8 +1489,6 @@ struct drm_amdgpu_info_hw_ip {
   __u32 available_rings;
   /** version info: bits 23:16 major, 15:8 minor, 7:0 revision */
   __u32 ip_discovery_version;
-   /* Userq available slots */
-   __u32 userq_num_slots;
};

/* GFX metadata BO sizes and alignment info (in bytes) */
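As a side note, the packed field described by the "bits 23:16 major, 15:8 minor, 7:0 revision" comment can be decoded with shifts and masks; a sketch (these helper macros are illustrative, not part of the uapi header):

   #define IP_VER_MAJ(v) (((v) >> 16) & 0xff)
   #define IP_VER_MIN(v) (((v) >> 8) & 0xff)
   #define IP_VER_REV(v) ((v) & 0xff)
   /* e.g. IP_VER_MAJ(0xb0000) == 11, i.e. 0xb0000 decodes to 11.0.0,
    * matching the GFX11 entries in the device tables later in this diff. */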
@@ -315,8 +315,6 @@ ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
         info->ip[ip_type].num_queues = 1;
      } else if (ip_info.available_rings) {
         info->ip[ip_type].num_queues = util_bitcount(ip_info.available_rings);
-      } else if (ip_info.userq_num_slots) {
-         info->ip[ip_type].num_queue_slots = ip_info.userq_num_slots;
      } else {
         continue;
      }
@@ -1696,11 +1694,11 @@ void ac_print_gpu_info(const struct radeon_info *info, FILE *f)
   fprintf(f, " clock_crystal_freq = %i KHz\n", info->clock_crystal_freq);

   for (unsigned i = 0; i < AMD_NUM_IP_TYPES; i++) {
-      if (info->ip[i].num_queues || info->ip[i].num_queue_slots) {
-         fprintf(f, " IP %-7s %2u.%u \tqueues:%u \tqueue_slots:%u \talign:%u \tpad_dw:0x%x\n",
+      if (info->ip[i].num_queues) {
+         fprintf(f, " IP %-7s %2u.%u \tqueues:%u \talign:%u \tpad_dw:0x%x\n",
                 ac_get_ip_type_string(info, i),
                 info->ip[i].ver_major, info->ip[i].ver_minor, info->ip[i].num_queues,
-                info->ip[i].num_queue_slots,info->ip[i].ib_alignment, info->ip[i].ib_pad_dw_mask);
+                info->ip[i].ib_alignment, info->ip[i].ib_pad_dw_mask);
      }
   }
@@ -26,7 +26,6 @@ struct amd_ip_info {
   uint8_t ver_minor;
   uint8_t ver_rev;
   uint8_t num_queues;
-   uint8_t num_queue_slots;
   uint8_t num_instances;
   uint32_t ib_alignment;
   uint32_t ib_pad_dw_mask;
@@ -194,7 +194,6 @@ struct drm_amdgpu_info_hw_ip {
   uint32_t ib_size_alignment;
   uint32_t available_rings;
   uint32_t ip_discovery_version;
-   uint32_t userq_num_slots;
};

struct drm_amdgpu_info_uq_fw_areas_gfx {
@@ -109,23 +109,37 @@ lower_mem_access_cb(nir_intrinsic_op intrin, uint8_t bytes, uint8_t bit_size, ui
   nir_mem_access_size_align res;

   if (intrin == nir_intrinsic_load_shared || intrin == nir_intrinsic_store_shared) {
-      /* Split unsupported shared access. */
-      res.bit_size = MIN2(bit_size, combined_align * 8ull);
-      res.align = res.bit_size / 8;
+      /* Don't use >64-bit LDS loads for performance reasons. */
+      unsigned max_bytes = intrin == nir_intrinsic_store_shared && cb_data->gfx_level >= GFX7 ? 16 : 8;
+      bytes = MIN3(bytes, combined_align, max_bytes);
      bytes = bytes == 12 ? bytes : round_down_to_power_of_2(bytes);

+      /* Split unsupported shared access. */
+      res.bit_size = MIN2(bit_size, bytes * 8ull);
+      res.align = res.bit_size / 8;
      res.num_components = bytes / res.align;
      res.shift = nir_mem_access_shift_method_bytealign_amd;
      return res;
   }

+   const bool is_buffer_load = intrin == nir_intrinsic_load_ubo ||
+                               intrin == nir_intrinsic_load_ssbo ||
+                               intrin == nir_intrinsic_load_constant;

   if (is_smem) {
+      const bool supported_subdword = cb_data->gfx_level >= GFX12 &&
+                                      intrin != nir_intrinsic_load_push_constant &&
+                                      (!cb_data->use_llvm || intrin != nir_intrinsic_load_ubo);

      /* Round up subdword loads if unsupported. */
-      const bool supported_subdword = cb_data->gfx_level >= GFX12 && intrin != nir_intrinsic_load_push_constant;
-      if (bit_size < 32 && (bytes >= 3 || !supported_subdword))
+      if (bytes <= 2 && combined_align % bytes == 0 && supported_subdword) {
+         bit_size = bytes * 8;
+      } else if (bytes % 4 || combined_align % 4) {
+         if (is_buffer_load)
+            bytes += 4 - MIN2(combined_align, 4);
         bytes = align(bytes, 4);
         bit_size = 32;
      }

      /* Generally, require an alignment of 4. */
      res.align = MIN2(4, bytes);

@@ -138,9 +152,6 @@ lower_mem_access_cb(nir_intrinsic_op intrin, uint8_t bytes, uint8_t bit_size, ui
   if (!util_is_power_of_two_nonzero(bytes) && (cb_data->gfx_level < GFX12 || bytes != 12)) {
      const uint8_t larger = util_next_power_of_two(bytes);
      const uint8_t smaller = larger / 2;
-      const bool is_buffer_load = intrin == nir_intrinsic_load_ubo ||
-                                  intrin == nir_intrinsic_load_ssbo ||
-                                  intrin == nir_intrinsic_load_constant;
      const bool is_aligned = align_mul % smaller == 0;

      /* Overfetch up to 1 dword if this is a bounds-checked buffer load or the access is aligned. */

@@ -185,8 +196,8 @@ lower_mem_access_cb(nir_intrinsic_op intrin, uint8_t bytes, uint8_t bit_size, ui

   const uint32_t max_pad = 4 - MIN2(combined_align, 4);

-   /* Global loads don't have bounds checking, so increasing the size might not be safe. */
-   if (intrin == nir_intrinsic_load_global || intrin == nir_intrinsic_load_global_constant) {
+   /* Global/scratch loads don't have bounds checking, so increasing the size might not be safe. */
+   if (!is_buffer_load) {
      if (align_mul < 4) {
         /* If we split the load, only lower it to 32-bit if this is a SMEM load. */
         const unsigned chunk_bytes = align(bytes, 4) - max_pad;
@@ -508,6 +508,8 @@ lower_ms_intrinsic(nir_builder *b, nir_instr *instr, void *state)
      return update_ms_barrier(b, intrin, s);
   case nir_intrinsic_load_workgroup_index:
      return lower_ms_load_workgroup_index(b, intrin, s);
+   case nir_intrinsic_load_num_subgroups:
+      return nir_imm_int(b, DIV_ROUND_UP(s->api_workgroup_size, s->wave_size));
   case nir_intrinsic_set_vertex_and_primitive_count:
      return lower_ms_set_vertex_and_primitive_count(b, intrin, s);
   default:

@@ -529,6 +531,7 @@ filter_ms_intrinsic(const nir_instr *instr,
          intrin->intrinsic == nir_intrinsic_store_per_primitive_output ||
          intrin->intrinsic == nir_intrinsic_barrier ||
          intrin->intrinsic == nir_intrinsic_load_workgroup_index ||
+          intrin->intrinsic == nir_intrinsic_load_num_subgroups ||
          intrin->intrinsic == nir_intrinsic_set_vertex_and_primitive_count;
}
@@ -214,6 +214,8 @@ select_rt_prolog(Program* program, ac_shader_config* config,
   bld.sop2(Builder::s_cselect, Definition(vcc, bld.lm),
            Operand::c32_or_c64(-1u, program->wave_size == 64),
            Operand::c32_or_c64(0, program->wave_size == 64), Operand(scc, s1));
+   bld.sop2(aco_opcode::s_cselect_b32, Definition(out_launch_size_y, s1),
+            Operand(out_launch_size_y, s1), Operand::c32(1), Operand(scc, s1));
   bld.vop2(aco_opcode::v_cndmask_b32, Definition(out_launch_ids[0], v1),
            Operand(tmp_invocation_idx, v1), Operand(out_launch_ids[0], v1), Operand(vcc, bld.lm));
   bld.vop2(aco_opcode::v_cndmask_b32, Definition(out_launch_ids[1], v1), Operand::zero(),
@@ -1329,7 +1329,6 @@ const struct amdgpu_device amdgpu_devices[] = {
      .ib_size_alignment = 32,
      .available_rings = 0x1,
      .ip_discovery_version = 0xb0000,
-      .userq_num_slots = 2,
   },
   .hw_ip_compute = {
      .hw_ip_version_major = 11,

@@ -1339,7 +1338,6 @@ const struct amdgpu_device amdgpu_devices[] = {
      .ib_size_alignment = 32,
      .available_rings = 0xf,
      .ip_discovery_version = 0xb0000,
-      .userq_num_slots = 16,
   },
   .fw_gfx_me = {
      .ver = 1486,

@@ -1460,7 +1458,6 @@ const struct amdgpu_device amdgpu_devices[] = {
      .ib_size_alignment = 32,
      .available_rings = 0x1,
      .ip_discovery_version = 0xb0002,
-      .userq_num_slots = 0x0,
   },
   .hw_ip_compute = {
      .hw_ip_version_major = 11,

@@ -1470,7 +1467,6 @@ const struct amdgpu_device amdgpu_devices[] = {
      .ib_size_alignment = 32,
      .available_rings = 0xf,
      .ip_discovery_version = 0xb0002,
-      .userq_num_slots = 0x0,
   },
   .fw_gfx_me = {
      .ver = 2390,

@@ -2070,7 +2066,6 @@ const struct amdgpu_device amdgpu_devices[] = {
      .ib_size_alignment = 32,
      .available_rings = 0x1,
      .ip_discovery_version = 0xb0500,
-      .userq_num_slots = 2,
   },
   .hw_ip_compute = {
      .hw_ip_version_major = 11,

@@ -2080,7 +2075,6 @@ const struct amdgpu_device amdgpu_devices[] = {
      .ib_size_alignment = 32,
      .available_rings = 0xf,
      .ip_discovery_version = 0xb0500,
-      .userq_num_slots = 16,
   },
   .fw_gfx_me = {
      .ver = 29,

@@ -2201,7 +2195,6 @@ const struct amdgpu_device amdgpu_devices[] = {
      .ib_size_alignment = 32,
      .available_rings = 0x1,
      .ip_discovery_version = 0xc0001,
-      .userq_num_slots = 8,
   },
   .hw_ip_compute = {
      .hw_ip_version_major = 12,

@@ -2211,7 +2204,6 @@ const struct amdgpu_device amdgpu_devices[] = {
      .ib_size_alignment = 32,
      .available_rings = 0xf,
      .ip_discovery_version = 0xc0001,
-      .userq_num_slots = 8,
   },
   .fw_gfx_me = {
      .ver = 2590,
@@ -379,7 +379,6 @@ amdgpu_dump_hw_ips(int fd)
      printf(" .ib_size_alignment = %u,\n", info.ib_size_alignment);
      printf(" .available_rings = 0x%x,\n", info.available_rings);
      printf(" .ip_discovery_version = 0x%04x,\n", info.ip_discovery_version);
-      printf(" .userq_num_slots = 0x%x,\n", info.userq_num_slots);
      printf("},\n");
   }
}
src/amd/vulkan/layers/radv_no_mans_sky.c (new file, 35 lines)
@@ -0,0 +1,35 @@
+/*
+ * Copyright © 2025 Valve Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "radv_device.h"
+#include "radv_entrypoints.h"
+#include "radv_image_view.h"
+
+VKAPI_ATTR VkResult VKAPI_CALL
+no_mans_sky_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo,
+                            const VkAllocationCallbacks *pAllocator, VkImageView *pView)
+{
+   VK_FROM_HANDLE(radv_device, device, _device);
+   VkResult result;
+
+   result = device->layer_dispatch.app.CreateImageView(_device, pCreateInfo, pAllocator, pView);
+   if (result != VK_SUCCESS)
+      return result;
+
+   VK_FROM_HANDLE(radv_image_view, iview, *pView);
+
+   if ((iview->vk.aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) &&
+       (iview->vk.usage &
+        (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))) {
+      /* No Man's Sky creates descriptors with depth/stencil aspects (only when Intel XESS is
+       * enabled, apparently), and this is illegal in Vulkan. Ignore them by using NULL descriptors
+       * to work around GPU hangs.
+       */
+      memset(&iview->descriptor, 0, sizeof(iview->descriptor));
+   }
+
+   return result;
+}
@@ -21,6 +21,7 @@ radv_entrypoints_gen_command += [
   '--device-prefix', 'metro_exodus',
   '--device-prefix', 'rage2',
   '--device-prefix', 'quantic_dream',
+   '--device-prefix', 'no_mans_sky',

   # Command buffer annotation layer entrypoints
   '--device-prefix', 'annotate',

@@ -40,6 +41,7 @@ libradv_files = files(
   'layers/radv_metro_exodus.c',
   'layers/radv_rage2.c',
   'layers/radv_quantic_dream.c',
+   'layers/radv_no_mans_sky.c',
   'layers/radv_rmv_layer.c',
   'layers/radv_rra_layer.c',
   'layers/radv_sqtt_layer.c',
@@ -6111,6 +6111,13 @@ radv_emit_tess_domain_origin_state(struct radv_cmd_buffer *cmd_buffer)
   radeon_end();
}

+static bool
+radv_is_dual_src_enabled(const struct radv_dynamic_state *dynamic_state)
+{
+   /* Dual-source blending must be ignored if blending isn't enabled for MRT0. */
+   return dynamic_state->blend_eq.mrt0_is_dual_src && !!(dynamic_state->color_blend_enable & 1u);
+}
+
static struct radv_shader_part *
lookup_ps_epilog(struct radv_cmd_buffer *cmd_buffer)
{
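The `& 1u` in the helper works because `color_blend_enable` is a per-MRT bitmask, so bit 0 is MRT0's blend enable. A quick illustration (values hypothetical, not from the patch):

   uint32_t color_blend_enable = 0x6;               /* MRT1 and MRT2 enabled, MRT0 disabled */
   bool mrt0_enabled = !!(color_blend_enable & 1u); /* false, so dual-source blending is ignored */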
@@ -6144,7 +6151,7 @@ lookup_ps_epilog(struct radv_cmd_buffer *cmd_buffer)

   state.color_write_mask = d->color_write_mask;
   state.color_blend_enable = d->color_blend_enable;
-   state.mrt0_is_dual_src = d->blend_eq.mrt0_is_dual_src;
+   state.mrt0_is_dual_src = radv_is_dual_src_enabled(&cmd_buffer->state.dynamic);

   if (d->vk.ms.alpha_to_coverage_enable) {
      /* Select a color export format with alpha when alpha to coverage is enabled. */

@@ -8114,6 +8121,8 @@ radv_mark_descriptors_dirty(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPo
   struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point);

   descriptors_state->dirty |= descriptors_state->valid;
+   if (descriptors_state->dynamic_offset_count)
+      descriptors_state->dirty_dynamic = true;
}

static void

@@ -8642,7 +8651,6 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline

   if (cmd_buffer->state.compute_pipeline == compute_pipeline)
      return;
-   radv_mark_descriptors_dirty(cmd_buffer, pipelineBindPoint);

   radv_bind_shader(cmd_buffer, compute_pipeline->base.shaders[MESA_SHADER_COMPUTE], MESA_SHADER_COMPUTE);

@@ -8656,7 +8664,6 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline

   if (cmd_buffer->state.rt_pipeline == rt_pipeline)
      return;
-   radv_mark_descriptors_dirty(cmd_buffer, pipelineBindPoint);

   radv_bind_shader(cmd_buffer, rt_pipeline->base.base.shaders[MESA_SHADER_INTERSECTION], MESA_SHADER_INTERSECTION);
   radv_bind_rt_prolog(cmd_buffer, rt_pipeline->prolog);

@@ -8690,7 +8697,6 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline

   if (cmd_buffer->state.graphics_pipeline == graphics_pipeline)
      return;
-   radv_mark_descriptors_dirty(cmd_buffer, pipelineBindPoint);

   radv_foreach_stage (
      stage, (cmd_buffer->state.active_stages | graphics_pipeline->active_stages) & RADV_GRAPHICS_STAGE_BITS) {

@@ -8744,6 +8750,8 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline
   cmd_buffer->descriptors[vk_to_bind_point(pipelineBindPoint)].dynamic_offset_count = pipeline->dynamic_offset_count;
   cmd_buffer->descriptors[vk_to_bind_point(pipelineBindPoint)].need_indirect_descriptors =
      pipeline->need_indirect_descriptors;
+
+   radv_mark_descriptors_dirty(cmd_buffer, pipelineBindPoint);
}

VKAPI_ATTR void VKAPI_CALL

@@ -11688,7 +11696,7 @@ radv_emit_cb_render_state(struct radv_cmd_buffer *cmd_buffer)
   const struct radv_rendering_state *render = &cmd_buffer->state.render;
   const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
   unsigned cb_blend_control[MAX_RTS], sx_mrt_blend_opt[MAX_RTS];
-   const bool mrt0_is_dual_src = d->blend_eq.mrt0_is_dual_src;
+   const bool mrt0_is_dual_src = radv_is_dual_src_enabled(&cmd_buffer->state.dynamic);
   uint32_t cb_color_control = 0;

   const uint32_t cb_target_mask = d->color_write_enable & d->color_write_mask;
@@ -792,6 +792,8 @@ init_dispatch_tables(struct radv_device *device, struct radv_physical_device *pd
      add_entrypoints(&b, &rage2_device_entrypoints, RADV_APP_DISPATCH_TABLE);
   } else if (!strcmp(instance->drirc.debug.app_layer, "quanticdream")) {
      add_entrypoints(&b, &quantic_dream_device_entrypoints, RADV_APP_DISPATCH_TABLE);
+   } else if (!strcmp(instance->drirc.debug.app_layer, "no_mans_sky")) {
+      add_entrypoints(&b, &no_mans_sky_device_entrypoints, RADV_APP_DISPATCH_TABLE);
   }

   if (instance->vk.trace_mode & RADV_TRACE_MODE_RGP)
@@ -200,6 +200,7 @@ static const driOptionDescription radv_dri_options[] = {
   DRI_CONF_RADV_EMULATE_RT(false)
   DRI_CONF_RADV_ENABLE_FLOAT16_GFX8(false)
   DRI_CONF_RADV_COOPERATIVE_MATRIX2_NV(false)
+   DRI_CONF_RADV_NO_IMPLICIT_VARYING_SUBGROUP_SIZE(false)
   DRI_CONF_SECTION_END
};
// clang-format on

@@ -236,6 +237,8 @@ radv_init_dri_debug_options(struct radv_instance *instance)
   drirc->debug.ssbo_non_uniform = driQueryOptionb(&drirc->options, "radv_ssbo_non_uniform");
   drirc->debug.tex_non_uniform = driQueryOptionb(&drirc->options, "radv_tex_non_uniform");
   drirc->debug.zero_vram = driQueryOptionb(&drirc->options, "radv_zero_vram");
+   drirc->debug.no_implicit_varying_subgroup_size =
+      driQueryOptionb(&drirc->options, "radv_no_implicit_varying_subgroup_size");
   drirc->debug.app_layer = driQueryOptionstr(&drirc->options, "radv_app_layer");

   drirc->debug.override_uniform_offset_alignment =
@@ -57,6 +57,7 @@ struct radv_drirc {
      bool ssbo_non_uniform;
      bool tex_non_uniform;
      bool zero_vram;
+      bool no_implicit_varying_subgroup_size;
      char *app_layer;
      int override_uniform_offset_alignment;
   } debug;
@@ -252,6 +252,7 @@ radv_physical_device_init_cache_key(struct radv_physical_device *pdev)
   key->use_llvm = pdev->use_llvm;
   key->use_ngg = pdev->use_ngg;
   key->use_ngg_culling = pdev->use_ngg_culling;
+   key->no_implicit_varying_subgroup_size = instance->drirc.debug.no_implicit_varying_subgroup_size;
}

static int
@@ -64,8 +64,9 @@ struct radv_physical_device_cache_key {
   uint32_t use_llvm : 1;
   uint32_t use_ngg : 1;
   uint32_t use_ngg_culling : 1;
+   uint32_t no_implicit_varying_subgroup_size : 1;

-   uint32_t reserved : 10;
+   uint32_t reserved : 9;
};

enum radv_video_enc_hw_ver {
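Note the bookkeeping in this hunk: the struct feeds the shader-cache key, so the new `:1` flag is paid for by shrinking `reserved` from 10 to 9 bits, keeping the packed layout and total size unchanged. A sketch of that invariant (field widths illustrative, not the real struct):

   struct key_bits_sketch {
      uint32_t existing_flags : 22;
      uint32_t new_flag : 1;
      uint32_t reserved : 9; /* 22 + 1 + 9 == 32 */
   };
   _Static_assert(sizeof(struct key_bits_sketch) == sizeof(uint32_t),
                  "cache-key bits must stay packed in one dword");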
@@ -2383,8 +2383,9 @@ radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t first
         break;
      }
      case VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR: {
+         const bool write_memory = radv_video_write_memory_supported(pdev) == RADV_VIDEO_WRITE_MEMORY_SUPPORT_FULL;
         uint32_t *src32 = (uint32_t *)src;
-         uint32_t ready_idx = radv_video_write_memory_supported(pdev) ? RADV_ENC_FEEDBACK_STATUS_IDX : 1;
+         uint32_t ready_idx = write_memory ? RADV_ENC_FEEDBACK_STATUS_IDX : 1;
         uint32_t value;
         do {
            value = p_atomic_read(&src32[ready_idx]);
@@ -367,6 +367,10 @@ radv_shader_choose_subgroup_size(struct radv_device *device, nir_shader *nir,
      .requiredSubgroupSize = stage_key->subgroup_required_size * 32,
   };

+   /* Do not allow for the SPIR-V 1.6 varying subgroup size rules. */
+   if (pdev->cache_key.no_implicit_varying_subgroup_size)
+      spirv_version = 0x10000;
+
   vk_set_subgroup_size(&device->vk, nir, spirv_version, rss_info.requiredSubgroupSize ? &rss_info : NULL,
                        stage_key->subgroup_allow_varying, stage_key->subgroup_require_full);
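The 0x10000 constant is the SPIR-V version word, with the major version in bits 23:16 and the minor version in bits 15:8: 0x10000 == (1 << 16) | (0 << 8), i.e. SPIR-V 1.0, which predates the SPIR-V 1.6 rule that lets implementations vary the subgroup size implicitly.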
@@ -508,7 +508,9 @@ radv_begin_sqtt(struct radv_queue *queue)
      device->sqtt.start_cs[family] = NULL;
   }

-   cs.b = ws->cs_create(ws, radv_queue_ring(queue), false);
+   radv_init_cmd_stream(&cs, radv_queue_ring(queue));
+
+   cs.b = ws->cs_create(ws, cs.hw_ip, false);
   if (!cs.b)
      return false;

@@ -585,7 +587,9 @@ radv_end_sqtt(struct radv_queue *queue)
      device->sqtt.stop_cs[family] = NULL;
   }

-   cs.b = ws->cs_create(ws, radv_queue_ring(queue), false);
+   radv_init_cmd_stream(&cs, radv_queue_ring(queue));
+
+   cs.b = ws->cs_create(ws, cs.hw_ip, false);
   if (!cs.b)
      return false;
@@ -149,10 +149,16 @@ radv_vcn_write_memory(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned
   struct radv_physical_device *pdev = radv_device_physical(device);
   struct rvcn_sq_var sq;
   struct radv_cmd_stream *cs = cmd_buffer->cs;
+   enum radv_video_write_memory_support support = radv_video_write_memory_supported(pdev);

-   if (!radv_video_write_memory_supported(pdev))
+   if (support == RADV_VIDEO_WRITE_MEMORY_SUPPORT_NONE)
      return;

+   if (support == RADV_VIDEO_WRITE_MEMORY_SUPPORT_PCIE_ATOMICS) {
+      fprintf(stderr, "radv: VCN WRITE_MEMORY requires PCIe atomics support. Expect issues "
+                      "if PCIe atomics are not enabled on current device.\n");
+   }
+
   bool separate_queue = pdev->vid_decode_ip != AMD_IP_VCN_UNIFIED;
   if (cmd_buffer->qf == RADV_QUEUE_VIDEO_DEC && separate_queue && pdev->vid_dec_reg.data2) {
      radeon_check_space(device->ws, cs->b, 8);
@@ -819,6 +825,32 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, cons
   if (cap && !cap->valid)
      cap = NULL;

+   if (cap) {
+      pCapabilities->maxCodedExtent.width = cap->max_width;
+      pCapabilities->maxCodedExtent.height = cap->max_height;
+   } else {
+      switch (pVideoProfile->videoCodecOperation) {
+      case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
+         pCapabilities->maxCodedExtent.width = (pdev->info.family < CHIP_TONGA) ? 2048 : 4096;
+         pCapabilities->maxCodedExtent.height = (pdev->info.family < CHIP_TONGA) ? 1152 : 4096;
+         break;
+      case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
+         pCapabilities->maxCodedExtent.width =
+            (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
+         pCapabilities->maxCodedExtent.height =
+            (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
+         break;
+      case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR:
+         pCapabilities->maxCodedExtent.width =
+            (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
+         pCapabilities->maxCodedExtent.height =
+            (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
+         break;
+      default:
+         break;
+      }
+   }
+
   pCapabilities->flags = 0;
   pCapabilities->pictureAccessGranularity.width = VK_VIDEO_H264_MACROBLOCK_WIDTH;
   pCapabilities->pictureAccessGranularity.height = VK_VIDEO_H264_MACROBLOCK_HEIGHT;

@@ -1126,32 +1158,6 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, cons
      break;
   }

-   if (cap) {
-      pCapabilities->maxCodedExtent.width = cap->max_width;
-      pCapabilities->maxCodedExtent.height = cap->max_height;
-   } else {
-      switch (pVideoProfile->videoCodecOperation) {
-      case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
-         pCapabilities->maxCodedExtent.width = (pdev->info.family < CHIP_TONGA) ? 2048 : 4096;
-         pCapabilities->maxCodedExtent.height = (pdev->info.family < CHIP_TONGA) ? 1152 : 4096;
-         break;
-      case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
-         pCapabilities->maxCodedExtent.width =
-            (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
-         pCapabilities->maxCodedExtent.height =
-            (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
-         break;
-      case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR:
-         pCapabilities->maxCodedExtent.width =
-            (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
-         pCapabilities->maxCodedExtent.height =
-            (pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
-         break;
-      default:
-         break;
-      }
-   }
-
   return VK_SUCCESS;
}
@@ -73,6 +73,19 @@ struct radv_video_session {
   bool session_initialized;
};

+/**
+ * WRITE_MEMORY support in FW.
+ *
+ * none: Not supported at all. Old VCN FW and all UVD.
+ * pcie_atomics: Supported, relies on PCIe atomics.
+ * full: Supported, works also without PCIe atomics.
+ */
+enum radv_video_write_memory_support {
+   RADV_VIDEO_WRITE_MEMORY_SUPPORT_NONE = 0,
+   RADV_VIDEO_WRITE_MEMORY_SUPPORT_PCIE_ATOMICS,
+   RADV_VIDEO_WRITE_MEMORY_SUPPORT_FULL,
+};
+
VK_DEFINE_NONDISP_HANDLE_CASTS(radv_video_session, vk.base, VkVideoSessionKHR, VK_OBJECT_TYPE_VIDEO_SESSION_KHR)

void radv_init_physical_device_decoder(struct radv_physical_device *pdev);

@@ -98,7 +111,7 @@ void radv_video_get_enc_dpb_image(struct radv_device *device, const struct VkVid
bool radv_video_decode_vp9_supported(const struct radv_physical_device *pdev);
bool radv_video_encode_av1_supported(const struct radv_physical_device *pdev);
bool radv_video_encode_qp_map_supported(const struct radv_physical_device *pdev);
-bool radv_video_write_memory_supported(const struct radv_physical_device *pdev);
+enum radv_video_write_memory_support radv_video_write_memory_supported(const struct radv_physical_device *pdev);
uint32_t radv_video_get_qp_map_texel_size(VkVideoCodecOperationFlagBitsKHR codec);
bool radv_check_vcn_fw_version(const struct radv_physical_device *pdev, uint32_t dec, uint32_t enc, uint32_t rev);
@@ -890,7 +890,6 @@ radv_enc_slice_header(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
   uint32_t num_bits[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0};
   const struct VkVideoEncodeH264PictureInfoKHR *h264_picture_info =
      vk_find_struct_const(enc_info->pNext, VIDEO_ENCODE_H264_PICTURE_INFO_KHR);
-   int slice_count = h264_picture_info->naluSliceEntryCount;
   const StdVideoEncodeH264PictureInfo *pic = h264_picture_info->pStdPictureInfo;
   const StdVideoH264SequenceParameterSet *sps =
      vk_video_find_h264_enc_std_sps(cmd_buffer->video.params, pic->seq_parameter_set_id);

@@ -903,8 +902,6 @@ radv_enc_slice_header(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
   unsigned int cdw_filled = 0;
   unsigned int bits_copied = 0;

-   assert(slice_count <= 1);
-
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   struct radv_cmd_stream *cs = cmd_buffer->cs;

@@ -2861,7 +2858,8 @@ radv_vcn_encode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf

   if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_2) {
      radv_vcn_sq_tail(cs, &cmd_buffer->video.sq);
-      radv_vcn_write_memory(cmd_buffer, feedback_query_va + RADV_ENC_FEEDBACK_STATUS_IDX * sizeof(uint32_t), 1);
+      if (radv_video_write_memory_supported(pdev) == RADV_VIDEO_WRITE_MEMORY_SUPPORT_FULL)
+         radv_vcn_write_memory(cmd_buffer, feedback_query_va + RADV_ENC_FEEDBACK_STATUS_IDX * sizeof(uint32_t), 1);
   }
}
@@ -3166,6 +3164,36 @@ radv_video_patch_encode_session_parameters(struct radv_device *device, struct vk
      }
      break;
   case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR: {
+      for (unsigned i = 0; i < params->h265_enc.h265_sps_count; i++) {
+         uint32_t pic_width_in_luma_samples =
+            params->h265_enc.h265_sps[i].base.pic_width_in_luma_samples;
+         uint32_t pic_height_in_luma_samples =
+            params->h265_enc.h265_sps[i].base.pic_height_in_luma_samples;
+         uint32_t aligned_pic_width = align(pic_width_in_luma_samples, 64);
+         uint32_t aligned_pic_height = align(pic_height_in_luma_samples, 16);
+
+         /* Override the unaligned pic_{width,height} and make up for it with conformance window
+          * cropping */
+         params->h265_enc.h265_sps[i].base.pic_width_in_luma_samples = aligned_pic_width;
+         params->h265_enc.h265_sps[i].base.pic_height_in_luma_samples = aligned_pic_height;
+
+         if (aligned_pic_width != pic_width_in_luma_samples ||
+             aligned_pic_height != pic_height_in_luma_samples) {
+            params->h265_enc.h265_sps[i].base.flags.conformance_window_flag = 1;
+            params->h265_enc.h265_sps[i].base.conf_win_right_offset +=
+               (aligned_pic_width - pic_width_in_luma_samples) / 2;
+            params->h265_enc.h265_sps[i].base.conf_win_bottom_offset +=
+               (aligned_pic_height - pic_height_in_luma_samples) / 2;
+         }
+
+         /* VCN supports only the following block sizes (resulting in 64x64 CTBs with any coding
+          * block size) */
+         params->h265_enc.h265_sps[i].base.log2_min_luma_coding_block_size_minus3 = 0;
+         params->h265_enc.h265_sps[i].base.log2_diff_max_min_luma_coding_block_size = 3;
+         params->h265_enc.h265_sps[i].base.log2_min_luma_transform_block_size_minus2 = 0;
+         params->h265_enc.h265_sps[i].base.log2_diff_max_min_luma_transform_block_size = 3;
+      }
+
      for (unsigned i = 0; i < params->h265_enc.h265_pps_count; i++) {
         /* cu_qp_delta needs to be enabled if rate control is enabled. VCN2 and newer can also enable
          * it with rate control disabled. Since we don't know what rate control will be used, we
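A worked example of the override above (numbers chosen for illustration): a 1920x1080 SPS aligns to 1920x1088, so conformance_window_flag is set and conf_win_bottom_offset grows by (1088 - 1080) / 2 = 4. The division by two reflects that H.265 conformance-window offsets are expressed in chroma units (SubHeightC = 2) for 4:2:0 content.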
@@ -3268,6 +3296,14 @@ radv_GetEncodedVideoSessionParametersKHR(VkDevice device,
      assert(sps);
      char *data_ptr = pData ? (char *)pData + vps_size : NULL;
      vk_video_encode_h265_sps(sps, size_limit, &sps_size, data_ptr);
+
+      if (pFeedbackInfo) {
+         struct VkVideoEncodeH265SessionParametersFeedbackInfoKHR *h265_feedback_info =
+            vk_find_struct(pFeedbackInfo->pNext, VIDEO_ENCODE_H265_SESSION_PARAMETERS_FEEDBACK_INFO_KHR);
+         pFeedbackInfo->hasOverrides = VK_TRUE;
+         if (h265_feedback_info)
+            h265_feedback_info->hasStdSPSOverrides = VK_TRUE;
+      }
   }
   if (h265_get_info->writeStdPPS) {
      const StdVideoH265PictureParameterSet *pps = vk_video_find_h265_enc_std_pps(templ, h265_get_info->stdPPSId);

@@ -3421,17 +3457,20 @@ radv_video_encode_qp_map_supported(const struct radv_physical_device *pdev)
   return true;
}

-bool
+enum radv_video_write_memory_support
radv_video_write_memory_supported(const struct radv_physical_device *pdev)
{
-   if (pdev->info.vcn_ip_version >= VCN_5_0_0)
-      return true;
-   else if (pdev->info.vcn_ip_version >= VCN_4_0_0)
-      return pdev->info.vcn_enc_minor_version >= 22;
-   else if (pdev->info.vcn_ip_version >= VCN_3_0_0)
-      return pdev->info.vcn_enc_minor_version >= 33;
-   else if (pdev->info.vcn_ip_version >= VCN_2_0_0)
-      return pdev->info.vcn_enc_minor_version >= 24;
-   else /* VCN 1 and UVD */
-      return false;
+   if (pdev->info.vcn_ip_version >= VCN_5_0_0) {
+      return RADV_VIDEO_WRITE_MEMORY_SUPPORT_PCIE_ATOMICS;
+   } else if (pdev->info.vcn_ip_version >= VCN_4_0_0) {
+      if (pdev->info.vcn_enc_minor_version >= 22)
+         return RADV_VIDEO_WRITE_MEMORY_SUPPORT_PCIE_ATOMICS;
+   } else if (pdev->info.vcn_ip_version >= VCN_3_0_0) {
+      if (pdev->info.vcn_enc_minor_version >= 33)
+         return RADV_VIDEO_WRITE_MEMORY_SUPPORT_PCIE_ATOMICS;
+   } else if (pdev->info.vcn_ip_version >= VCN_2_0_0) {
+      if (pdev->info.vcn_enc_minor_version >= 24)
+         return RADV_VIDEO_WRITE_MEMORY_SUPPORT_PCIE_ATOMICS;
+   }
+   return RADV_VIDEO_WRITE_MEMORY_SUPPORT_NONE;
}
File diff suppressed because it is too large
@@ -24,7 +24,8 @@ ail_initialize_linear(struct ail_layout *layout)
   layout->layer_stride_B = align64(
      (uint64_t)layout->linear_stride_B * layout->height_px, AIL_CACHELINE);

-   layout->size_B = layout->layer_stride_B * layout->depth_px;
+   layout->size_B =
+      layout->level_offsets_B[0] + (layout->layer_stride_B * layout->depth_px);
}

/*

@@ -341,6 +342,7 @@ ail_make_miptree(struct ail_layout *layout)
      assert(layout->linear_stride_B == 0 && "Invalid nonlinear layout");
      assert(layout->levels >= 1 && "Invalid dimensions");
      assert(layout->sample_count_sa >= 1 && "Invalid sample count");
+      assert(layout->level_offsets_B[0] == 0 && "Invalid offset");
   }

   assert(!(layout->writeable_image && layout->compressed) &&
@@ -133,6 +133,7 @@ agx_virtio_bo_bind(struct agx_device *dev, struct drm_asahi_gem_bind_op *ops,
   memcpy(req->payload, ops, payload_size);

   int ret = vdrm_send_req(dev->vdrm, &req->hdr, false);
+   free(req);
   if (ret) {
      fprintf(stderr, "ASAHI_CCMD_GEM_BIND failed: %d\n", ret);
   }
@@ -992,28 +992,34 @@ hk_CmdEndRendering(VkCommandBuffer commandBuffer)
   }
}

+static void
+hk_init_heap(const void *data) {
+   struct hk_cmd_buffer *cmd = (struct hk_cmd_buffer *) data;
+   struct hk_device *dev = hk_cmd_buffer_device(cmd);
+
+   perf_debug(cmd, "Allocating heap");
+
+   size_t size = 128 * 1024 * 1024;
+   dev->heap = agx_bo_create(&dev->dev, size, 0, 0, "Geometry heap");
+
+   /* The geometry state buffer is initialized here and then is treated by
+    * the CPU as rodata, even though the GPU uses it for scratch internally.
+    */
+   off_t off = dev->rodata.heap - dev->rodata.bo->va->addr;
+   struct agx_heap *map = agx_bo_map(dev->rodata.bo) + off;
+
+   *map = (struct agx_heap){
+      .base = dev->heap->va->addr,
+      .size = size,
+   };
+}
+
static uint64_t
hk_heap(struct hk_cmd_buffer *cmd)
{
   struct hk_device *dev = hk_cmd_buffer_device(cmd);

-   if (unlikely(!dev->heap)) {
-      perf_debug(cmd, "Allocating heap");
-
-      size_t size = 128 * 1024 * 1024;
-      dev->heap = agx_bo_create(&dev->dev, size, 0, 0, "Geometry heap");
-
-      /* The geometry state buffer is initialized here and then is treated by
-       * the CPU as rodata, even though the GPU uses it for scratch internally.
-       */
-      off_t off = dev->rodata.heap - dev->rodata.bo->va->addr;
-      struct agx_heap *map = agx_bo_map(dev->rodata.bo) + off;
-
-      *map = (struct agx_heap){
-         .base = dev->heap->va->addr,
-         .size = size,
-      };
-   }
+   util_call_once_data(&dev->heap_init_once, hk_init_heap, cmd);

   /* We need to free all allocations after each command buffer execution */
   if (!cmd->uses_heap) {
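The rework above also closes a race: two threads recording command buffers could both pass the old `if (unlikely(!dev->heap))` check and allocate twice, while util_call_once_data guarantees hk_init_heap runs exactly once. Shape of the pattern (a sketch; the header path is assumed, and the once flag is presumably initialized with UTIL_ONCE_FLAG_INIT at device creation):

   #include "util/u_call_once.h" /* assumed include path */

   static util_once_flag once = UTIL_ONCE_FLAG_INIT;

   static void init_fn(const void *data)
   {
      /* runs exactly once, even with concurrent callers */
   }

   void use(const void *user_data)
   {
      util_call_once_data(&once, init_fn, user_data);
   }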
@@ -330,6 +330,7 @@ hk_GetDescriptorSetLayoutSupport(
   uint64_t non_variable_size = 0;
   uint32_t variable_stride = 0;
   uint32_t variable_count = 0;
+   bool variable_is_inline_uniform_block = false;
   uint8_t dynamic_buffer_count = 0;

   for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) {

@@ -362,6 +363,10 @@ hk_GetDescriptorSetLayoutSupport(
          */
         variable_count = MAX2(1, binding->descriptorCount);
         variable_stride = stride;
+
+         variable_is_inline_uniform_block =
+            binding->descriptorType ==
+            VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK;
      } else {
         /* Since we're aligning to the maximum and since this is just a
          * check for whether or not the max buffer size is big enough, we

@@ -393,12 +398,21 @@ hk_GetDescriptorSetLayoutSupport(
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT: {
         VkDescriptorSetVariableDescriptorCountLayoutSupport *vs = (void *)ext;
+        uint32_t max_var_count;
+
         if (variable_stride > 0) {
-           vs->maxVariableDescriptorCount =
+           max_var_count =
              (max_buffer_size - non_variable_size) / variable_stride;
         } else {
-           vs->maxVariableDescriptorCount = 0;
+           max_var_count = 0;
         }
+
+        if (variable_is_inline_uniform_block) {
+           max_var_count =
+              MIN2(max_var_count, HK_MAX_INLINE_UNIFORM_BLOCK_SIZE);
+        }
+
+        vs->maxVariableDescriptorCount = max_var_count;
        break;
      }
@@ -92,6 +92,7 @@ struct hk_device {
    * expected to be a legitimate problem. If it is, we can rework later.
    */
   struct agx_bo *heap;
+   util_once_flag heap_init_once;

   struct {
      struct agx_scratch vs, fs, cs;
@@ -67,7 +67,7 @@ get_drm_format_modifier_properties_list(
{
   *out_props = (VkDrmFormatModifierPropertiesEXT){
      .drmFormatModifier = mod,
-      .drmFormatModifierPlaneCount = 1 /* no planar mods */,
+      .drmFormatModifierPlaneCount = vk_format_get_plane_count(vk_format),
      .drmFormatModifierTilingFeatures = flags,
   };
};

@@ -96,7 +96,7 @@ get_drm_format_modifier_properties_list_2(
{
   *out_props = (VkDrmFormatModifierProperties2EXT){
      .drmFormatModifier = mod,
-      .drmFormatModifierPlaneCount = 1, /* no planar mods */
+      .drmFormatModifierPlaneCount = vk_format_get_plane_count(vk_format),
      .drmFormatModifierTilingFeatures = flags,
   };
};
@@ -1424,6 +1424,13 @@ hk_copy_memory_to_image(struct hk_device *device, struct hk_image *dst_image,
   uint32_t src_height = info->memoryImageHeight ?: extent.height;

   uint32_t blocksize_B = util_format_get_blocksize(layout->format);
+
+   /* Align width and height to block */
+   src_width =
+      DIV_ROUND_UP(src_width, util_format_get_blockwidth(layout->format));
+   src_height =
+      DIV_ROUND_UP(src_height, util_format_get_blockheight(layout->format));
+
   uint32_t src_pitch = src_width * blocksize_B;

   unsigned start_layer = (dst_image->vk.image_type == VK_IMAGE_TYPE_3D)

@@ -1496,6 +1503,13 @@ hk_copy_image_to_memory(struct hk_device *device, struct hk_image *src_image,
#endif

   uint32_t blocksize_B = util_format_get_blocksize(layout->format);
+
+   /* Align width and height to block */
+   dst_width =
+      DIV_ROUND_UP(dst_width, util_format_get_blockwidth(layout->format));
+   dst_height =
+      DIV_ROUND_UP(dst_height, util_format_get_blockheight(layout->format));
+
   uint32_t dst_pitch = dst_width * blocksize_B;

   unsigned start_layer = (src_image->vk.image_type == VK_IMAGE_TYPE_3D)

@@ -1649,11 +1663,6 @@ hk_copy_image_to_image_cpu(struct hk_device *device, struct hk_image *src_image,
                           &device->physical_device->ubwc_config);
#endif
   } else {
-      /* Work tile-by-tile, holding the unswizzled tile in a temporary
-       * buffer.
-       */
-      char temp_tile[16384];
-
      unsigned src_level = info->srcSubresource.mipLevel;
      unsigned dst_level = info->dstSubresource.mipLevel;
      uint32_t block_width = src_layout->tilesize_el[src_level].width_el;

@@ -1667,6 +1676,12 @@ hk_copy_image_to_image_cpu(struct hk_device *device, struct hk_image *src_image,
      }

      uint32_t temp_pitch = block_width * src_block_B;
+      size_t temp_tile_size = temp_pitch * (src_offset.y + extent.height);
+
+      /* Work tile-by-tile, holding the unswizzled tile in a temporary
+       * buffer.
+       */
+      char *temp_tile = malloc(temp_tile_size);

      for (unsigned by = src_offset.y / block_height;
           by * block_height < src_offset.y + extent.height; by++) {
@@ -1683,14 +1698,14 @@ hk_copy_image_to_image_cpu(struct hk_device *device, struct hk_image *src_image,
               MIN2((bx + 1) * block_width, src_offset.x + extent.width) -
               src_x_start;

-            assert(height * temp_pitch <= ARRAY_SIZE(temp_tile));
-
            ail_detile((void *)src, temp_tile, src_layout, src_level,
                       temp_pitch, src_x_start, src_y_start, width, height);
            ail_tile(dst, temp_tile, dst_layout, dst_level, temp_pitch,
                     dst_x_start, dst_y_start, width, height);
         }
      }
+
+      free(temp_tile);
   }
}
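Worked numbers for the sizing change above (tile dimensions hypothetical): each per-tile copy needs height x temp_pitch bytes, and height can reach the tile's block_height. With a 128-element-wide tile of 4-byte blocks, temp_pitch is 512 bytes, so any block_height above 32 rows already overflows the old fixed 16384-byte stack buffer (512 x 32 == 16384). The heap allocation of temp_pitch * (src_offset.y + extent.height) bytes upper-bounds height for every tile the loop visits.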
@@ -859,7 +859,7 @@ hk_get_device_properties(const struct agx_device *dev,
   .maxSubgroupSize = 32,
   .maxComputeWorkgroupSubgroups = 1024 / 32,
   .requiredSubgroupSizeStages = 0,
-   .maxInlineUniformBlockSize = 1 << 16,
+   .maxInlineUniformBlockSize = HK_MAX_INLINE_UNIFORM_BLOCK_SIZE,
   .maxPerStageDescriptorInlineUniformBlocks = 32,
   .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 32,
   .maxDescriptorSetInlineUniformBlocks = 6 * 32,

@@ -953,7 +953,7 @@ hk_get_device_properties(const struct agx_device *dev,
   .robustUniformBufferAccessSizeAlignment = HK_MIN_UBO_ALIGNMENT,

   /* VK_EXT_sample_locations */
-   .sampleLocationSampleCounts = sample_counts,
+   .sampleLocationSampleCounts = sample_counts & ~VK_SAMPLE_COUNT_1_BIT,
   .maxSampleLocationGridSize = (VkExtent2D){1, 1},
   .sampleLocationCoordinateRange[0] = 0.0f,
   .sampleLocationCoordinateRange[1] = 0.9375f,
@@ -12,18 +12,19 @@
#include "vk_log.h"
#include "vk_util.h"

-#define HK_MAX_SETS 8
-#define HK_MAX_PUSH_SIZE 256
-#define HK_MAX_DYNAMIC_BUFFERS 64
-#define HK_MAX_RTS 8
-#define HK_MIN_SSBO_ALIGNMENT 16
-#define HK_MIN_TEXEL_BUFFER_ALIGNMENT 16
-#define HK_MIN_UBO_ALIGNMENT 64
-#define HK_MAX_VIEWPORTS 16
-#define HK_MAX_DESCRIPTOR_SIZE 64
-#define HK_MAX_PUSH_DESCRIPTORS 32
-#define HK_MAX_DESCRIPTOR_SET_SIZE (1u << 30)
-#define HK_MAX_DESCRIPTORS (1 << 20)
+#define HK_MAX_SETS                      8
+#define HK_MAX_PUSH_SIZE                 256
+#define HK_MAX_DYNAMIC_BUFFERS           64
+#define HK_MAX_RTS                       8
+#define HK_MIN_SSBO_ALIGNMENT            16
+#define HK_MIN_TEXEL_BUFFER_ALIGNMENT    16
+#define HK_MIN_UBO_ALIGNMENT             64
+#define HK_MAX_VIEWPORTS                 16
+#define HK_MAX_DESCRIPTOR_SIZE           64
+#define HK_MAX_PUSH_DESCRIPTORS          32
+#define HK_MAX_DESCRIPTOR_SET_SIZE       (1u << 30)
+#define HK_MAX_INLINE_UNIFORM_BLOCK_SIZE (1u << 16)
+#define HK_MAX_DESCRIPTORS               (1 << 20)
#define HK_PUSH_DESCRIPTOR_SET_SIZE \
   (HK_MAX_PUSH_DESCRIPTORS * HK_MAX_DESCRIPTOR_SIZE)
#define HK_SSBO_BOUNDS_CHECK_ALIGNMENT 4
@@ -812,11 +812,6 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
   /* Now setup the command structs */
   struct util_dynarray payload;
   util_dynarray_init(&payload, NULL);
-   union drm_asahi_cmd *cmds = malloc(sizeof(*cmds) * command_count);
-   if (cmds == NULL) {
-      free(cmds);
-      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
-   }

   unsigned nr_vdm = 0, nr_cdm = 0;
@@ -319,14 +319,10 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
   case nir_intrinsic_load_base_global_invocation_id:
   case nir_intrinsic_load_base_workgroup_id:
   case nir_intrinsic_load_alpha_reference_amd:
-   case nir_intrinsic_load_ubo_uniform_block_intel:
-   case nir_intrinsic_load_ssbo_uniform_block_intel:
-   case nir_intrinsic_load_shared_uniform_block_intel:
   case nir_intrinsic_load_barycentric_optimize_amd:
   case nir_intrinsic_load_poly_line_smooth_enabled:
   case nir_intrinsic_load_rasterization_primitive_amd:
   case nir_intrinsic_unit_test_uniform_amd:
-   case nir_intrinsic_load_global_constant_uniform_block_intel:
   case nir_intrinsic_load_debug_log_desc_amd:
   case nir_intrinsic_load_xfb_state_address_gfx12_amd:
   case nir_intrinsic_cmat_length:

@@ -364,6 +360,24 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
      is_divergent = false;
      break;

+   case nir_intrinsic_load_ubo_uniform_block_intel:
+   case nir_intrinsic_load_ssbo_uniform_block_intel:
+   case nir_intrinsic_load_shared_uniform_block_intel:
+   case nir_intrinsic_load_global_constant_uniform_block_intel:
+      if (options & (nir_divergence_across_subgroups |
+                     nir_divergence_multiple_workgroup_per_compute_subgroup)) {
+         unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
+         for (unsigned i = 0; i < num_srcs; i++) {
+            if (src_divergent(instr->src[i], state)) {
+               is_divergent = true;
+               break;
+            }
+         }
+      } else {
+         is_divergent = false;
+      }
+      break;
+
   /* This is divergent because it specifically loads sequential values into
    * successive SIMD lanes.
    */
@@ -1069,6 +1069,7 @@ nir_get_io_index_src_number(const nir_intrinsic_instr *instr)
   IMG_CASE(atomic):
   IMG_CASE(atomic_swap):
   IMG_CASE(size):
+   IMG_CASE(levels):
   IMG_CASE(samples):
   IMG_CASE(texel_address):
   IMG_CASE(samples_identical):
@@ -1228,8 +1228,16 @@ wrap_instr(nir_builder *b, nir_instr *instr, void *data)
static bool
wrap_instrs(nir_shader *shader, wrap_instr_callback callback)
{
-   return nir_shader_instructions_pass(shader, wrap_instr,
-                                       nir_metadata_none, callback);
+   bool progress = nir_shader_instructions_pass(shader, wrap_instr,
+                                                nir_metadata_none, callback);
+   /* Wrapping jump instructions that are located inside ifs can break SSA
+    * invariants because the else block no longer dominates the merge block.
+    * Repair the SSA to make the validator happy again.
+    */
+   if (progress)
+      nir_repair_ssa(shader);
+
+   return progress;
}

static bool
@@ -4096,9 +4096,9 @@ distribute_src_mods = [
   (('fneg', ('fmul(is_used_once)', a, b)), ('fmul', ('fneg', a), b)),
   (('fabs', ('fmul(is_used_once)', a, b)), ('fmul', ('fabs', a), ('fabs', b))),

-   (('fneg', ('ffma(is_used_once)', a, b, c)), ('ffma', ('fneg', a), b, ('fneg', c))),
+   (('fneg', ('ffma(is_used_once,nsz)', a, b, c)), ('ffma', ('fneg', a), b, ('fneg', c))),
   (('fneg', ('flrp(is_used_once)', a, b, c)), ('flrp', ('fneg', a), ('fneg', b), c)),
-   (('fneg', ('~fadd(is_used_once)', a, b)), ('fadd', ('fneg', a), ('fneg', b))),
+   (('fneg', ('fadd(is_used_once,nsz)', a, b)), ('fadd', ('fneg', a), ('fneg', b))),

   # Note that fmin <-> fmax. I don't think there is a way to distribute
   # fabs() into fmin or fmax.
@@ -82,7 +82,9 @@ opt_shrink_store_instr(nir_builder *b, nir_intrinsic_instr *instr, bool shrink_i

   /* Trim the num_components stored according to the write mask. */
   unsigned write_mask = nir_intrinsic_write_mask(instr);
-   unsigned last_bit = util_last_bit(write_mask);
+   /* Don't trim down to an invalid number of components, though. */
+   unsigned last_bit = nir_round_up_components(util_last_bit(write_mask));

   if (last_bit < instr->num_components) {
      nir_def *def = nir_trim_vector(b, instr->src[0].ssa, last_bit);
      nir_src_rewrite(&instr->src[0], def);
@@ -652,6 +652,7 @@ nir_precompiled_build_variant(const nir_function *libfunc,

   assert(libfunc->workgroup_size[0] != 0 && "must set workgroup size");

+   b.shader->info.workgroup_size_variable = false;
   b.shader->info.workgroup_size[0] = libfunc->workgroup_size[0];
   b.shader->info.workgroup_size[1] = libfunc->workgroup_size[1];
   b.shader->info.workgroup_size[2] = libfunc->workgroup_size[2];
@@ -506,8 +506,8 @@ vtn_pointer_dereference(struct vtn_builder *b,
            type = type->array_element;
         }
         tail = nir_build_deref_array(&b->nb, tail, arr_index);
-         tail->arr.in_bounds = deref_chain->in_bounds;
      }
+      tail->arr.in_bounds = deref_chain->in_bounds;

      access |= type->access;
   }
@@ -564,7 +564,7 @@ tiled_to_linear_2cpp(char *_tiled, char *_linear, uint32_t linear_pitch)
         "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15");
   }
#else
-   memcpy_small<2, LINEAR_TO_TILED, FDL_MACROTILE_4_CHANNEL>(
+   memcpy_small<2, TILED_TO_LINEAR, FDL_MACROTILE_4_CHANNEL>(
      0, 0, 32, 4, _tiled, _linear, linear_pitch, 0, 0, 0);
#endif
}
@@ -2300,6 +2300,17 @@ insert_live_out_moves(struct ra_ctx *ctx)
   insert_file_live_out_moves(ctx, &ctx->shared);
}

+static bool
+has_merge_set_preferred_reg(struct ir3_register *reg)
+{
+   assert(reg->merge_set);
+   assert(reg->num != INVALID_REG);
+
+   return reg->merge_set->preferred_reg != (physreg_t)~0 &&
+          ra_reg_get_physreg(reg) ==
+             reg->merge_set->preferred_reg + reg->merge_set_offset;
+}
+
static void
handle_block(struct ra_ctx *ctx, struct ir3_block *block)
{

@@ -2338,17 +2349,15 @@ handle_block(struct ra_ctx *ctx, struct ir3_block *block)
      struct ir3_register *dst = input->dsts[0];
      assert(dst->num != INVALID_REG);

-      physreg_t dst_start = ra_reg_get_physreg(dst);
      physreg_t dst_end;

-      if (dst->merge_set) {
+      if (dst->merge_set && has_merge_set_preferred_reg(dst)) {
         /* Take the whole merge set into account to prevent its range being
          * allocated for defs not part of the merge set.
          */
-         assert(dst_start >= dst->merge_set_offset);
-         dst_end = dst_start - dst->merge_set_offset + dst->merge_set->size;
+         dst_end = dst->merge_set->preferred_reg + dst->merge_set->size;
      } else {
-         dst_end = dst_start + reg_size(dst);
+         dst_end = ra_reg_get_physreg(dst) + reg_size(dst);
      }

      struct ra_file *file = ra_get_file(ctx, dst);
@@ -1461,6 +1461,15 @@ r3d_dst_gmem(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
      gmem_offset = tu_attachment_gmem_offset(cmd, att, layer);
   }

+   /* On a7xx we must always use FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8. See
+    * blit_base_format().
+    */
+   if (CHIP >= A7XX && att->format == VK_FORMAT_D24_UNORM_S8_UINT) {
+      RB_MRT_BUF_INFO = pkt_field_set(A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT,
+                                      RB_MRT_BUF_INFO,
+                                      FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8);
+   }
+
   tu_cs_emit_regs(cs,
                   RB_MRT_BUF_INFO(CHIP, 0, .dword = RB_MRT_BUF_INFO),
                   A6XX_RB_MRT_PITCH(0, 0),

@@ -1533,7 +1542,8 @@ r3d_setup(struct tu_cmd_buffer *cmd,
      tu_cs_emit_call(cs, cmd->device->dbg_renderpass_stomp_cs);
   }

-   enum a6xx_format fmt = blit_base_format<CHIP>(dst_format, ubwc, false);
+   enum a6xx_format fmt = blit_base_format<CHIP>(dst_format, ubwc,
+                                                 blit_param & R3D_DST_GMEM);
   fixup_dst_format(src_format, &dst_format, &fmt);

   if (!cmd->state.pass) {

@@ -4638,7 +4648,7 @@ clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
   enum pipe_format format = vk_format_to_pipe_format(vk_format);
   const struct tu_framebuffer *fb = cmd->state.framebuffer;
   const struct tu_image_view *iview = cmd->state.attachments[a];
-   const uint32_t clear_views = cmd->state.pass->attachments[a].clear_views;
+   const uint32_t clear_views = cmd->state.pass->attachments[a].used_views;
   const struct blit_ops *ops = &r2d_ops<CHIP>;
   const VkClearValue *value = &cmd->state.clear_values[a];
   if (cmd->state.pass->attachments[a].samples > 1)

@@ -4734,7 +4744,7 @@ tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,

   tu_emit_clear_gmem_attachment<CHIP>(cmd, cs, resolve_group, a, 0,
                                       cmd->state.framebuffer->layers,
-                                      attachment->clear_views,
+                                      attachment->used_views,
                                       attachment->clear_mask,
                                       &cmd->state.clear_values[a], NULL);
}

@@ -4755,7 +4765,7 @@ tu7_generic_clear_attachment(struct tu_cmd_buffer *cmd,
                              iview->view.ubwc_enabled, att->samples);

   enum pipe_format format = vk_format_to_pipe_format(att->format);
-   for_each_layer(i, att->clear_views, cmd->state.framebuffer->layers) {
+   for_each_layer(i, att->used_views, cmd->state.framebuffer->layers) {
      uint32_t layer = i + 0;
      uint32_t mask =
         aspect_write_mask_generic_clear(format, att->clear_mask);

@@ -4836,7 +4846,7 @@ tu_emit_blit(struct tu_cmd_buffer *cmd,
   uint32_t buffer_id = tu_resolve_group_include_buffer<CHIP>(resolve_group, format);
   event_blit_setup(cs, buffer_id, attachment, blit_event_type, clear_mask);

-   for_each_layer(i, attachment->clear_views, cmd->state.framebuffer->layers) {
+   for_each_layer(i, attachment->used_views, cmd->state.framebuffer->layers) {
      event_blit_dst_view blt_view = blt_view_from_tu_view(iview, i);
      event_blit_run<CHIP>(cmd, cs, attachment, &blt_view, separate_stencil);
   }

@@ -4951,7 +4961,7 @@ load_3d_blit(struct tu_cmd_buffer *cmd,
   /* Wait for CACHE_INVALIDATE to land */
   tu_cs_emit_wfi(cs);

-   for_each_layer(i, att->clear_views, cmd->state.framebuffer->layers) {
+   for_each_layer(i, att->used_views, cmd->state.framebuffer->layers) {
      if (cmd->state.pass->has_fdm) {
         struct apply_load_coords_state state = {
            .view = i,
@@ -1616,7 +1616,7 @@ tu6_emit_gmem_stores(struct tu_cmd_buffer *cmd,
          scissor_emitted = true;
       }
       tu_store_gmem_attachment<CHIP>(cmd, cs, resolve_group, a, a,
-                                     fb->layers, subpass->multiview_mask,
+                                     fb->layers, att->used_views,
                                      cond_exec_allowed);
    }
 }
@@ -208,8 +208,8 @@ tu_CreateDescriptorSetLayout(
       if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
          set_layout->has_inline_uniforms = true;

-      if (variable_flags && binding->binding < variable_flags->bindingCount &&
-          (variable_flags->pBindingFlags[binding->binding] &
+      if (variable_flags && j < variable_flags->bindingCount &&
+          (variable_flags->pBindingFlags[j] &
           VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)) {
         assert(!binding->pImmutableSamplers); /* Terribly ill defined how
                                                  many samplers are valid */

@@ -377,7 +377,7 @@ tu_GetDescriptorSetLayoutSupport(
       uint64_t max_count = MAX_SET_SIZE;
       unsigned descriptor_count = binding->descriptorCount;
       if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
-         max_count = MAX_SET_SIZE - size;
+         max_count = MAX_INLINE_UBO_RANGE - size;
         descriptor_count = descriptor_sz;
         descriptor_sz = 1;
      } else if (descriptor_sz) {

@@ -388,9 +388,9 @@ tu_GetDescriptorSetLayoutSupport(
         supported = false;
      }

-      if (variable_flags && binding->binding < variable_flags->bindingCount &&
+      if (variable_flags && i < variable_flags->bindingCount &&
          variable_count &&
-          (variable_flags->pBindingFlags[binding->binding] &
+          (variable_flags->pBindingFlags[i] &
           VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)) {
         variable_count->maxVariableDescriptorCount =
            MIN2(UINT32_MAX, max_count);
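Both hunks fix the same indexing bug: per the Vulkan spec, VkDescriptorSetLayoutBindingFlagsCreateInfo::pBindingFlags is parallel to the pBindings array, so element i describes pBindings[i] regardless of its .binding number. A hypothetical two-binding layout (a sketch, not driver code) shows why keying on the binding number goes wrong:

   #include <vulkan/vulkan.h>

   /* Hypothetical layout: array index and binding number disagree. */
   VkDescriptorSetLayoutBinding bindings[2] = {
      { .binding = 5, .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
        .descriptorCount = 1, .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT },
      { .binding = 0, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        .descriptorCount = 16, .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT },
   };
   VkDescriptorBindingFlags flags[2] = {
      0,
      VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT, /* pairs with bindings[1] */
   };
   /* flags[bindings[0].binding] would read flags[5], past the end of the
    * array; the loop index (j or i in the hunks above) is the correct key. */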
@@ -417,7 +417,8 @@ tu_render_pass_patch_input_gmem(struct tu_render_pass *pass)
          uint32_t a = subpass->input_attachments[j].attachment;
          if (a == VK_ATTACHMENT_UNUSED)
             continue;
-         subpass->input_attachments[j].patch_input_gmem = written[a];
+         subpass->input_attachments[j].patch_input_gmem =
+            written[a] && pass->attachments[a].gmem;
       }

       for (unsigned j = 0; j < subpass->color_count; j++) {

@@ -884,7 +885,7 @@ tu_subpass_use_attachment(struct tu_render_pass *pass, int i, uint32_t a, const

    att->gmem = true;
    update_samples(subpass, pCreateInfo->pAttachments[a].samples);
-   att->clear_views |= subpass->multiview_mask;
+   att->used_views |= subpass->multiview_mask;

    /* Loads and clears are emitted at the start of the subpass that needs them. */
    att->first_subpass_idx = MIN2(i, att->first_subpass_idx);

@@ -1126,6 +1127,7 @@ tu_CreateRenderPass2(VkDevice _device,
       if (!att->gmem) {
          att->clear_mask = 0;
          att->load = false;
+         att->load_stencil = false;
       }
    }

@@ -1235,7 +1237,7 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
       VK_FROM_HANDLE(tu_image_view, view, att_info->imageView);
       tu_setup_dynamic_attachment(att, view);
       att->gmem = true;
-      att->clear_views = info->viewMask;
+      att->used_views = info->viewMask;
       attachment_set_ops(device, att, att_info->loadOp,
                          VK_ATTACHMENT_LOAD_OP_DONT_CARE, att_info->storeOp,
                          VK_ATTACHMENT_STORE_OP_DONT_CARE);

@@ -1279,7 +1281,7 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
       struct tu_render_pass_attachment *att = &pass->attachments[a];
       tu_setup_dynamic_attachment(att, view);
       att->gmem = true;
-      att->clear_views = info->viewMask;
+      att->used_views = info->viewMask;
       subpass->depth_stencil_attachment.attachment = a++;
       subpass->input_attachments[0].attachment =
          subpass->depth_stencil_attachment.attachment;
@@ -94,7 +94,19 @@ struct tu_render_pass_attachment
    VkSampleCountFlagBits samples;
    uint32_t cpp;
    VkImageAspectFlags clear_mask;
-   uint32_t clear_views;
+
+   /* All views that are used with the attachment in all subpasses. Used to
+    * determine which views to apply loadOp/storeOp to.
+    */
+   uint32_t used_views;
+   /* The internal MSRTSS attachment to clear when the user says to clear
+    * this attachment. Clear values must be remapped to this attachment.
+    */
+   uint32_t remapped_clear_att;
+   /* For internal attachments created for MSRTSS, the original user attachment
+    * which it is resolved/unresolved to.
+    */
+   uint32_t user_att;
    bool load;
    bool store;
    bool gmem;
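The used_views comment above is the heart of the clear_views to used_views rename in this series: the mask accumulates the viewMask of every subpass that touches the attachment, and the loadOp/storeOp paths iterate exactly that mask. A standalone sketch with hypothetical masks:

   uint32_t used_views = 0;
   used_views |= 0x3;   /* subpass A renders views 0 and 1 */
   used_views |= 0x6;   /* subpass B renders views 1 and 2 */
   /* used_views == 0x7, so for_each_layer(i, used_views, layers) visits
    * views 0, 1 and 2 when emitting loads, stores and clears. */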
@@ -3157,8 +3157,6 @@ tu6_emit_blend(struct tu_cs *cs,

    bool dual_src_blend = tu_blend_state_is_dual_src(cb);

-   tu_cs_emit_regs(cs, A6XX_SP_PS_MRT_CNTL(.mrt = num_rts));
-   tu_cs_emit_regs(cs, A6XX_RB_PS_MRT_CNTL(.mrt = num_rts));
    tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL(.enable_blend = blend_enable_mask,
                                           .unk8 = true,
                                           .dual_color_in_enable =

@@ -3180,10 +3178,12 @@ tu6_emit_blend(struct tu_cs *cs,
                                        .alpha_to_one = alpha_to_one_enable,
                                        .sample_mask = sample_mask));

+   unsigned num_remapped_rts = 0;
    for (unsigned i = 0; i < num_rts; i++) {
       if (cal->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
          continue;
       unsigned remapped_idx = cal->color_map[i];
+      num_remapped_rts = MAX2(num_remapped_rts, remapped_idx + 1);
       const struct vk_color_blend_attachment_state *att = &cb->attachments[i];
       if ((cb->color_write_enables & (1u << i)) && i < cb->attachment_count) {
          const enum a3xx_rb_blend_opcode color_op = tu6_blend_op(att->color_blend_op);

@@ -3227,6 +3227,8 @@ tu6_emit_blend(struct tu_cs *cs,
                         A6XX_RB_MRT_BLEND_CONTROL(remapped_idx,));
       }
    }
+   tu_cs_emit_regs(cs, A6XX_SP_PS_MRT_CNTL(.mrt = num_remapped_rts));
+   tu_cs_emit_regs(cs, A6XX_RB_PS_MRT_CNTL(.mrt = num_remapped_rts));
 }

 static const enum mesa_vk_dynamic_graphics_state tu_blend_constants_state[] = {
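Moving the MRT_CNTL writes after the loop lets the count follow the attachment remap. The max-plus-one computation, restated as a standalone sketch with a hypothetical color map:

   /* color_map[i] gives the hardware MRT slot for shader output i;
    * -1 stands in for MESA_VK_ATTACHMENT_UNUSED here. */
   int color_map[3] = { 2, -1, 0 };
   unsigned num_remapped_rts = 0;
   for (unsigned i = 0; i < 3; i++) {
      if (color_map[i] < 0)
         continue;
      unsigned remapped_idx = (unsigned)color_map[i];
      if (remapped_idx + 1 > num_remapped_rts)
         num_remapped_rts = remapped_idx + 1;
   }
   /* num_remapped_rts == 3: slot 2 is in use, so the count must reach it
    * even though only two outputs are written. */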
@@ -88,10 +88,9 @@
 #define LLVMCreateBuilder ILLEGAL_LLVM_FUNCTION

 typedef struct lp_context_ref {
-#if GALLIVM_USE_ORCJIT
-   LLVMOrcThreadSafeContextRef ref;
-#else
    LLVMContextRef ref;
+#if GALLIVM_USE_ORCJIT
+   LLVMOrcThreadSafeContextRef tsref;
 #endif
    bool owned;
 } lp_context_ref;

@@ -101,18 +100,21 @@ lp_context_create(lp_context_ref *context)
 {
    assert(context != NULL);
 #if GALLIVM_USE_ORCJIT
-   context->ref = LLVMOrcCreateNewThreadSafeContext();
+#if LLVM_VERSION_MAJOR >= 21
+   context->ref = LLVMContextCreate();
+   /* Ownership of ref is then transferred to tsref */
+   context->tsref = LLVMOrcCreateNewThreadSafeContextFromLLVMContext(context->ref);
+#else
+   context->tsref = LLVMOrcCreateNewThreadSafeContext();
+   context->ref = LLVMOrcThreadSafeContextGetContext(context->tsref);
+#endif
 #else
    context->ref = LLVMContextCreate();
 #endif
    context->owned = true;
 #if LLVM_VERSION_MAJOR == 15
    if (context->ref) {
-#if GALLIVM_USE_ORCJIT
-      LLVMContextSetOpaquePointers(LLVMOrcThreadSafeContextGetContext(context->ref), false);
-#else
       LLVMContextSetOpaquePointers(context->ref, false);
-#endif
    }
 #endif
 }

@@ -123,7 +125,7 @@ lp_context_destroy(lp_context_ref *context)
    assert(context != NULL);
    if (context->owned) {
 #if GALLIVM_USE_ORCJIT
-      LLVMOrcDisposeThreadSafeContext(context->ref);
+      LLVMOrcDisposeThreadSafeContext(context->tsref);
 #else
       LLVMContextDispose(context->ref);
 #endif
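The ownership comment marks the LLVM API difference this diff works around: on LLVM 21 the plain context is created first and ownership moves into the thread-safe wrapper, while older releases create the wrapper first and borrow the plain handle from it. A minimal sketch of just that split, assuming the llvm-c headers and a build-provided LLVM_VERSION_MAJOR:

   #include <llvm-c/Core.h>
   #include <llvm-c/Orc.h>

   static void
   make_ts_context(LLVMContextRef *ref, LLVMOrcThreadSafeContextRef *tsref)
   {
   #if LLVM_VERSION_MAJOR >= 21
      /* Create the context, then move ownership into the wrapper. */
      *ref = LLVMContextCreate();
      *tsref = LLVMOrcCreateNewThreadSafeContextFromLLVMContext(*ref);
   #else
      /* The wrapper owns the context; borrow a plain handle from it. */
      *tsref = LLVMOrcCreateNewThreadSafeContext();
      *ref = LLVMOrcThreadSafeContextGetContext(*tsref);
   #endif
   }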
@@ -555,8 +555,8 @@ init_gallivm_state(struct gallivm_state *gallivm, const char *name,

    gallivm->cache = cache;

-   gallivm->_ts_context = context->ref;
-   gallivm->context = LLVMContextCreate();
+   gallivm->_ts_context = context->tsref;
+   gallivm->context = context->ref;

    gallivm->module_name = LPJit::get_unique_name(name);
    gallivm->module = LLVMModuleCreateWithNameInContext(gallivm->module_name,
@@ -3163,7 +3163,7 @@ do_int_divide(struct lp_build_nir_soa_context *bld,

 static LLVMValueRef
 do_int_mod(struct lp_build_nir_soa_context *bld,
-           bool is_unsigned, unsigned src_bit_size,
+           bool is_unsigned, bool use_src2_sign, unsigned src_bit_size,
            LLVMValueRef src, LLVMValueRef src2)
 {
    struct gallivm_state *gallivm = bld->base.gallivm;

@@ -3180,8 +3180,18 @@ do_int_mod(struct lp_build_nir_soa_context *bld,
       divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
                                    src_bit_size, src, divisor);
    }
-   LLVMValueRef result = lp_build_mod(int_bld, src, divisor);
-   return LLVMBuildOr(builder, div_mask, result, "");
+   LLVMValueRef rem = lp_build_mod(int_bld, src, divisor);
+   rem = LLVMBuildOr(builder, div_mask, rem, "");
+
+   if (use_src2_sign) {
+      LLVMValueRef add_src2 = LLVMBuildICmp(builder, LLVMIntNE, rem, int_bld->zero, "");
+      LLVMValueRef signs_different = LLVMBuildXor(builder, LLVMBuildICmp(builder, LLVMIntSLT, src, int_bld->zero, ""),
+                                                  LLVMBuildICmp(builder, LLVMIntSLT, src2, int_bld->zero, ""), "");
+      add_src2 = LLVMBuildAnd(builder, add_src2, signs_different, "");
+      rem = LLVMBuildSelect(builder, add_src2, LLVMBuildAdd(builder, rem, src2, ""), rem, "");
+   }
+
+   return rem;
 }

 static LLVMValueRef

@@ -3493,7 +3503,7 @@ do_alu_action(struct lp_build_nir_soa_context *bld,
       break;
    case nir_op_imod:
    case nir_op_irem:
-      result = do_int_mod(bld, false, src_bit_size[0], src[0], src[1]);
+      result = do_int_mod(bld, false, instr->op == nir_op_imod, src_bit_size[0], src[0], src[1]);
       break;
    case nir_op_ishl: {
       if (src_bit_size[0] == 64)

@@ -3592,7 +3602,7 @@ do_alu_action(struct lp_build_nir_soa_context *bld,
       result = lp_build_min(uint_bld, src[0], src[1]);
       break;
    case nir_op_umod:
-      result = do_int_mod(bld, true, src_bit_size[0], src[0], src[1]);
+      result = do_int_mod(bld, true, false, src_bit_size[0], src[0], src[1]);
       break;
    case nir_op_umul_high: {
       LLVMValueRef hi_bits;
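The use_src2_sign flag encodes the difference between NIR's irem, whose remainder keeps the sign of the dividend like C's % operator, and imod, whose result takes the sign of the divisor. A scalar C reference for the same adjustment, a sketch rather than Mesa code:

   #include <stdio.h>

   static int irem(int a, int b) { return a % b; }

   static int imod(int a, int b)
   {
      int r = a % b;
      /* Add the divisor back when the remainder is nonzero and the operand
       * signs differ, mirroring the LLVMBuildSelect above. */
      if (r != 0 && ((a < 0) != (b < 0)))
         r += b;
      return r;
   }

   int main(void)
   {
      printf("irem(-7, 3) = %d\n", irem(-7, 3)); /* -1 */
      printf("imod(-7, 3) = %d\n", imod(-7, 3)); /*  2 */
      return 0;
   }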
@@ -634,8 +634,8 @@ asahi_add_attachment(struct attachments *att, struct agx_resource *rsrc)
    assert(att->count < MAX_ATTACHMENTS);

    att->list[att->count++] = (struct drm_asahi_attachment){
-      .size = rsrc->layout.size_B,
-      .pointer = rsrc->bo->va->addr,
+      .size = rsrc->layout.size_B - rsrc->layout.level_offsets_B[0],
+      .pointer = agx_map_gpu(rsrc),
    };
 }
@@ -210,13 +210,13 @@ agx_resource_from_handle(struct pipe_screen *pscreen,

    if (rsc->layout.tiling == AIL_TILING_LINEAR) {
       rsc->layout.linear_stride_B = whandle->stride;
-   } else if (whandle->stride != ail_get_wsi_stride_B(&rsc->layout, 0)) {
+      rsc->layout.level_offsets_B[0] = whandle->offset;
+   } else if (whandle->stride != ail_get_wsi_stride_B(&rsc->layout, 0) ||
+              whandle->offset != 0) {
       FREE(rsc);
       return NULL;
    }

-   assert(whandle->offset == 0);
-
    ail_make_miptree(&rsc->layout);

    if (prsc->target == PIPE_BUFFER) {

@@ -301,7 +301,8 @@ agx_resource_get_param(struct pipe_screen *pscreen, struct pipe_context *pctx,
                        enum pipe_resource_param param, unsigned usage,
                        uint64_t *value)
 {
-   struct agx_resource *rsrc = (struct agx_resource *)prsc;
+   struct agx_resource *rsrc =
+      (struct agx_resource *)util_resource_at_index(prsc, plane);

    switch (param) {
    case PIPE_RESOURCE_PARAM_STRIDE:

@@ -1292,7 +1293,7 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,

       if (zres->layout.compressed) {
          c->depth.comp_base =
-            agx_map_texture_gpu(zres, 0) + zres->layout.metadata_offset_B +
+            agx_map_gpu(zres) + zres->layout.metadata_offset_B +
             (first_layer * zres->layout.compression_layer_stride_B) +
             zres->layout.level_offsets_compressed_B[level];

@@ -1329,7 +1330,7 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,

       if (sres->layout.compressed) {
          c->stencil.comp_base =
-            agx_map_texture_gpu(sres, 0) + sres->layout.metadata_offset_B +
+            agx_map_gpu(sres) + sres->layout.metadata_offset_B +
             (first_layer * sres->layout.compression_layer_stride_B) +
             sres->layout.level_offsets_compressed_B[level];
@@ -503,7 +503,7 @@ agx_get_query_result_resource_gpu(struct agx_context *ctx,
                 : 0;

    libagx_copy_query_gl(batch, agx_1d(1), AGX_BARRIER_ALL, query->ptr.gpu,
-                        rsrc->bo->va->addr + offset, result_type, bool_size);
+                        agx_map_gpu(rsrc) + offset, result_type, bool_size);
    return true;
 }
@@ -726,7 +726,7 @@ agx_pack_texture(void *out, struct agx_resource *rsrc,

    if (rsrc->layout.compressed) {
       cfg.acceleration_buffer =
-         agx_map_texture_gpu(rsrc, 0) + rsrc->layout.metadata_offset_B +
+         agx_map_gpu(rsrc) + rsrc->layout.metadata_offset_B +
          (first_layer * rsrc->layout.compression_layer_stride_B);
    }

@@ -1262,7 +1262,7 @@ agx_batch_upload_pbe(struct agx_batch *batch, struct agx_pbe_packed *out,
       cfg.extended = true;

       cfg.acceleration_buffer =
-         agx_map_texture_gpu(tex, 0) + tex->layout.metadata_offset_B +
+         agx_map_gpu(tex) + tex->layout.metadata_offset_B +
          (layer * tex->layout.compression_layer_stride_B);
    }

@@ -3756,8 +3756,9 @@ agx_index_buffer_rsrc_ptr(struct agx_batch *batch,
    struct agx_resource *rsrc = agx_resource(info->index.resource);
    agx_batch_reads(batch, rsrc);

-   *extent = ALIGN_POT(rsrc->layout.size_B, 4);
-   return rsrc->bo->va->addr;
+   *extent =
+      ALIGN_POT(rsrc->layout.size_B - rsrc->layout.level_offsets_B[0], 4);
+   return agx_map_gpu(rsrc);
 }

 static uint64_t

@@ -3948,7 +3949,7 @@ agx_batch_geometry_params(struct agx_batch *batch, uint64_t input_index_buffer,
       params.xfb_size[i] = size;

       if (rsrc) {
-         params.xfb_offs_ptrs[i] = rsrc->bo->va->addr;
+         params.xfb_offs_ptrs[i] = agx_map_gpu(rsrc);
          agx_batch_writes(batch, rsrc, 0);
          batch->incoherent_writes = true;
       }

@@ -4054,7 +4055,7 @@ agx_indirect_buffer_ptr(struct agx_batch *batch,

    struct agx_resource *rsrc = agx_resource(indirect->buffer);
    agx_batch_reads(batch, rsrc);
-   return rsrc->bo->va->addr + indirect->offset;
+   return agx_map_gpu(rsrc) + indirect->offset;
 }

 static void

@@ -5388,7 +5389,7 @@ agx_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
    if (info->indirect) {
       struct agx_resource *rsrc = agx_resource(info->indirect);
       agx_batch_reads(batch, rsrc);
-      indirect = rsrc->bo->va->addr + info->indirect_offset;
+      indirect = agx_map_gpu(rsrc) + info->indirect_offset;
    }

    /* Increment the pipeline stats query.

@@ -5493,7 +5494,7 @@ agx_set_global_binding(struct pipe_context *pipe, unsigned first,
          struct agx_resource *rsrc = agx_resource(resources[i]);

          memcpy(&addr, handles[i], sizeof(addr));
-         addr += rsrc->bo->va->addr;
+         addr += agx_map_gpu(rsrc);
          memcpy(handles[i], &addr, sizeof(addr));
       } else {
          pipe_resource_reference(res, NULL);

@@ -5534,7 +5535,7 @@ agx_decompress_inplace(struct agx_batch *batch, struct pipe_surface *surf,
                       surf->last_layer - surf->first_layer + 1);

    libagx_decompress(batch, grid, AGX_BARRIER_ALL, layout, surf->first_layer,
-                     level, agx_map_texture_gpu(rsrc, 0), images.gpu);
+                     level, agx_map_gpu(rsrc), images.gpu);
 }

 void
@@ -970,10 +970,16 @@ agx_map_texture_cpu(struct agx_resource *rsrc, unsigned level, unsigned z)
           ail_get_layer_level_B(&rsrc->layout, z, level);
 }

+static inline uint64_t
+agx_map_gpu(struct agx_resource *rsrc)
+{
+   return rsrc->bo->va->addr + rsrc->layout.level_offsets_B[0];
+}
+
 static inline uint64_t
 agx_map_texture_gpu(struct agx_resource *rsrc, unsigned z)
 {
-   return rsrc->bo->va->addr +
+   return agx_map_gpu(rsrc) +
           (uint64_t)ail_get_layer_offset_B(&rsrc->layout, z);
 }
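agx_map_gpu() is the pivot for every rsrc->bo->va->addr replacement in this series: it folds the level-0 offset, which agx_resource_from_handle() now populates from a linear import's WSI offset, into the base GPU address. A toy address walk with made-up numbers:

   uint64_t bo_va      = 0x10000; /* rsrc->bo->va->addr */
   uint64_t level0_off = 0x80;    /* rsrc->layout.level_offsets_B[0] */
   uint64_t layer_off  = 0x200;   /* ail_get_layer_offset_B(&layout, z) */

   uint64_t map_gpu     = bo_va + level0_off;  /* 0x10080 */
   uint64_t texture_gpu = map_gpu + layer_off; /* 0x10280 */
   /* Sizes shrink by the same offset, hence the size_B -
    * level_offsets_B[0] adjustments in the hunks above. */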
@@ -116,7 +116,7 @@ agx_batch_get_so_address(struct agx_batch *batch, unsigned buffer,
                        target->buffer_size);

    *size = target->buffer_size;
-   return rsrc->bo->va->addr + target->buffer_offset;
+   return agx_map_gpu(rsrc) + target->buffer_offset;
 }

 void
@@ -3,12 +3,9 @@
  * SPDX-License-Identifier: MIT
  */
 #include <stdio.h>
 #include "asahi/genxml/agx_pack.h"
 #include "pipe/p_state.h"
 #include "util/format/u_format.h"
 #include "util/half_float.h"
 #include "util/macros.h"
 #include "agx_abi.h"
 #include "agx_device.h"
 #include "agx_state.h"
 #include "pool.h"

@@ -19,8 +16,7 @@ agx_const_buffer_ptr(struct agx_batch *batch, struct pipe_constant_buffer *cb)
    if (cb->buffer) {
       struct agx_resource *rsrc = agx_resource(cb->buffer);
       agx_batch_reads(batch, rsrc);
-
-      return rsrc->bo->va->addr + cb->buffer_offset;
+      return agx_map_gpu(rsrc) + cb->buffer_offset;
    } else {
       return 0;
    }

@@ -42,8 +38,9 @@ agx_upload_vbos(struct agx_batch *batch)
         struct agx_resource *rsrc = agx_resource(vb.buffer.resource);
         agx_batch_reads(batch, rsrc);

-         buffers[vbo] = rsrc->bo->va->addr + vb.buffer_offset;
-         buf_sizes[vbo] = rsrc->layout.size_B - vb.buffer_offset;
+         buffers[vbo] = agx_map_gpu(rsrc) + vb.buffer_offset;
+         buf_sizes[vbo] = rsrc->layout.size_B - vb.buffer_offset -
+                          rsrc->layout.level_offsets_B[0];
      }
   }

@@ -144,7 +141,7 @@ agx_set_ssbo_uniforms(struct agx_batch *batch, mesa_shader_stage stage)
            agx_batch_reads(batch, rsrc);
         }

-         unif->ssbo_base[cb] = rsrc->bo->va->addr + sb->buffer_offset;
+         unif->ssbo_base[cb] = agx_map_gpu(rsrc) + sb->buffer_offset;
          unif->ssbo_size[cb] = st->ssbo[cb].buffer_size;
      } else {
         /* Invalid, so use the sink */
@@ -223,9 +223,9 @@ iris_apply_brw_tes_prog_data(struct iris_compiled_shader *shader,

    iris_apply_brw_vue_prog_data(&brw->base, &iris->base);

-   iris->partitioning = brw->partitioning;
-   iris->output_topology = brw->output_topology;
-   iris->domain = brw->domain;
+   iris->partitioning = brw_tess_info_partitioning(brw->tess_info);
+   iris->output_topology = brw_tess_info_output_topology(brw->tess_info);
+   iris->domain = brw_tess_info_domain(brw->tess_info);
    iris->include_primitive_id = brw->include_primitive_id;
 }
@@ -338,7 +338,6 @@ attribs_update_simple(struct lp_build_interp_soa_context *bld,
    LLVMBuilderRef builder = gallivm->builder;
    struct lp_build_context *coeff_bld = &bld->coeff_bld;
    struct lp_build_context *setup_bld = &bld->setup_bld;
-   LLVMValueRef oow = NULL;
    LLVMValueRef pixoffx;
    LLVMValueRef pixoffy;
    LLVMValueRef ptr;

@@ -425,25 +424,23 @@ attribs_update_simple(struct lp_build_interp_soa_context *bld,
          }

          if (interp == LP_INTERP_PERSPECTIVE) {
-            if (oow == NULL) {
-               LLVMValueRef w;
-               assert(attrib != 0);
-               assert(bld->mask[0] & TGSI_WRITEMASK_W);
-               if (bld->coverage_samples > 1 &&
-                   (loc == TGSI_INTERPOLATE_LOC_SAMPLE ||
-                    loc == TGSI_INTERPOLATE_LOC_CENTROID)) {
-                  /*
-                   * We can't use the precalculated 1/w since we didn't know
-                   * the actual position yet (we were assuming center).
-                   */
-                  LLVMValueRef indexw = lp_build_const_int32(gallivm, 3);
-                  w = interp_attrib_linear(bld, 0, indexw, chan_pixoffx, chan_pixoffy);
-               }
-               else {
-                  w = bld->attribs[0][3];
-               }
-               oow = lp_build_rcp(coeff_bld, w);
-            }
+            LLVMValueRef w;
+            assert(attrib != 0);
+            assert(bld->mask[0] & TGSI_WRITEMASK_W);
+            if (bld->coverage_samples > 1 &&
+                (loc == TGSI_INTERPOLATE_LOC_SAMPLE ||
+                 loc == TGSI_INTERPOLATE_LOC_CENTROID)) {
+               /*
+                * We can't use the precalculated 1/w since we didn't know
+                * the actual position yet (we were assuming center).
+                */
+               LLVMValueRef indexw = lp_build_const_int32(gallivm, 3);
+               w = interp_attrib_linear(bld, 0, indexw, chan_pixoffx, chan_pixoffy);
+            }
+            else {
+               w = bld->attribs[0][3];
+            }
+            LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
             a = lp_build_mul(coeff_bld, a, oow);
          }
@@ -1236,44 +1236,6 @@ spec@nv_texture_env_combine4@nv_texture_env_combine4-combine,Fail

 spec@oes_texture_float@oes_texture_float half,Fail

-# Remaining fallout from 9d359c6d10adb1cd2978a0e13714a3f98544aae8
-spec@arb_texture_compression@fbo-generatemipmap-formats,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGB,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGB NPOT,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA NPOT,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgb_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt3_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt5_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt3_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt5_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT1_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT1_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT3_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT3_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT5_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT5_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGB_S3TC_DXT1_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGB_S3TC_DXT1_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@gen-compressed-teximage,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_S3TC_DXT1_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_S3TC_DXT1_EXT NPOT,Fail
-
 # uprev Piglit in Mesa
 spec@!opengl 1.1@teximage-scale-bias,Fail
 spec@glsl-1.10@execution@glsl-fs-texture2d-mipmap-const-bias-01,Fail
@@ -1280,44 +1280,6 @@ spec@nv_texture_env_combine4@nv_texture_env_combine4-combine,Fail

 spec@oes_texture_float@oes_texture_float half,Fail

-# Remaining fallout from 9d359c6d10adb1cd2978a0e13714a3f98544aae8
-spec@arb_texture_compression@fbo-generatemipmap-formats,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGB,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGB NPOT,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA NPOT,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgb_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt3_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt5_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt3_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt5_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT1_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT1_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT3_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT3_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT5_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT5_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGB_S3TC_DXT1_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGB_S3TC_DXT1_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@gen-compressed-teximage,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_S3TC_DXT1_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_S3TC_DXT1_EXT NPOT,Fail
-
 # uprev Piglit in Mesa
 spec@!opengl 1.1@teximage-scale-bias,Fail
 spec@glsl-1.10@execution@glsl-fs-texture2d-mipmap-const-bias-01,Fail
@@ -778,78 +778,6 @@ dEQP-GLES2.functional.texture.mipmap.2d.projected.linear_nearest_repeat,Fail
 dEQP-GLES2.functional.texture.mipmap.2d.projected.linear_nearest_mirror,Fail
 dEQP-GLES2.functional.texture.mipmap.2d.projected.linear_nearest_clamp,Fail

-# Remaining fallout from 9d359c6d10adb1cd2978a0e13714a3f98544aae8
-spec@arb_texture_compression@fbo-generatemipmap-formats,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE NPOT,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_ALPHA,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_ALPHA NPOT,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGB,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGB NPOT,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA,Fail
-spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA NPOT,Fail
-spec@ati_texture_compression_3dc@fbo-generatemipmap-formats,Fail
-spec@ati_texture_compression_3dc@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI,Fail
-spec@ati_texture_compression_3dc@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI NPOT,Fail
-spec@ext_texture_compression_latc@fbo-generatemipmap-formats,Fail
-spec@ext_texture_compression_latc@fbo-generatemipmap-formats-signed,Fail
-spec@ext_texture_compression_latc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT,Fail
-spec@ext_texture_compression_latc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT NPOT,Fail
-spec@ext_texture_compression_latc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT,Fail
-spec@ext_texture_compression_latc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT NPOT,Fail
-spec@ext_texture_compression_latc@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT,Fail
-spec@ext_texture_compression_latc@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT NPOT,Fail
-spec@ext_texture_compression_latc@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_LATC1_EXT,Fail
-spec@ext_texture_compression_latc@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_LATC1_EXT NPOT,Fail
-spec@ext_texture_compression_rgtc@compressedteximage gl_compressed_red_green_rgtc2_ext,Fail
-spec@ext_texture_compression_rgtc@compressedteximage gl_compressed_signed_red_green_rgtc2_ext,Fail
-spec@ext_texture_compression_rgtc@compressedteximage gl_compressed_signed_red_rgtc1_ext,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats-signed,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_RED_RGTC1,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_RED_RGTC1 NPOT,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_RG_RGTC2,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_RG_RGTC2 NPOT,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RED,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RED NPOT,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RED_RGTC1,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RED_RGTC1 NPOT,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RG,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RG_RGTC2,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RG_RGTC2 NPOT,Fail
-spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RG NPOT,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgb_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt3_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt5_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt3_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt5_ext,Fail
-spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_s3tc_dxt1_ext,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT1_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT1_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT3_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT3_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT5_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT5_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGB_S3TC_DXT1_EXT,Fail
-spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGB_S3TC_DXT1_EXT NPOT,Fail
-spec@ext_texture_compression_s3tc@gen-compressed-teximage,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT NPOT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_S3TC_DXT1_EXT,Fail
-spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_S3TC_DXT1_EXT NPOT,Fail
-
 # uprev Piglit in Mesa
 spec@!opengl 1.1@teximage-scale-bias,Fail
 spec@ext_framebuffer_multisample@accuracy all_samples color depthstencil linear,Fail
@@ -947,6 +947,32 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
     util_framebuffer_init(pipe, state, r300->fb_cbufs, &r300->fb_zsbuf);
     util_copy_framebuffer_state(r300->fb_state.state, state);

+    /* DXTC blits require that blocks are 2x1 or 4x1 pixels, but
+     * pipe_surface_width sets the framebuffer width as if blocks were 1x1
+     * pixels. Override the width to correct that.
+     */
+    if (state->nr_cbufs == 1 && state->cbufs[0].texture &&
+        state->cbufs[0].format == PIPE_FORMAT_R8G8B8A8_UNORM &&
+        util_format_is_compressed(state->cbufs[0].texture->format)) {
+        struct pipe_framebuffer_state *fb =
+            (struct pipe_framebuffer_state*)r300->fb_state.state;
+        const struct util_format_description *desc =
+            util_format_description(state->cbufs[0].texture->format);
+        unsigned width = u_minify(state->cbufs[0].texture->width0,
+                                  state->cbufs[0].level);
+
+        assert(desc->block.width == 4 && desc->block.height == 4);
+
+        /* Each 64-bit DXT block is 2x1 pixels, and each 128-bit DXT
+         * block is 4x1 pixels when blitting.
+         */
+        width = align(width, 4); /* align to the DXT block width. */
+        if (desc->block.bits == 64)
+            width = DIV_ROUND_UP(width, 2);
+
+        fb->width = width;
+    }
+
     /* Remove trailing NULL colorbuffers. */
     while (current_state->nr_cbufs && !current_state->cbufs[current_state->nr_cbufs-1].texture)
         current_state->nr_cbufs--;
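The block-size comment deserves a worked number: viewed as R8G8B8A8, a 64-bit DXT block covers 2x1 pixels and a 128-bit block covers 4x1, so only the 64-bit case halves the aligned width. A standalone sketch of the same arithmetic (hypothetical helper, not driver code):

   static unsigned
   dxt_blit_width(unsigned width_px, unsigned block_bits)
   {
      /* Align to the 4-texel DXT block width first. */
      width_px = (width_px + 3) & ~3u;
      /* 64-bit blocks map to 2x1 R8G8B8A8 pixels; 128-bit blocks map to
       * 4x1, which leaves the aligned width unchanged. */
      return block_bits == 64 ? width_px / 2 : width_px;
   }
   /* dxt_blit_width(123, 64)  -> aligned to 124, then 62 blit pixels (DXT1)
    * dxt_blit_width(123, 128) -> 124 blit pixels (DXT3/5) */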
@@ -201,6 +201,7 @@ void r600_draw_rectangle(struct blitter_context *blitter,
 	rctx->b.set_vertex_buffers(&rctx->b, 1, &vbuffer);
 	util_draw_arrays_instanced(&rctx->b, R600_PRIM_RECTANGLE_LIST, 0, 3,
 				   0, num_instances);
+	pipe_resource_reference(&buf, NULL);
 }

 static void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
@@ -14,6 +14,7 @@
 #include "util/u_memory.h"
 #include "util/u_pack_color.h"
 #include "util/u_surface.h"
+#include "util/u_resource.h"
 #include "util/os_time.h"
 #include "frontend/winsys_handle.h"
 #include <errno.h>

@@ -442,7 +443,7 @@ static bool r600_texture_get_param(struct pipe_screen *screen,

 	switch (param) {
 	case PIPE_RESOURCE_PARAM_NPLANES:
-		*value = 1;
+		*value = util_resource_num(resource);
 		return true;

 	case PIPE_RESOURCE_PARAM_STRIDE:
@@ -20,6 +20,16 @@ AluGroup::AluGroup()
    m_free_slots = has_t() ? 0x1f : 0xf;
 }

+void
+AluGroup::apply_add_instr(AluInstr *instr)
+{
+   instr->set_parent_group(this);
+   instr->pin_dest_to_chan();
+   m_has_kill_op |= instr->is_kill();
+   m_has_pred_update |= instr->has_alu_flag(alu_update_exec);
+   assert(!(m_has_kill_op && m_has_pred_update));
+}
+
 bool
 AluGroup::add_instruction(AluInstr *instr)
 {

@@ -32,17 +42,13 @@ AluGroup::add_instruction(AluInstr *instr)
       ASSERTED auto opinfo = alu_ops.find(instr->opcode());
       assert(opinfo->second.can_channel(AluOp::t, s_chip_class));
       if (add_trans_instructions(instr)) {
-         instr->set_parent_group(this);
-         instr->pin_dest_to_chan();
-         m_has_kill_op |= instr->is_kill();
+         apply_add_instr(instr);
         return true;
      }
   }

   if (add_vec_instructions(instr) && !instr->has_alu_flag(alu_is_trans)) {
-      instr->set_parent_group(this);
-      instr->pin_dest_to_chan();
-      m_has_kill_op |= instr->is_kill();
+      apply_add_instr(instr);
      return true;
   }

@@ -51,9 +57,7 @@ AluGroup::add_instruction(AluInstr *instr)

   if (s_max_slots > 4 && opinfo->second.can_channel(AluOp::t, s_chip_class) &&
       add_trans_instructions(instr)) {
-      instr->set_parent_group(this);
-      instr->pin_dest_to_chan();
-      m_has_kill_op |= instr->is_kill();
+      apply_add_instr(instr);
      return true;
   }

@@ -128,6 +132,8 @@ AluGroup::add_trans_instructions(AluInstr *instr)
       * make sure the corresponding vector channel is used */
      assert(instr->has_alu_flag(alu_is_trans) || m_slots[instr->dest_chan()]);
+      m_has_kill_op |= instr->is_kill();
+      m_has_pred_update |= instr->has_alu_flag(alu_update_exec);

      m_slot_assignemnt_order[m_next_slot_assignemnt++] = 4;
      return true;
   }

@@ -170,17 +176,12 @@ AluGroup::add_vec_instructions(AluInstr *instr)
   if (!m_slots[preferred_chan]) {
      if (instr->bank_swizzle() != alu_vec_unknown) {
-         if (try_readport(instr, instr->bank_swizzle())) {
-            m_has_kill_op |= instr->is_kill();
-            m_slot_assignemnt_order[m_next_slot_assignemnt++] = preferred_chan;
+         if (try_readport(instr, instr->bank_swizzle()))
            return true;
-         }
      } else {
         for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
-            if (try_readport(instr, i)) {
-               m_has_kill_op |= instr->is_kill();
-               m_slot_assignemnt_order[m_next_slot_assignemnt++] = preferred_chan;
+            if (try_readport(instr, i))
               return true;
-            }
         }
      }
   } else {

@@ -209,18 +210,12 @@ AluGroup::add_vec_instructions(AluInstr *instr)
      sfn_log << SfnLog::schedule << "V: Try force channel " << free_chan << "\n";
      dest->set_chan(free_chan);
      if (instr->bank_swizzle() != alu_vec_unknown) {
-         if (try_readport(instr, instr->bank_swizzle())) {
-            m_has_kill_op |= instr->is_kill();
-            m_slot_assignemnt_order[m_next_slot_assignemnt++] = free_chan;
+         if (try_readport(instr, instr->bank_swizzle()))
            return true;
-         }
      } else {
         for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
-            if (try_readport(instr, i)) {
-               m_has_kill_op |= instr->is_kill();
-               m_slot_assignemnt_order[m_next_slot_assignemnt++] = free_chan;
+            if (try_readport(instr, i))
               return true;
-            }
         }
      }
   }

@@ -318,6 +313,9 @@ AluGroup::try_readport(AluInstr *instr, AluBankSwizzle cycle)
      else if (dest->pin() == pin_group)
         dest->set_pin(pin_chgr);
   }
+   m_has_kill_op |= instr->is_kill();
+   m_has_pred_update |= instr->has_alu_flag(alu_update_exec);
+   m_slot_assignemnt_order[m_next_slot_assignemnt++] = preferred_chan;
   return true;
 }
 return false;
@@ -21,6 +21,7 @@ public:
    using iterator = Slots::iterator;
    using const_iterator = Slots::const_iterator;

+   void extracted(AluInstr *& instr);
    bool add_instruction(AluInstr *instr);
    bool add_trans_instructions(AluInstr *instr);
    bool add_vec_instructions(AluInstr *instr);

@@ -82,6 +83,7 @@ public:

    bool addr_for_src() const { return m_addr_for_src; }
    bool has_kill_op() const { return m_has_kill_op; }
+   bool has_update_exec() const { return m_has_pred_update; }

    void set_origin(AluInstr *o) { m_origin = o;}

@@ -100,6 +102,8 @@ private:
    bool update_indirect_access(AluInstr *instr);
    bool try_readport(AluInstr *instr, AluBankSwizzle cycle);

+   void apply_add_instr(AluInstr *instr);
+
    Slots m_slots;
    uint8_t m_next_slot_assignemnt{0};
    std::array<int8_t, 5> m_slot_assignemnt_order{-1, -1, -1, -1, -1};

@@ -119,6 +123,7 @@ private:
    bool m_addr_is_index{false};
    bool m_addr_for_src{false};
    bool m_has_kill_op{false};
+   bool m_has_pred_update{false};
    AluInstr *m_origin{nullptr};

    uint8_t m_free_slots;
@@ -869,8 +869,8 @@ BlockScheduler::schedule_alu_to_group_vec(AluGroup *group)
    bool success = false;
    auto i = alu_vec_ready.begin();
    auto e = alu_vec_ready.end();
-   bool group_has_kill = false;
-   bool group_has_update_pred = false;
+   bool group_has_kill = group->has_kill_op();
+   bool group_has_update_pred = group->has_update_exec();
    while (i != e) {
       sfn_log << SfnLog::schedule << "Try schedule to vec " << **i;

@@ -945,6 +945,7 @@ BlockScheduler::schedule_alu_to_group_vec(AluGroup *group)
         success = true;

         group_has_kill |= is_kill;
+         group_has_update_pred |= does_update_pred;

         sfn_log << SfnLog::schedule << " success\n";
      } else {

@@ -965,8 +966,20 @@ BlockScheduler::schedule_alu_multislot_to_group_vec(AluGroup *group, ValueFactor
    auto i = alu_multi_slot_ready.begin();
    auto e = alu_multi_slot_ready.end();

+   bool group_has_kill = group->has_kill_op();
+
    while (i != e && util_bitcount(group->free_slot_mask()) > 1) {

+      /* A kill instruction and a predicate update in the same
+       * group don't mix well, so skip adding a predicate changing
+       * multi-slot op if we already have a kill. (There are no
+       * multi-slot kill ops).
+       */
+      if (group_has_kill && (*i)->has_alu_flag(alu_update_exec)) {
+         ++i;
+         continue;
+      }
+
       auto dest = (*i)->dest();

       bool can_merge = false;

@@ -1038,6 +1051,10 @@ BlockScheduler::schedule_alu_to_group_trans(AluGroup *group,
    bool success = false;
    auto i = readylist.begin();
    auto e = readylist.end();

+   bool group_has_kill = group->has_kill_op();
+   bool group_has_update_pred = group->has_update_exec();
+
    while (i != e) {

       if (check_array_reads(**i)) {

@@ -1052,6 +1069,12 @@ BlockScheduler::schedule_alu_to_group_trans(AluGroup *group,
         continue;
      }

+      if ((group_has_kill && (*i)->has_alu_flag(alu_update_exec)) ||
+          (group_has_update_pred && (*i)->is_kill())) {
+         ++i;
+         continue;
+      }
+
      if (group->add_trans_instructions(*i)) {
         (*i)->pin_dest_to_chan();
         auto old_i = i;
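The skip conditions added to the scheduler are symmetric with the assert in AluGroup::apply_add_instr(): once a group holds a kill it must not accept a predicate update, and vice versa. The gate, restated as a self-contained sketch:

   /* Candidate gating; names hypothetical. */
   static bool
   must_skip(bool group_has_kill, bool group_has_update_pred,
             bool cand_is_kill, bool cand_updates_pred)
   {
      return (group_has_kill && cand_updates_pred) ||
             (group_has_update_pred && cand_is_kill);
   }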
@@ -88,8 +88,10 @@ class CollectDeps : public ConstRegisterVisitor {
 public:
    void visit(const Register& r) override
    {
-      for (auto p : r.parents())
-         add_dep(p);
+      for (auto p : r.parents()) {
+         if (instr->block_id() == p->block_id() && instr->index() < p->index())
+            add_dep(p);
+      }
    }
    void visit(const LocalArray& value) override {(void)value; UNREACHABLE("Array is not a value");}
    void visit(const LocalArrayValue& r) override
@@ -22,7 +22,7 @@ r600_test_dep = declare_dependency(

 if with_tests
   foreach t : ['valuefactory', 'value', 'instr', 'instrfromstring', 'liverange',
-               'optimizer', 'shaderfromstring', 'split_address_loads' ]
+               'optimizer', 'regression', 'shaderfromstring', 'split_address_loads' ]
     test(
       t,
       executable('test-@0@-r600-sfn'.format(t),

src/gallium/drivers/r600/sfn/tests/sfn_regression_test.cpp (new file, 65 lines)
@@ -0,0 +1,65 @@
+#include "sfn_test_shaders.h"
+
+#include "../sfn_optimizer.h"
+#include "../sfn_ra.h"
+#include "../sfn_scheduler.h"
+#include "../sfn_shader.h"
+#include "../sfn_split_address_loads.h"
+
+using namespace r600;
+using std::ostringstream;
+
+TEST_F(TestShaderFromNir, CombineRegisterToTexSrc)
+{
+   const char *shader_input =
+      R"(VS
+CHIPCLASS EVERGREEN
+INPUT LOC:0
+INPUT LOC:1
+OUTPUT LOC:0 VARYING_SLOT:0 MASK:15
+OUTPUT LOC:1 VARYING_SLOT:32 MASK:3
+REGISTERS R1.xyzw R2.xyzw R6.y R7.x R8.y R9.x R10.y
+ARRAYS A3[2].zw
+SHADER
+BLOCK_START
+ALU MOV R10.y@free : R2.x@fully{sb} {W}
+ALU MOV R9.x@free : R2.y@fully{sb} {W}
+ALU MOV R8.y@free : R2.z@fully{sb} {W}
+ALU MOV R7.x@free : R2.w@fully{sb} {W}
+ALU MOV R6.y@free : I[0] {W}
+LOOP_BEGIN
+BLOCK_END
+BLOCK_START
+IF (( ALU PRED_SETGE_INT __.x : R6.y@free KC0[0].x {LEP} PUSH_BEFORE ))
+BLOCK_END
+BLOCK_START
+BREAK
+BLOCK_END
+BLOCK_START
+ENDIF
+BLOCK_END
+BLOCK_START
+ALU INT_TO_FLT CLAMP S22.y@free{s} : R6.y@free {W}
+ALU TRUNC S24.w@free{s} : S22.y@free{s} {W}
+ALU FLT_TO_INT S25.x@free{s} : S24.w@free{s} {W}
+ALU MOV A3[S25.x@free].z : R10.y@free {W}
+ALU MOV A3[S25.x@free].w : R9.x@free {W}
+ALU MUL_IEEE R5.x@free : R9.x@free I[0.5] {W}
+ALU MUL_IEEE R9.x@free : R8.y@free I[0.5] {W}
+ALU MUL_IEEE R8.y@free : R7.x@free I[0.5] {W}
+ALU MUL_IEEE R7.x@free : R10.y@free I[0.5] {W}
+ALU ADD_INT R6.y@free : R6.y@free I[1] {W}
+ALU MOV R10.y@free : R5.x@free {W}
+LOOP_END
+BLOCK_END
+BLOCK_START
+ALU ADD S47.z@group{s} : A3[0].z A3[1].z {W}
+ALU ADD S47.x@group{s} : A3[0].w A3[1].w {W}
+EXPORT_DONE PARAM 0 S47.zx__
+EXPORT_DONE POS 0 R1.xyzw
+BLOCK_END)";
+
+   auto sh = from_string(shader_input);
+   split_address_loads(*sh);
+   schedule(sh);
+}
@@ -56,6 +56,19 @@ static bool si_update_shaders(struct si_context *sctx)
    struct si_shader *old_ps = sctx->shader.ps.current;
    int r;

+   if (GFX_VERSION >= GFX9) {
+      /* For merged shaders, mark the next shader as dirty so its previous_stage is updated. */
+      if (is_vs_state_changed) {
+         if (HAS_TESS) {
+            is_tess_state_changed = true;
+         } else if (HAS_GS) {
+            is_gs_state_changed = true;
+         }
+      }
+      if ((sctx->dirty_shaders_mask & BITFIELD_BIT(MESA_SHADER_TESS_EVAL)) && HAS_GS && HAS_TESS)
+         is_gs_state_changed = true;
+   }
+
    /* Update TCS and TES. */
    if (HAS_TESS && is_tess_state_changed) {
       if (!sctx->has_tessellation) {
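The merged-shader comment is the whole story of this hunk: on GFX9+ the hardware merges the VS into the TCS or GS stage, and a TES feeding a GS is likewise merged, so dirtiness must propagate downstream. A standalone sketch of the propagation rule with hypothetical flags:

   bool vs_changed = true, tes_changed = false;
   bool has_tess = true, has_gs = true;
   bool tess_changed = false, gs_changed = false;

   if (vs_changed) {
      if (has_tess)
         tess_changed = true;  /* VS is merged into the TCS stage */
      else if (has_gs)
         gs_changed = true;    /* VS is merged into the GS stage */
   }
   if (tes_changed && has_tess && has_gs)
      gs_changed = true;       /* TES is merged into the GS stage */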
@@ -690,6 +690,7 @@ v3d_get_sand8_fs(struct pipe_context *pctx, int cpp)
                nir_variable_create(b.shader, nir_var_shader_out,
                                    vec4, "f_color");
        color_out->data.location = FRAG_RESULT_COLOR;
+       b.shader->info.outputs_written |= BITFIELD_BIT(FRAG_RESULT_COLOR);

        nir_variable *pos_in =
                nir_variable_create(b.shader, nir_var_shader_in, vec4, "pos");

@@ -998,6 +999,7 @@ v3d_get_sand30_fs(struct pipe_context *pctx)
                                    nir_var_shader_out,
                                    glsl_uvec4, "f_color");
        color_out->data.location = FRAG_RESULT_COLOR;
+       b.shader->info.outputs_written |= BITFIELD_BIT(FRAG_RESULT_COLOR);

        nir_variable *pos_in =
                nir_variable_create(b.shader, nir_var_shader_in, vec4, "pos");
@ -1,6 +1,6 @@
|
|||
# Please include a comment with the log message and a testcase triggering each
|
||||
# VUID at the bottom of the file.
|
||||
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-vkCmdDrawMultiIndexedEXT-format-07753,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-vkCmdDrawMultiIndexedEXT-None-10909,VUID-vkDestroyDevice-device-05137,VUID-vkCmdDrawMultiEXT-None-08879,VUID-VkShaderCreateInfoEXT-pSetLayouts-parameter,VUID-vkCmdDrawMultiIndexedEXT-None-08879,VUID-VkPhysicalDeviceMeshShaderFeaturesEXT-primitiveFragmentShadingRateMeshShader-07033
|
||||
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-vkCmdDrawMultiIndexedEXT-format-07753,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-vkCmdDrawMultiIndexedEXT-None-10909,VUID-vkDestroyDevice-device-05137,VUID-vkCmdDrawMultiEXT-None-08879,VUID-VkShaderCreateInfoEXT-pSetLayouts-parameter,VUID-vkCmdDrawMultiIndexedEXT-None-08879
|
||||
khronos_validation.report_flags = error
|
||||
khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG,VK_DBG_LAYER_ACTION_BREAK
|
||||
VK_LAYER_ENABLES=VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# Please include a comment with the log message and a testcase triggering each
|
||||
# VUID at the bottom of the file.
|
||||
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-vkCmdDrawMultiIndexedEXT-None-10909,VUID-VkPhysicalDeviceMeshShaderFeaturesEXT-primitiveFragmentShadingRateMeshShader-07033
|
||||
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-vkCmdDrawMultiIndexedEXT-None-10909
|
||||
khronos_validation.report_flags = error
|
||||
khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG,VK_DBG_LAYER_ACTION_BREAK
|
||||
VK_LAYER_ENABLES=VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# Please include a comment with the log message and a testcase triggering each
|
||||
# VUID at the bottom of the file.
|
||||
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-vkCmdDrawMultiIndexedEXT-format-07753,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-vkCmdDrawMultiIndexedEXT-None-10909,VUID-vkDestroyDevice-device-05137,VUID-VkPhysicalDeviceMeshShaderFeaturesEXT-primitiveFragmentShadingRateMeshShader-07033
|
||||
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-vkCmdDrawMultiIndexedEXT-format-07753,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-vkCmdDrawMultiIndexedEXT-None-10909,VUID-vkDestroyDevice-device-05137
|
||||
khronos_validation.report_flags = error
|
||||
khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG,VK_DBG_LAYER_ACTION_BREAK
|
||||
VK_LAYER_ENABLES=VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT
|
||||
|
|
|
|||
|
|
@@ -1,6 +1,6 @@
# Please include a comment with the log message and a testcase triggering each
# VUID at the bottom of the file.
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-vkCmdDrawMultiEXT-None-02699,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-vkCmdPipelineBarrier2-shaderTileImageColorReadAccess-08718,VUID-VkGraphicsPipelineCreateInfo-flags-06482,VUID-vkCmdPipelineBarrier2-None-08719,VUID-vkCmdDrawMultiEXT-rasterizationSamples-07474,VUID-vkDestroyDevice-device-05137,VUID-VkRectLayerKHR-offset-04864,VUID-vkAcquireNextImageKHR-semaphore-01779,VUID-vkQueueSubmit-pSignalSemaphores-00067,VUID-VkImageMemoryBarrier2-srcAccessMask-07454,UNASSIGNED-GeneralParameterError-RequiredHandle,VUID-VkImageMemoryBarrier2-image-parameter,VUID-vkCmdDrawMultiIndexedEXT-None-10909,VUID-VkPhysicalDeviceMeshShaderFeaturesEXT-primitiveFragmentShadingRateMeshShader-07033
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-vkCmdDrawMultiEXT-None-02699,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-vkCmdPipelineBarrier2-shaderTileImageColorReadAccess-08718,VUID-VkGraphicsPipelineCreateInfo-flags-06482,VUID-vkCmdPipelineBarrier2-None-08719,VUID-vkCmdDrawMultiEXT-rasterizationSamples-07474,VUID-vkDestroyDevice-device-05137,VUID-VkRectLayerKHR-offset-04864,VUID-vkAcquireNextImageKHR-semaphore-01779,VUID-vkQueueSubmit-pSignalSemaphores-00067,VUID-VkImageMemoryBarrier2-srcAccessMask-07454,UNASSIGNED-GeneralParameterError-RequiredHandle,VUID-VkImageMemoryBarrier2-image-parameter,VUID-vkCmdDrawMultiIndexedEXT-None-10909
khronos_validation.report_flags = error
khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG,VK_DBG_LAYER_ACTION_BREAK
VK_LAYER_ENABLES=VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT
@@ -1,7 +1,7 @@
# Please include a comment with the log message and a testcase triggering each
# VUID at the bottom of the file.
#khronos_validation.message_id_filter = VUID-RuntimeSpirv-Location-06272,VUID-StandaloneSpirv-OpEntryPoint-08721,VUID-vkCmdDrawMultiEXT-dynamicPrimitiveTopologyUnrestricted-07500,VUID-vkCmdDrawMultiEXT-None-08879,VUID-vkCmdDrawMultiIndexedEXT-dynamicPrimitiveTopologyUnrestricted-07500,VUID-vkCmdDrawMultiIndexedEXT-None-08879,VUID-vkDestroyDevice-device-05137,VUID-vkQueueSubmit-pCommandBuffers-00065,VUID-VkShaderCreateInfoEXT-pCode-08737
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-StandaloneSpirv-OpEntryPoint-08721,VUID-vkCmdDrawMultiEXT-dynamicRenderingUnusedAttachments-08911,VUID-vkCmdDrawMultiIndexedEXT-None-08879,VUID-vkDestroyDevice-device-05137,VUID-vkQueueSubmit-pCommandBuffers-00065,VUID-VkShaderCreateInfoEXT-pCode-08737,VUID-vkCmdDrawMultiEXT-None-08879,VUID-VkShaderCreateInfoEXT-pSetLayouts-parameter,VUID-vkCmdDrawMultiIndexedEXT-None-10909,UNASSIGNED-Draw-primitiveTopologyPatchListRestart,VUID-VkPhysicalDeviceMeshShaderFeaturesEXT-primitiveFragmentShadingRateMeshShader-07033
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-StandaloneSpirv-OpEntryPoint-08721,VUID-vkCmdDrawMultiEXT-dynamicRenderingUnusedAttachments-08911,VUID-vkCmdDrawMultiIndexedEXT-None-08879,VUID-vkDestroyDevice-device-05137,VUID-vkQueueSubmit-pCommandBuffers-00065,VUID-VkShaderCreateInfoEXT-pCode-08737,VUID-vkCmdDrawMultiEXT-None-08879,VUID-VkShaderCreateInfoEXT-pSetLayouts-parameter,VUID-vkCmdDrawMultiIndexedEXT-None-10909,UNASSIGNED-Draw-primitiveTopologyPatchListRestart
khronos_validation.report_flags = error
khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG,VK_DBG_LAYER_ACTION_BREAK
VK_LAYER_ENABLES=VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT
@@ -1,7 +1,7 @@
# Please include a comment with the log message and a testcase triggering each
# VUID at the bottom of the file.
#khronos_validation.message_id_filter = VUID-RuntimeSpirv-Location-06272,VUID-StandaloneSpirv-OpEntryPoint-08721,VUID-vkCmdDrawMultiEXT-dynamicPrimitiveTopologyUnrestricted-07500,VUID-vkCmdDrawMultiEXT-None-08879,VUID-vkCmdDrawMultiIndexedEXT-dynamicPrimitiveTopologyUnrestricted-07500,VUID-vkCmdDrawMultiIndexedEXT-None-08879,VUID-vkDestroyDevice-device-05137,VUID-vkQueueSubmit-pCommandBuffers-00065,VUID-VkShaderCreateInfoEXT-pCode-08737
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-VkRenderingAttachmentInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-StandaloneSpirv-OpEntryPoint-08721,VUID-vkCmdDrawMultiEXT-dynamicRenderingUnusedAttachments-08911,VUID-vkCmdDrawMultiIndexedEXT-None-08879,VUID-vkDestroyDevice-device-05137,VUID-vkQueueSubmit-pCommandBuffers-00065,VUID-VkShaderCreateInfoEXT-pCode-08737,VUID-vkCmdDrawMultiEXT-None-08879,VUID-VkShaderCreateInfoEXT-pSetLayouts-parameter,VUID-vkCmdDrawMultiIndexedEXT-None-10909,UNASSIGNED-Draw-primitiveTopologyPatchListRestart,VUID-VkPhysicalDeviceMeshShaderFeaturesEXT-primitiveFragmentShadingRateMeshShader-07033
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-VkRenderingAttachmentInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-StandaloneSpirv-OpEntryPoint-08721,VUID-vkCmdDrawMultiEXT-dynamicRenderingUnusedAttachments-08911,VUID-vkCmdDrawMultiIndexedEXT-None-08879,VUID-vkDestroyDevice-device-05137,VUID-vkQueueSubmit-pCommandBuffers-00065,VUID-VkShaderCreateInfoEXT-pCode-08737,VUID-vkCmdDrawMultiEXT-None-08879,VUID-VkShaderCreateInfoEXT-pSetLayouts-parameter,VUID-vkCmdDrawMultiIndexedEXT-None-10909,UNASSIGNED-Draw-primitiveTopologyPatchListRestart
khronos_validation.report_flags = error
khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG,VK_DBG_LAYER_ACTION_BREAK
VK_LAYER_ENABLES=VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT
@@ -1,7 +1,7 @@
# Please include a comment with the log message and a testcase triggering each
# VUID at the bottom of the file.
#khronos_validation.message_id_filter = VUID-RuntimeSpirv-Location-06272,VUID-StandaloneSpirv-OpEntryPoint-08721,VUID-vkCmdDrawMultiEXT-dynamicPrimitiveTopologyUnrestricted-07500,VUID-vkCmdDrawMultiEXT-None-08879,VUID-vkCmdDrawMultiIndexedEXT-dynamicPrimitiveTopologyUnrestricted-07500,VUID-vkCmdDrawMultiIndexedEXT-None-08879,VUID-vkDestroyDevice-device-05137,VUID-vkQueueSubmit-pCommandBuffers-00065,VUID-VkShaderCreateInfoEXT-pCode-08737
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-StandaloneSpirv-OpEntryPoint-08721,VUID-vkCmdDrawMultiEXT-dynamicRenderingUnusedAttachments-08911,VUID-vkCmdDrawMultiIndexedEXT-None-08879,VUID-vkDestroyDevice-device-05137,VUID-vkQueueSubmit-pCommandBuffers-00065,VUID-VkShaderCreateInfoEXT-pCode-08737,VUID-vkCmdDrawMultiEXT-None-08879,VUID-VkShaderCreateInfoEXT-pSetLayouts-parameter,VUID-vkCmdDrawMultiIndexedEXT-None-10909,UNASSIGNED-Draw-primitiveTopologyPatchListRestart,VUID-VkPhysicalDeviceMeshShaderFeaturesEXT-primitiveFragmentShadingRateMeshShader-07033
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-StandaloneSpirv-OpEntryPoint-08721,VUID-vkCmdDrawMultiEXT-dynamicRenderingUnusedAttachments-08911,VUID-vkCmdDrawMultiIndexedEXT-None-08879,VUID-vkDestroyDevice-device-05137,VUID-vkQueueSubmit-pCommandBuffers-00065,VUID-VkShaderCreateInfoEXT-pCode-08737,VUID-vkCmdDrawMultiEXT-None-08879,VUID-VkShaderCreateInfoEXT-pSetLayouts-parameter,VUID-vkCmdDrawMultiIndexedEXT-None-10909,UNASSIGNED-Draw-primitiveTopologyPatchListRestart
khronos_validation.report_flags = error
khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG,VK_DBG_LAYER_ACTION_BREAK
VK_LAYER_ENABLES=VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT
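These `khronos_validation.*` keys are the validation layer's settings-file syntax; the same message filters can also be applied programmatically at instance creation through VK_EXT_layer_settings. A minimal sketch, assuming the layer is installed; the filtered VUID string is illustrative:

    #include <vulkan/vulkan.h>

    /* Sketch: suppress one VUID via VK_EXT_layer_settings instead of a
     * settings file. */
    VkInstance create_instance_with_filter(void)
    {
       const char *filter = "VUID-VkPhysicalDeviceProperties2-pNext-pNext";
       const VkLayerSettingEXT setting = {
          .pLayerName = "VK_LAYER_KHRONOS_validation",
          .pSettingName = "message_id_filter",
          .type = VK_LAYER_SETTING_TYPE_STRING_EXT,
          .valueCount = 1,
          .pValues = &filter,
       };
       const VkLayerSettingsCreateInfoEXT settings = {
          .sType = VK_STRUCTURE_TYPE_LAYER_SETTINGS_CREATE_INFO_EXT,
          .settingCount = 1,
          .pSettings = &setting,
       };
       const char *layers[] = { "VK_LAYER_KHRONOS_validation" };
       const VkInstanceCreateInfo ici = {
          .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
          .pNext = &settings,
          .enabledLayerCount = 1,
          .ppEnabledLayerNames = layers,
       };
       VkInstance instance = VK_NULL_HANDLE;
       vkCreateInstance(&ici, NULL, &instance);
       return instance;
    }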
@@ -3368,9 +3368,8 @@ begin_rendering(struct zink_context *ctx, bool check_msaa_expand)
         VK_TRUE,
         ctx->gfx_pipeline_state.rast_samples + 1,
      };
      ctx->dynamic_fb.info.pNext = ctx->transient_attachments && !ctx->blitting && has_msrtss ? &msrtss : NULL;

      if (has_msrtss && !ctx->blitting)
         ctx->dynamic_fb.info.pNext = ctx->transient_attachments ? &msrtss : NULL;
      VKCTX(CmdBeginRendering)(ctx->bs->cmdbuf, &ctx->dynamic_fb.info);
      ctx->in_rp = true;
      return clear_buffers;
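For context on the hunk above: VK_EXT_multisampled_render_to_single_sampled is driven by chaining a VkMultisampledRenderToSingleSampledInfoEXT into VkRenderingInfo::pNext, which is what `ctx->dynamic_fb.info.pNext` carries here. A minimal sketch of that chain; the 4x sample count and the condition names mirror the zink logic but are illustrative:

    #include <vulkan/vulkan.h>

    /* Sketch: enable MSRTSS for a dynamic-rendering pass over
     * single-sampled, transient (lazily allocated) attachments. */
    static VkMultisampledRenderToSingleSampledInfoEXT msrtss = {
       .sType = VK_STRUCTURE_TYPE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT,
       .multisampledRenderToSingleSampledEnable = VK_TRUE,
       .rasterizationSamples = VK_SAMPLE_COUNT_4_BIT,
    };

    void begin_pass(VkCommandBuffer cmdbuf, VkRenderingInfo *info,
                    bool has_msrtss, bool transient_attachments)
    {
       /* only chain the struct when the extension is usable and the
        * attachments are transient */
       info->pNext = has_msrtss && transient_attachments ? &msrtss : NULL;
       vkCmdBeginRendering(cmdbuf, info);
    }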
@@ -1725,6 +1725,7 @@ zink_descriptors_deinit(struct zink_context *ctx)
      VKSCR(DestroyDescriptorSetLayout)(screen->dev, ctx->dd.push_dsl[0]->layout, NULL);
   if (ctx->dd.push_dsl[1])
      VKSCR(DestroyDescriptorSetLayout)(screen->dev, ctx->dd.push_dsl[1]->layout, NULL);
   VKSCR(DestroyDescriptorSetLayout)(screen->dev, ctx->dd.old_push_dsl, NULL);
}

/* called on screen creation */
@@ -1766,7 +1767,8 @@ zink_descriptor_util_init_fbfetch(struct zink_context *ctx)
      return;

   struct zink_screen *screen = zink_screen(ctx->base.screen);
   VKSCR(DestroyDescriptorSetLayout)(screen->dev, ctx->dd.push_dsl[0]->layout, NULL);
   /* save this layout; it may be used by programs, and tracking that is extra complexity */
   ctx->dd.old_push_dsl = ctx->dd.push_dsl[0]->layout;
   //don't free these now, let ralloc free on teardown to avoid invalid access
   //ralloc_free(ctx->dd.push_dsl[0]);
   //ralloc_free(ctx->dd.push_layout_keys[0]);
@@ -270,8 +270,7 @@ update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum
         pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, true, false>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
      else
         pipeline = zink_get_gfx_pipeline<DYNAMIC_STATE, false, false>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
   }
   if (pipeline) {
   assert(pipeline);
   pipeline_changed = prev_pipeline != pipeline || ctx->shobj_draw;
   if (BATCH_CHANGED || pipeline_changed)
      VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
@@ -986,8 +985,7 @@ update_mesh_pipeline(struct zink_context *ctx, struct zink_batch_state *bs)
         pipeline = zink_get_gfx_pipeline<ZINK_DYNAMIC_STATE3, true, true>(ctx, ctx->mesh_program, &ctx->gfx_pipeline_state, MESA_PRIM_COUNT);
      else
         pipeline = zink_get_gfx_pipeline<ZINK_DYNAMIC_STATE3, false, true>(ctx, ctx->mesh_program, &ctx->gfx_pipeline_state, MESA_PRIM_COUNT);
   }
   if (pipeline) {
   assert(pipeline);
   pipeline_changed = prev_pipeline != pipeline || ctx->shobj_draw;
   if (BATCH_CHANGED || pipeline_changed)
      VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
@@ -119,7 +119,9 @@ pipeline_statistic_convert(enum pipe_statistics_query_index idx)
      [PIPE_STAT_QUERY_PS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_HS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT,
      [PIPE_STAT_QUERY_DS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_CS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT
      [PIPE_STAT_QUERY_CS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT,
      [PIPE_STAT_QUERY_MS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_MESH_SHADER_INVOCATIONS_BIT_EXT,
      [PIPE_STAT_QUERY_TS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT,
   };
   assert(idx < ARRAY_SIZE(map));
   return map[idx];
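The hunk above extends a designated-initializer lookup table with VK_EXT_mesh_shader's mesh/task statistic bits (note the CS entry gaining its previously missing trailing comma). The pattern, reduced to a self-contained sketch with illustrative stand-in values:

    #include <assert.h>

    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    enum stat_index { STAT_PS, STAT_CS, STAT_MS, STAT_TS, STAT_COUNT };

    /* Sketch of the lookup-table pattern: a designated-initializer map
     * with a bounds assert; the values stand in for
     * VkQueryPipelineStatisticFlagBits. */
    static unsigned stat_convert(enum stat_index idx)
    {
       static const unsigned map[] = {
          [STAT_PS] = 0x080,
          [STAT_CS] = 0x400,   /* a trailing comma on every entry keeps */
          [STAT_MS] = 0x800,   /* future additions to one-line diffs */
          [STAT_TS] = 0x1000,
       };
       assert(idx < ARRAY_SIZE(map));
       return map[idx];
    }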
@@ -3133,6 +3133,12 @@ init_driver_workarounds(struct zink_screen *screen)
      screen->info.have_EXT_host_image_copy = false;
}

static void
disable_features(struct zink_screen *screen)
{
   screen->info.mesh_feats.primitiveFragmentShadingRateMeshShader = false;
}

static void
check_hic_shader_read(struct zink_screen *screen)
{
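Clearing `primitiveFragmentShadingRateMeshShader` before device creation means the feature is never enabled in VkDeviceCreateInfo, which is presumably why the matching VUID 07033 entries disappear from the validation filters earlier in this diff. A sketch of the general shape, under the assumption that the features struct is later chained into device creation; `clamp_mesh_features` is a hypothetical helper:

    #include <vulkan/vulkan.h>

    /* Sketch: query the physical device's mesh-shader features, then clear
     * the bits the driver cannot honor before passing the chain on to
     * vkCreateDevice. */
    static void clamp_mesh_features(VkPhysicalDevice pdev,
                                    VkPhysicalDeviceMeshShaderFeaturesEXT *mesh)
    {
       mesh->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_EXT;
       VkPhysicalDeviceFeatures2 feats = {
          .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
          .pNext = mesh,
       };
       vkGetPhysicalDeviceFeatures2(pdev, &feats);
       /* unsupported by the layered driver, so never advertise or enable it */
       mesh->primitiveFragmentShadingRateMeshShader = VK_FALSE;
    }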
@@ -3513,6 +3519,7 @@ zink_internal_create_screen(const struct pipe_screen_config *config, int64_t dev
   check_hic_shader_read(screen);

   init_driver_workarounds(screen);
   disable_features(screen);

   screen->dev = zink_create_logical_device(screen);
   if (!screen->dev)
@@ -437,6 +437,7 @@ struct zink_descriptor_data {
   uint8_t state_changed[ZINK_PIPELINE_MAX]; //gfx, compute, mesh
   struct zink_descriptor_layout_key *push_layout_keys[2]; //gfx, compute
   struct zink_descriptor_layout *push_dsl[2]; //gfx, compute
   VkDescriptorSetLayout old_push_dsl; //the non-fbfetch layout; this can't be destroyed because it may be in use
   VkDescriptorUpdateTemplate push_template[2]; //gfx, compute

   struct zink_descriptor_layout *dummy_dsl;
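The three descriptor hunks above implement one idea: when fbfetch forces a new push descriptor set layout, the old VkDescriptorSetLayout may still be referenced by live programs, so it is parked in `old_push_dsl` and only destroyed in `zink_descriptors_deinit`. A reduced sketch of that keep-alive pattern, with illustrative names:

    #include <vulkan/vulkan.h>

    /* Sketch: a retired layout may still be in use by live programs, so it
     * is stashed and destroyed only at teardown. */
    struct layout_cache {
       VkDescriptorSetLayout current;
       VkDescriptorSetLayout retired;   /* the non-fbfetch layout */
    };

    static void swap_for_fbfetch(struct layout_cache *c, VkDescriptorSetLayout fbfetch)
    {
       c->retired = c->current;   /* may be in use; don't destroy yet */
       c->current = fbfetch;
    }

    static void layouts_deinit(VkDevice dev, struct layout_cache *c)
    {
       vkDestroyDescriptorSetLayout(dev, c->current, NULL);
       vkDestroyDescriptorSetLayout(dev, c->retired, NULL);
    }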
@@ -574,7 +574,7 @@ lvp_encode_as(struct vk_acceleration_structure *dst, VkDeviceAddress intermediat
   /* The BVH exceeds the maximum depth supported by the traversal stack,
    * flatten the offending parts of the tree.
    */
   if (max_node_depth >= 24)
   if (max_node_depth >= (geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR ? LVP_MAX_TLAS_DEPTH : LVP_MAX_BLAS_DEPTH))
      lvp_flatten_as(header, ir_box_nodes, root_offset, node_depth, output);

   free(node_depth);
@@ -547,6 +547,7 @@ VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSets(
               desc[didx + p].functions = iview->planes[p].image_handle->functions;
            }
         } else {
            memset(&desc[didx], 0, sizeof(desc[didx]) * bind_layout->stride);
            for (unsigned k = 0; k < bind_layout->stride; k++)
               desc[didx + k].functions = device->null_image_handle->functions;
         }
@@ -577,6 +578,7 @@ VKAPI_ATTR void VKAPI_CALL lvp_UpdateDescriptorSets(
            lp_jit_image_from_pipe(&desc[j].image, &bview->iv);
            desc[j].functions = bview->image_handle->functions;
         } else {
            memset(&desc[j].image, 0, sizeof(desc[j].image));
            desc[j].functions = device->null_image_handle->functions;
         }
      }
@@ -846,6 +848,7 @@ lvp_descriptor_set_update_with_template(VkDevice _device, VkDescriptorSet descri
               desc[idx + p].functions = iview->planes[p].image_handle->functions;
            }
         } else {
            memset(&desc[idx], 0, sizeof(desc[idx]) * bind_layout->stride);
            for (unsigned k = 0; k < bind_layout->stride; k++)
               desc[idx + k].functions = device->null_image_handle->functions;
         }
@@ -872,6 +875,7 @@ lvp_descriptor_set_update_with_template(VkDevice _device, VkDescriptorSet descri
            lp_jit_image_from_pipe(&desc[idx].image, &bview->iv);
            desc[idx].functions = bview->image_handle->functions;
         } else {
            memset(&desc[idx].image, 0, sizeof(desc[idx].image));
            desc[idx].functions = device->null_image_handle->functions;
         }
         break;
@@ -1073,8 +1077,9 @@ VKAPI_ATTR void VKAPI_CALL lvp_GetDescriptorEXT(
            desc[p].functions = iview->planes[p].image_handle->functions;
         }
      } else {
         unsigned plane_count = size / sizeof(struct lp_descriptor);
         memset(desc, 0, size);

         unsigned plane_count = size / sizeof(struct lp_descriptor);
         for (unsigned p = 0; p < plane_count; p++)
            desc[p].functions = device->null_image_handle->functions;
      }
@@ -1087,6 +1092,7 @@ VKAPI_ATTR void VKAPI_CALL lvp_GetDescriptorEXT(
         lp_jit_bindless_texture_buffer_from_bda(&desc->texture, (void*)(uintptr_t)bda->address);
         desc->functions = get_texture_handle_bda(device, bda->address, bda->range, pformat).functions;
      } else {
         memset(desc, 0, size);
         desc->functions = device->null_texture_handle->functions;
         desc->texture.sampler_index = 0;
      }
@@ -1099,6 +1105,7 @@ VKAPI_ATTR void VKAPI_CALL lvp_GetDescriptorEXT(
         lp_jit_image_buffer_from_bda(&desc->image, (void *)(uintptr_t)bda->address, bda->range, pformat);
         desc->functions = get_image_handle_bda(device, bda->address, bda->range, pformat).functions;
      } else {
         memset(desc, 0, size);
         desc->functions = device->null_image_handle->functions;
      }
      break;
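All of the lavapipe hunks above add the same invariant: every descriptor slot, including empty ones, must carry a valid `functions` table, falling back to a device-wide null handle after the memset so JIT-compiled shader code never dereferences NULL. Reduced to a sketch with illustrative names:

    #include <string.h>

    struct jit_descriptor {
       const void *functions;   /* per-image JIT sample/fetch function table */
       /* ...image/texture payload elided... */
    };

    /* Sketch: zero the descriptors, then point every slot at the null
     * handle's function table. */
    static void write_null_images(struct jit_descriptor *desc, unsigned count,
                                  const void *null_functions)
    {
       memset(desc, 0, sizeof(*desc) * count);
       for (unsigned i = 0; i < count; i++)
          desc[i].functions = null_functions;
    }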
@@ -1271,8 +1271,8 @@ lvp_get_properties(const struct lvp_physical_device *device, struct vk_propertie

   /* VK_KHR_acceleration_structure */
   .maxGeometryCount = (1 << 24) - 1,
   .maxInstanceCount = (1 << 24) - 1,
   .maxPrimitiveCount = (1 << 24) - 1,
   .maxInstanceCount = (1 << LVP_MAX_TLAS_DEPTH) - 1,
   .maxPrimitiveCount = (1 << LVP_MAX_BLAS_DEPTH) - 1,
   .maxPerStageDescriptorAccelerationStructures = MAX_DESCRIPTORS,
   .maxPerStageDescriptorUpdateAfterBindAccelerationStructures = MAX_DESCRIPTORS,
   .maxDescriptorSetAccelerationStructures = MAX_DESCRIPTORS,
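The new limits tie the advertised instance and primitive counts to the traversal depth budgets defined below instead of hard-coded 24-bit values. A worked restatement of the expression the hunk uses, with the depths assumed from the lvp_private.h hunk that follows:

    #include <stdint.h>

    #define LVP_MAX_TLAS_DEPTH 24
    #define LVP_MAX_BLAS_DEPTH 29

    /* Sketch of the relationship the hunk encodes: with encode-time
     * flattening capping the tree at `depth` levels, the advertised count
     * limit is (1 << depth) - 1. */
    static inline uint64_t as_count_limit(unsigned depth)
    {
       return (1ull << depth) - 1;
    }

    /* e.g. as_count_limit(LVP_MAX_TLAS_DEPTH) == 16777215 instances,
     *      as_count_limit(LVP_MAX_BLAS_DEPTH) == 536870911 primitives */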
@@ -100,13 +100,16 @@
#define MAX_DESCRIPTORS 1000000 /* Required by vkd3d-proton */
#define MAX_PUSH_CONSTANTS_SIZE 256
#define MAX_PUSH_DESCRIPTORS 32
#define MAX_DESCRIPTOR_UNIFORM_BLOCK_SIZE 4096
#define MAX_DESCRIPTOR_UNIFORM_BLOCK_SIZE MAX_DESCRIPTORS
#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BLOCKS 8
#define MAX_DGC_STREAMS 16
#define MAX_DGC_TOKENS 16
/* Currently lavapipe does not support more than 1 image plane */
#define LVP_MAX_PLANE_COUNT 1

#define LVP_MAX_TLAS_DEPTH 24
#define LVP_MAX_BLAS_DEPTH 29

#ifdef _WIN32
#define lvp_printflike(a, b)
#else
@@ -356,7 +356,7 @@ lvp_ray_traversal_state_init(nir_function_impl *impl, struct lvp_ray_traversal_s
   state->current_node = nir_local_variable_create(impl, glsl_uint_type(), "traversal.current_node");
   state->stack_base = nir_local_variable_create(impl, glsl_uint_type(), "traversal.stack_base");
   state->stack_ptr = nir_local_variable_create(impl, glsl_uint_type(), "traversal.stack_ptr");
   state->stack = nir_local_variable_create(impl, glsl_array_type(glsl_uint_type(), 24 * 2, 0), "traversal.stack");
   state->stack = nir_local_variable_create(impl, glsl_array_type(glsl_uint_type(), LVP_MAX_TLAS_DEPTH + LVP_MAX_BLAS_DEPTH, 0), "traversal.stack");
   state->hit = nir_local_variable_create(impl, glsl_bool_type(), "traversal.hit");

   state->instance_addr = nir_local_variable_create(impl, glsl_uint64_t_type(), "traversal.instance_addr");
@@ -171,7 +171,8 @@ init_ray_query_traversal_vars(void *ctx, nir_shader *shader, unsigned array_leng
   result.stack_base =
      rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_stack_base"));
   result.stack_ptr = rq_variable_create(ctx, shader, array_length, glsl_uint_type(), VAR_NAME("_stack_ptr"));
   result.stack = rq_variable_create(ctx, shader, array_length, glsl_array_type(glsl_uint_type(), 24 * 2, 0), VAR_NAME("_stack"));
   result.stack = rq_variable_create(ctx, shader, array_length,
                                     glsl_array_type(glsl_uint_type(), LVP_MAX_TLAS_DEPTH + LVP_MAX_BLAS_DEPTH, 0), VAR_NAME("_stack"));
   return result;
}

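Both stack declarations change from a hard-coded `24 * 2` to `LVP_MAX_TLAS_DEPTH + LVP_MAX_BLAS_DEPTH` (24 + 29 = 53 entries): top-level traversal parks its remaining nodes on the lower part of the stack when it enters an instance, and the bottom-level BVH then pushes on top, so the two depth budgets add rather than multiply. A sketch of that layout, assuming a `stack_base` marker as in the variables above:

    #include <stdbool.h>
    #include <stdint.h>

    #define LVP_MAX_TLAS_DEPTH 24
    #define LVP_MAX_BLAS_DEPTH 29
    #define STACK_ENTRIES (LVP_MAX_TLAS_DEPTH + LVP_MAX_BLAS_DEPTH)

    /* Sketch: a fixed traversal stack shared by the TLAS and BLAS phases.
     * TLAS entries stay below stack_base while a BLAS is walked on top. */
    struct traversal_state {
       uint32_t stack[STACK_ENTRIES];
       uint32_t stack_ptr;    /* next free slot */
       uint32_t stack_base;   /* start of the current BLAS's region */
    };

    static bool push_node(struct traversal_state *s, uint32_t node)
    {
       if (s->stack_ptr == STACK_ENTRIES)
          return false;   /* encode-time flattening keeps depth in budget */
       s->stack[s->stack_ptr++] = node;
       return true;
    }

    static void enter_blas(struct traversal_state *s)
    {
       s->stack_base = s->stack_ptr;   /* BLAS pops must not eat TLAS entries */
    }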
@@ -1,6 +1,14 @@
# Copyright © 2017 Dylan Baker
# SPDX-License-Identifier: MIT

libradeonwinsys_deps = [idep_mesautil, dep_libdrm]
libradeonwinsys_c_args = []

if with_gallium_radeonsi
  libradeonwinsys_deps += [idep_amdgfxregs_h]
  libradeonwinsys_c_args = ['-DHAVE_GALLIUM_RADEONSI']
endif

libradeonwinsys = static_library(
  'radeonwinsys',
  files('radeon_drm_bo.c',
@@ -14,5 +22,6 @@ libradeonwinsys = static_library(
        'radeon_surface.h'),
  include_directories : [inc_src, inc_include, inc_gallium, inc_gallium_aux],
  gnu_symbol_visibility : 'hidden',
  dependencies : [idep_mesautil, dep_libdrm],
  c_args : libradeonwinsys_c_args,
  dependencies : libradeonwinsys_deps,
)
@@ -8,6 +8,10 @@
#include "radeon_drm_bo.h"
#include "radeon_drm_cs.h"

#ifdef HAVE_GALLIUM_RADEONSI
#include "amdgfxregs.h"
#endif

#include "util/os_file.h"
#include "util/simple_mtx.h"
#include "util/thread_sched.h"
@@ -105,6 +109,73 @@ static bool radeon_get_drm_value(int fd, unsigned request,
   return true;
}

static void get_hs_info(struct radeon_info *info)
{
   /* This is the size of all TCS outputs in memory per workgroup.
    * Hawaii can't handle num_workgroups > 256 with 8K per workgroup, so use 4K.
    */
   unsigned max_hs_out_vram_dwords_per_wg = info->family == CHIP_HAWAII ? 4096 : 8192;
   unsigned max_workgroups_per_se;

#ifdef HAVE_GALLIUM_RADEONSI /* for gfx6+ register definitions */
   unsigned max_hs_out_vram_dwords_enum = 0;

   switch (max_hs_out_vram_dwords_per_wg) {
   case 8192:
      max_hs_out_vram_dwords_enum = V_03093C_X_8K_DWORDS;
      break;
   case 4096:
      max_hs_out_vram_dwords_enum = V_03093C_X_4K_DWORDS;
      break;
   case 2048:
      max_hs_out_vram_dwords_enum = V_03093C_X_2K_DWORDS;
      break;
   case 1024:
      max_hs_out_vram_dwords_enum = V_03093C_X_1K_DWORDS;
      break;
   default:
      UNREACHABLE("invalid TCS workgroup size");
   }
#endif

   /* Gfx7 should limit num_workgroups to 508 (127 per SE)
    * Gfx6 should limit num_workgroups to 126 (63 per SE)
    */
   if (info->gfx_level == GFX7) {
      max_workgroups_per_se = 127;
   } else {
      max_workgroups_per_se = 63;
   }

   /* Limit to 4 workgroups per CU for TCS, which exhausts LDS if each workgroup occupies 16KB.
    * Note that the offchip allocation isn't deallocated until the corresponding TES waves finish.
    */
   unsigned num_offchip_wg_per_cu = 4;
   unsigned num_workgroups_per_se = MIN2(num_offchip_wg_per_cu * info->max_good_cu_per_sa *
                                         info->max_sa_per_se, max_workgroups_per_se);
   unsigned num_workgroups = num_workgroups_per_se * info->max_se;

#ifdef HAVE_GALLIUM_RADEONSI /* for gfx6+ register definitions */
   if (info->gfx_level == GFX7) {
      info->hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(num_workgroups) |
                               S_03093C_OFFCHIP_GRANULARITY_GFX7(max_hs_out_vram_dwords_enum);
   } else {
      info->hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(num_workgroups) |
                               S_0089B0_OFFCHIP_GRANULARITY(max_hs_out_vram_dwords_enum);
   }
#endif

   /* The typical size of tess factors of 1 TCS workgroup if all patches are triangles. */
   unsigned typical_tess_factor_size_per_wg = (192 / 3) * 16;
   unsigned num_tess_factor_wg_per_cu = 3;

   info->hs_offchip_workgroup_dw_size = max_hs_out_vram_dwords_per_wg;
   info->tess_offchip_ring_size = num_workgroups * max_hs_out_vram_dwords_per_wg * 4;
   info->tess_factor_ring_size = typical_tess_factor_size_per_wg * num_tess_factor_wg_per_cu *
                                 info->max_good_cu_per_sa * info->max_sa_per_se * info->max_se;
   info->total_tess_ring_size = info->tess_offchip_ring_size + info->tess_factor_ring_size;
}

/* Helper function to do the ioctls needed for setup and init. */
static bool do_winsys_init(struct radeon_drm_winsys *ws)
{
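The ring sizes fall out of simple arithmetic, so a worked example helps. The GPU parameters below are illustrative, not a real chip's values: with 8K dwords per workgroup, a gfx7-style cap of 127 workgroups per SE, and 10 CUs per SE on a 4-SE part, the per-SE count is min(4 * 10 * 1, 127) = 40, giving 160 workgroups and an offchip ring of 160 * 8192 * 4 = 5,242,880 bytes; the tess factor ring uses (192 / 3) * 16 = 1024 bytes per workgroup:

    #include <stdio.h>

    #define MIN2(a, b) ((a) < (b) ? (a) : (b))

    /* Worked example of get_hs_info()'s ring-size arithmetic with
     * illustrative GPU parameters (not a real chip's values). */
    int main(void)
    {
       unsigned max_hs_out_vram_dwords_per_wg = 8192; /* non-Hawaii */
       unsigned max_workgroups_per_se = 127;          /* gfx7 limit */
       unsigned num_offchip_wg_per_cu = 4;
       unsigned max_good_cu_per_sa = 10, max_sa_per_se = 1, max_se = 4;

       unsigned num_workgroups_per_se =
          MIN2(num_offchip_wg_per_cu * max_good_cu_per_sa * max_sa_per_se,
               max_workgroups_per_se);                  /* min(40, 127) = 40 */
       unsigned num_workgroups = num_workgroups_per_se * max_se;      /* 160 */

       unsigned offchip_ring = num_workgroups * max_hs_out_vram_dwords_per_wg * 4;
       unsigned tf_per_wg = (192 / 3) * 16;   /* 64 triangle patches * 16 B */
       unsigned tf_ring = tf_per_wg * 3 /* wg per CU */ *
                          max_good_cu_per_sa * max_sa_per_se * max_se;

       /* prints 5242880, 122880 and their sum for these parameters */
       printf("offchip ring: %u bytes, tess factor ring: %u bytes, total: %u\n",
              offchip_ring, tf_ring, offchip_ring + tf_ring);
       return 0;
    }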
@@ -639,6 +710,9 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
   default:;
   }

   if (ws->gen == DRV_SI)
      get_hs_info(&ws->info);

   ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL ||
                  strstr(debug_get_option("AMD_DEBUG", ""), "check_vm") != NULL;
   ws->noop_cs = debug_get_bool_option("RADEON_NOOP", false);
@@ -196,6 +196,9 @@ brw_compile_tcs(const struct brw_compiler *compiler,

   brw_prog_data_init(&prog_data->base.base, &params->base);

   brw_fill_tess_info_from_shader_info(&prog_data->tess_info,
                                       &nir->info);

   nir->info.outputs_written = key->outputs_written;
   nir->info.patch_outputs_written = key->patch_outputs_written;

@@ -221,6 +224,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
      BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);

   prog_data->input_vertices = key->input_vertices;
   prog_data->output_vertices = nir->info.tess.tcs_vertices_out;
   prog_data->patch_count_threshold = get_patch_count_threshold(key->input_vertices);

   if (compiler->use_tcs_multi_patch) {
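The first hunk's placement matters: `brw_fill_tess_info_from_shader_info` reads `nir->info` before the key's `outputs_written`/`patch_outputs_written` overwrite it, so the tess info reflects what the shader itself declared. A minimal sketch of that snapshot-before-override ordering; all identifiers here are illustrative:

    #include <stdint.h>

    struct tess_info   { uint64_t outputs_written; };
    struct shader_info { uint64_t outputs_written; uint64_t patch_outputs_written; };
    struct tcs_key     { uint64_t outputs_written; uint64_t patch_outputs_written; };

    /* Sketch: capture the shader's own output mask first, then let the
     * compile key override the masks used by the rest of the backend. */
    static void compile_tcs(struct tess_info *tess, struct shader_info *info,
                            const struct tcs_key *key)
    {
       tess->outputs_written = info->outputs_written;  /* must precede override */

       info->outputs_written = key->outputs_written;
       info->patch_outputs_written = key->patch_outputs_written;
    }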