mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-30 12:10:09 +01:00
Compare commits
89 commits
main
...
mesa-25.3.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
315b688976 | ||
|
|
3a71d94735 | ||
|
|
8a2bf930bb | ||
|
|
ac492d42be | ||
|
|
2e17fd0cb2 | ||
|
|
9311f170c4 | ||
|
|
3e227a04b1 | ||
|
|
f63a5df30b | ||
|
|
9ba765e3e3 | ||
|
|
8010d0cd39 | ||
|
|
f1f32d557e | ||
|
|
05e5db1a4d | ||
|
|
5ae8474029 | ||
|
|
b3470359bf | ||
|
|
5e1a88cea0 | ||
|
|
040453857b | ||
|
|
28e172e956 | ||
|
|
74880f8954 | ||
|
|
f02f5e217f | ||
|
|
d9636807f7 | ||
|
|
b768139858 | ||
|
|
498a25cfb8 | ||
|
|
9728bbf7b0 | ||
|
|
f142fdc273 | ||
|
|
1c52a94428 | ||
|
|
2cfd3c52b2 | ||
|
|
606ebb042e | ||
|
|
424f37b348 | ||
|
|
7f75931019 | ||
|
|
ba107091c2 | ||
|
|
b74000dbce | ||
|
|
fb2273df78 | ||
|
|
65eb3aed4b | ||
|
|
a9653fa019 | ||
|
|
159d397437 | ||
|
|
6a7effe059 | ||
|
|
2a0a2cc5b0 | ||
|
|
3f9f4d79d3 | ||
|
|
cd253df92a | ||
|
|
bfd09d9891 | ||
|
|
dcecd8fd1e | ||
|
|
1648f759c1 | ||
|
|
d5f7261ce5 | ||
|
|
2c1c52a8c8 | ||
|
|
fe3a3b08c9 | ||
|
|
d9812eaea8 | ||
|
|
be191ceff7 | ||
|
|
49bfddbd11 | ||
|
|
0182cde848 | ||
|
|
94ec7c686d | ||
|
|
4202ea6c7f | ||
|
|
10475e8ac1 | ||
|
|
c1cf6e75ae | ||
|
|
2b8675fd86 | ||
|
|
e967da84a8 | ||
|
|
2a8f2ff397 | ||
|
|
7a30a71c45 | ||
|
|
9c57c0a194 | ||
|
|
425c49ebf2 | ||
|
|
7b7cb63a14 | ||
|
|
1941ada4a6 | ||
|
|
e982234bb6 | ||
|
|
dbbadebe13 | ||
|
|
0d100cc078 | ||
|
|
f656d062e3 | ||
|
|
847ad886d6 | ||
|
|
5dcc65643c | ||
|
|
ab7bda0a1b | ||
|
|
a02d8d5767 | ||
|
|
13fa1460dd | ||
|
|
14544ef278 | ||
|
|
602b4a2924 | ||
|
|
717e8a8caf | ||
|
|
40ff53c5b8 | ||
|
|
bf9e1f2e37 | ||
|
|
c3cf272a04 | ||
|
|
30ba8880b4 | ||
|
|
42ab1c6f3c | ||
|
|
674e2a702a | ||
|
|
756618ee3b | ||
|
|
ca7d2daf5f | ||
|
|
45aafef631 | ||
|
|
8711394383 | ||
|
|
289c768e88 | ||
|
|
84655b4b5d | ||
|
|
fd6b9c70b6 | ||
|
|
9bb7bf9c66 | ||
|
|
f510e6a1bd | ||
|
|
40f7bef16c |
87 changed files with 6012 additions and 1615 deletions
5052
.pick_status.json
Normal file
5052
.pick_status.json
Normal file
File diff suppressed because it is too large
Load diff
2
VERSION
2
VERSION
|
|
@ -1 +1 @@
|
|||
25.3.0-devel
|
||||
25.3.0-rc3
|
||||
|
|
|
|||
|
|
@ -122,9 +122,8 @@ Enable the site and restart nginx:
|
|||
# Second download should be cached.
|
||||
wget http://localhost/cache/?uri=https://s3.freedesktop.org/mesa-tracie-public/itoral-gl-terrain-demo/demo-v2.trace
|
||||
|
||||
Now, set ``download-url`` in your ``traces-*.yml`` entry to something like
|
||||
``http://caching-proxy/cache/?uri=https://s3.freedesktop.org/mesa-tracie-public``
|
||||
and you should have cached downloads for traces. Add it to
|
||||
``FDO_HTTP_CACHE_URI=`` in your ``config.toml`` runner environment lines and you
|
||||
can use it for cached artifact downloads instead of going all the way to
|
||||
freedesktop.org on each job.
|
||||
The trace runner script automatically sets the caching proxy, so there's no
|
||||
need to modify anything in the Mesa CI YAML files.
|
||||
Add ``LAVA_HTTP_CACHE_URI=http://localhost/cache/?uri=`` to your ``config.toml``
|
||||
runner environment lines and you can use it for cached artifact downloads
|
||||
instead of going all the way to freedesktop.org on each job.
|
||||
|
|
|
|||
|
|
@ -109,23 +109,37 @@ lower_mem_access_cb(nir_intrinsic_op intrin, uint8_t bytes, uint8_t bit_size, ui
|
|||
nir_mem_access_size_align res;
|
||||
|
||||
if (intrin == nir_intrinsic_load_shared || intrin == nir_intrinsic_store_shared) {
|
||||
/* Split unsupported shared access. */
|
||||
res.bit_size = MIN2(bit_size, combined_align * 8ull);
|
||||
res.align = res.bit_size / 8;
|
||||
/* Don't use >64-bit LDS loads for performance reasons. */
|
||||
unsigned max_bytes = intrin == nir_intrinsic_store_shared && cb_data->gfx_level >= GFX7 ? 16 : 8;
|
||||
bytes = MIN3(bytes, combined_align, max_bytes);
|
||||
bytes = bytes == 12 ? bytes : round_down_to_power_of_2(bytes);
|
||||
|
||||
/* Split unsupported shared access. */
|
||||
res.bit_size = MIN2(bit_size, bytes * 8ull);
|
||||
res.align = res.bit_size / 8;
|
||||
res.num_components = bytes / res.align;
|
||||
res.shift = nir_mem_access_shift_method_bytealign_amd;
|
||||
return res;
|
||||
}
|
||||
|
||||
const bool is_buffer_load = intrin == nir_intrinsic_load_ubo ||
|
||||
intrin == nir_intrinsic_load_ssbo ||
|
||||
intrin == nir_intrinsic_load_constant;
|
||||
|
||||
if (is_smem) {
|
||||
const bool supported_subdword = cb_data->gfx_level >= GFX12 &&
|
||||
intrin != nir_intrinsic_load_push_constant &&
|
||||
(!cb_data->use_llvm || intrin != nir_intrinsic_load_ubo);
|
||||
|
||||
/* Round up subdword loads if unsupported. */
|
||||
const bool supported_subdword = cb_data->gfx_level >= GFX12 && intrin != nir_intrinsic_load_push_constant;
|
||||
if (bit_size < 32 && (bytes >= 3 || !supported_subdword))
|
||||
if (bytes <= 2 && combined_align % bytes == 0 && supported_subdword) {
|
||||
bit_size = bytes * 8;
|
||||
} else if (bytes % 4 || combined_align % 4) {
|
||||
if (is_buffer_load)
|
||||
bytes += 4 - MIN2(combined_align, 4);
|
||||
bytes = align(bytes, 4);
|
||||
bit_size = 32;
|
||||
}
|
||||
|
||||
/* Generally, require an alignment of 4. */
|
||||
res.align = MIN2(4, bytes);
|
||||
|
|
@ -138,9 +152,6 @@ lower_mem_access_cb(nir_intrinsic_op intrin, uint8_t bytes, uint8_t bit_size, ui
|
|||
if (!util_is_power_of_two_nonzero(bytes) && (cb_data->gfx_level < GFX12 || bytes != 12)) {
|
||||
const uint8_t larger = util_next_power_of_two(bytes);
|
||||
const uint8_t smaller = larger / 2;
|
||||
const bool is_buffer_load = intrin == nir_intrinsic_load_ubo ||
|
||||
intrin == nir_intrinsic_load_ssbo ||
|
||||
intrin == nir_intrinsic_load_constant;
|
||||
const bool is_aligned = align_mul % smaller == 0;
|
||||
|
||||
/* Overfetch up to 1 dword if this is a bounds-checked buffer load or the access is aligned. */
|
||||
|
|
@ -185,8 +196,8 @@ lower_mem_access_cb(nir_intrinsic_op intrin, uint8_t bytes, uint8_t bit_size, ui
|
|||
|
||||
const uint32_t max_pad = 4 - MIN2(combined_align, 4);
|
||||
|
||||
/* Global loads don't have bounds checking, so increasing the size might not be safe. */
|
||||
if (intrin == nir_intrinsic_load_global || intrin == nir_intrinsic_load_global_constant) {
|
||||
/* Global/scratch loads don't have bounds checking, so increasing the size might not be safe. */
|
||||
if (!is_buffer_load) {
|
||||
if (align_mul < 4) {
|
||||
/* If we split the load, only lower it to 32-bit if this is a SMEM load. */
|
||||
const unsigned chunk_bytes = align(bytes, 4) - max_pad;
|
||||
|
|
|
|||
|
|
@ -508,6 +508,8 @@ lower_ms_intrinsic(nir_builder *b, nir_instr *instr, void *state)
|
|||
return update_ms_barrier(b, intrin, s);
|
||||
case nir_intrinsic_load_workgroup_index:
|
||||
return lower_ms_load_workgroup_index(b, intrin, s);
|
||||
case nir_intrinsic_load_num_subgroups:
|
||||
return nir_imm_int(b, DIV_ROUND_UP(s->api_workgroup_size, s->wave_size));
|
||||
case nir_intrinsic_set_vertex_and_primitive_count:
|
||||
return lower_ms_set_vertex_and_primitive_count(b, intrin, s);
|
||||
default:
|
||||
|
|
@ -529,6 +531,7 @@ filter_ms_intrinsic(const nir_instr *instr,
|
|||
intrin->intrinsic == nir_intrinsic_store_per_primitive_output ||
|
||||
intrin->intrinsic == nir_intrinsic_barrier ||
|
||||
intrin->intrinsic == nir_intrinsic_load_workgroup_index ||
|
||||
intrin->intrinsic == nir_intrinsic_load_num_subgroups ||
|
||||
intrin->intrinsic == nir_intrinsic_set_vertex_and_primitive_count;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -214,6 +214,8 @@ select_rt_prolog(Program* program, ac_shader_config* config,
|
|||
bld.sop2(Builder::s_cselect, Definition(vcc, bld.lm),
|
||||
Operand::c32_or_c64(-1u, program->wave_size == 64),
|
||||
Operand::c32_or_c64(0, program->wave_size == 64), Operand(scc, s1));
|
||||
bld.sop2(aco_opcode::s_cselect_b32, Definition(out_launch_size_y, s1),
|
||||
Operand(out_launch_size_y, s1), Operand::c32(1), Operand(scc, s1));
|
||||
bld.vop2(aco_opcode::v_cndmask_b32, Definition(out_launch_ids[0], v1),
|
||||
Operand(tmp_invocation_idx, v1), Operand(out_launch_ids[0], v1), Operand(vcc, bld.lm));
|
||||
bld.vop2(aco_opcode::v_cndmask_b32, Definition(out_launch_ids[1], v1), Operand::zero(),
|
||||
|
|
|
|||
|
|
@ -8114,6 +8114,8 @@ radv_mark_descriptors_dirty(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPo
|
|||
struct radv_descriptor_state *descriptors_state = radv_get_descriptors_state(cmd_buffer, bind_point);
|
||||
|
||||
descriptors_state->dirty |= descriptors_state->valid;
|
||||
if (descriptors_state->dynamic_offset_count)
|
||||
descriptors_state->dirty_dynamic = true;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -8642,7 +8644,6 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline
|
|||
|
||||
if (cmd_buffer->state.compute_pipeline == compute_pipeline)
|
||||
return;
|
||||
radv_mark_descriptors_dirty(cmd_buffer, pipelineBindPoint);
|
||||
|
||||
radv_bind_shader(cmd_buffer, compute_pipeline->base.shaders[MESA_SHADER_COMPUTE], MESA_SHADER_COMPUTE);
|
||||
|
||||
|
|
@ -8656,7 +8657,6 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline
|
|||
|
||||
if (cmd_buffer->state.rt_pipeline == rt_pipeline)
|
||||
return;
|
||||
radv_mark_descriptors_dirty(cmd_buffer, pipelineBindPoint);
|
||||
|
||||
radv_bind_shader(cmd_buffer, rt_pipeline->base.base.shaders[MESA_SHADER_INTERSECTION], MESA_SHADER_INTERSECTION);
|
||||
radv_bind_rt_prolog(cmd_buffer, rt_pipeline->prolog);
|
||||
|
|
@ -8690,7 +8690,6 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline
|
|||
|
||||
if (cmd_buffer->state.graphics_pipeline == graphics_pipeline)
|
||||
return;
|
||||
radv_mark_descriptors_dirty(cmd_buffer, pipelineBindPoint);
|
||||
|
||||
radv_foreach_stage (
|
||||
stage, (cmd_buffer->state.active_stages | graphics_pipeline->active_stages) & RADV_GRAPHICS_STAGE_BITS) {
|
||||
|
|
@ -8744,6 +8743,8 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline
|
|||
cmd_buffer->descriptors[vk_to_bind_point(pipelineBindPoint)].dynamic_offset_count = pipeline->dynamic_offset_count;
|
||||
cmd_buffer->descriptors[vk_to_bind_point(pipelineBindPoint)].need_indirect_descriptors =
|
||||
pipeline->need_indirect_descriptors;
|
||||
|
||||
radv_mark_descriptors_dirty(cmd_buffer, pipelineBindPoint);
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
|
|
|
|||
|
|
@ -508,7 +508,9 @@ radv_begin_sqtt(struct radv_queue *queue)
|
|||
device->sqtt.start_cs[family] = NULL;
|
||||
}
|
||||
|
||||
cs.b = ws->cs_create(ws, radv_queue_ring(queue), false);
|
||||
radv_init_cmd_stream(&cs, radv_queue_ring(queue));
|
||||
|
||||
cs.b = ws->cs_create(ws, cs.hw_ip, false);
|
||||
if (!cs.b)
|
||||
return false;
|
||||
|
||||
|
|
@ -585,7 +587,9 @@ radv_end_sqtt(struct radv_queue *queue)
|
|||
device->sqtt.stop_cs[family] = NULL;
|
||||
}
|
||||
|
||||
cs.b = ws->cs_create(ws, radv_queue_ring(queue), false);
|
||||
radv_init_cmd_stream(&cs, radv_queue_ring(queue));
|
||||
|
||||
cs.b = ws->cs_create(ws, cs.hw_ip, false);
|
||||
if (!cs.b)
|
||||
return false;
|
||||
|
||||
|
|
|
|||
|
|
@ -819,6 +819,32 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, cons
|
|||
if (cap && !cap->valid)
|
||||
cap = NULL;
|
||||
|
||||
if (cap) {
|
||||
pCapabilities->maxCodedExtent.width = cap->max_width;
|
||||
pCapabilities->maxCodedExtent.height = cap->max_height;
|
||||
} else {
|
||||
switch (pVideoProfile->videoCodecOperation) {
|
||||
case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
|
||||
pCapabilities->maxCodedExtent.width = (pdev->info.family < CHIP_TONGA) ? 2048 : 4096;
|
||||
pCapabilities->maxCodedExtent.height = (pdev->info.family < CHIP_TONGA) ? 1152 : 4096;
|
||||
break;
|
||||
case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
|
||||
pCapabilities->maxCodedExtent.width =
|
||||
(pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
|
||||
pCapabilities->maxCodedExtent.height =
|
||||
(pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
|
||||
break;
|
||||
case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR:
|
||||
pCapabilities->maxCodedExtent.width =
|
||||
(pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
|
||||
pCapabilities->maxCodedExtent.height =
|
||||
(pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
pCapabilities->flags = 0;
|
||||
pCapabilities->pictureAccessGranularity.width = VK_VIDEO_H264_MACROBLOCK_WIDTH;
|
||||
pCapabilities->pictureAccessGranularity.height = VK_VIDEO_H264_MACROBLOCK_HEIGHT;
|
||||
|
|
@ -1126,32 +1152,6 @@ radv_GetPhysicalDeviceVideoCapabilitiesKHR(VkPhysicalDevice physicalDevice, cons
|
|||
break;
|
||||
}
|
||||
|
||||
if (cap) {
|
||||
pCapabilities->maxCodedExtent.width = cap->max_width;
|
||||
pCapabilities->maxCodedExtent.height = cap->max_height;
|
||||
} else {
|
||||
switch (pVideoProfile->videoCodecOperation) {
|
||||
case VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR:
|
||||
pCapabilities->maxCodedExtent.width = (pdev->info.family < CHIP_TONGA) ? 2048 : 4096;
|
||||
pCapabilities->maxCodedExtent.height = (pdev->info.family < CHIP_TONGA) ? 1152 : 4096;
|
||||
break;
|
||||
case VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR:
|
||||
pCapabilities->maxCodedExtent.width =
|
||||
(pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
|
||||
pCapabilities->maxCodedExtent.height =
|
||||
(pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
|
||||
break;
|
||||
case VK_VIDEO_CODEC_OPERATION_DECODE_VP9_BIT_KHR:
|
||||
pCapabilities->maxCodedExtent.width =
|
||||
(pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 2048 : 4096) : 8192;
|
||||
pCapabilities->maxCodedExtent.height =
|
||||
(pdev->info.family < CHIP_RENOIR) ? ((pdev->info.family < CHIP_TONGA) ? 1152 : 4096) : 4352;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -890,7 +890,6 @@ radv_enc_slice_header(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
|
|||
uint32_t num_bits[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0};
|
||||
const struct VkVideoEncodeH264PictureInfoKHR *h264_picture_info =
|
||||
vk_find_struct_const(enc_info->pNext, VIDEO_ENCODE_H264_PICTURE_INFO_KHR);
|
||||
int slice_count = h264_picture_info->naluSliceEntryCount;
|
||||
const StdVideoEncodeH264PictureInfo *pic = h264_picture_info->pStdPictureInfo;
|
||||
const StdVideoH264SequenceParameterSet *sps =
|
||||
vk_video_find_h264_enc_std_sps(cmd_buffer->video.params, pic->seq_parameter_set_id);
|
||||
|
|
@ -903,8 +902,6 @@ radv_enc_slice_header(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
|
|||
unsigned int cdw_filled = 0;
|
||||
unsigned int bits_copied = 0;
|
||||
|
||||
assert(slice_count <= 1);
|
||||
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_cmd_stream *cs = cmd_buffer->cs;
|
||||
|
|
@ -3166,6 +3163,36 @@ radv_video_patch_encode_session_parameters(struct radv_device *device, struct vk
|
|||
}
|
||||
break;
|
||||
case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR: {
|
||||
for (unsigned i = 0; i < params->h265_enc.h265_sps_count; i++) {
|
||||
uint32_t pic_width_in_luma_samples =
|
||||
params->h265_enc.h265_sps[i].base.pic_width_in_luma_samples;
|
||||
uint32_t pic_height_in_luma_samples =
|
||||
params->h265_enc.h265_sps[i].base.pic_height_in_luma_samples;
|
||||
uint32_t aligned_pic_width = align(pic_width_in_luma_samples, 64);
|
||||
uint32_t aligned_pic_height = align(pic_height_in_luma_samples, 16);
|
||||
|
||||
/* Override the unaligned pic_{width,height} and make up for it with conformance window
|
||||
* cropping */
|
||||
params->h265_enc.h265_sps[i].base.pic_width_in_luma_samples = aligned_pic_width;
|
||||
params->h265_enc.h265_sps[i].base.pic_height_in_luma_samples = aligned_pic_height;
|
||||
|
||||
if (aligned_pic_width != pic_width_in_luma_samples ||
|
||||
aligned_pic_height != pic_height_in_luma_samples) {
|
||||
params->h265_enc.h265_sps[i].base.flags.conformance_window_flag = 1;
|
||||
params->h265_enc.h265_sps[i].base.conf_win_right_offset +=
|
||||
(aligned_pic_width - pic_width_in_luma_samples) / 2;
|
||||
params->h265_enc.h265_sps[i].base.conf_win_bottom_offset +=
|
||||
(aligned_pic_height - pic_height_in_luma_samples) / 2;
|
||||
}
|
||||
|
||||
/* VCN supports only the following block sizes (resulting in 64x64 CTBs with any coding
|
||||
* block size) */
|
||||
params->h265_enc.h265_sps[i].base.log2_min_luma_coding_block_size_minus3 = 0;
|
||||
params->h265_enc.h265_sps[i].base.log2_diff_max_min_luma_coding_block_size = 3;
|
||||
params->h265_enc.h265_sps[i].base.log2_min_luma_transform_block_size_minus2 = 0;
|
||||
params->h265_enc.h265_sps[i].base.log2_diff_max_min_luma_transform_block_size = 3;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < params->h265_enc.h265_pps_count; i++) {
|
||||
/* cu_qp_delta needs to be enabled if rate control is enabled. VCN2 and newer can also enable
|
||||
* it with rate control disabled. Since we don't know what rate control will be used, we
|
||||
|
|
@ -3268,6 +3295,14 @@ radv_GetEncodedVideoSessionParametersKHR(VkDevice device,
|
|||
assert(sps);
|
||||
char *data_ptr = pData ? (char *)pData + vps_size : NULL;
|
||||
vk_video_encode_h265_sps(sps, size_limit, &sps_size, data_ptr);
|
||||
|
||||
if (pFeedbackInfo) {
|
||||
struct VkVideoEncodeH265SessionParametersFeedbackInfoKHR *h265_feedback_info =
|
||||
vk_find_struct(pFeedbackInfo->pNext, VIDEO_ENCODE_H265_SESSION_PARAMETERS_FEEDBACK_INFO_KHR);
|
||||
pFeedbackInfo->hasOverrides = VK_TRUE;
|
||||
if (h265_feedback_info)
|
||||
h265_feedback_info->hasStdSPSOverrides = VK_TRUE;
|
||||
}
|
||||
}
|
||||
if (h265_get_info->writeStdPPS) {
|
||||
const StdVideoH265PictureParameterSet *pps = vk_video_find_h265_enc_std_pps(templ, h265_get_info->stdPPSId);
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -992,28 +992,34 @@ hk_CmdEndRendering(VkCommandBuffer commandBuffer)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
hk_init_heap(const void *data) {
|
||||
struct hk_cmd_buffer *cmd = (struct hk_cmd_buffer *) data;
|
||||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||
|
||||
perf_debug(cmd, "Allocating heap");
|
||||
|
||||
size_t size = 128 * 1024 * 1024;
|
||||
dev->heap = agx_bo_create(&dev->dev, size, 0, 0, "Geometry heap");
|
||||
|
||||
/* The geometry state buffer is initialized here and then is treated by
|
||||
* the CPU as rodata, even though the GPU uses it for scratch internally.
|
||||
*/
|
||||
off_t off = dev->rodata.heap - dev->rodata.bo->va->addr;
|
||||
struct agx_heap *map = agx_bo_map(dev->rodata.bo) + off;
|
||||
|
||||
*map = (struct agx_heap){
|
||||
.base = dev->heap->va->addr,
|
||||
.size = size,
|
||||
};
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
hk_heap(struct hk_cmd_buffer *cmd)
|
||||
{
|
||||
struct hk_device *dev = hk_cmd_buffer_device(cmd);
|
||||
|
||||
if (unlikely(!dev->heap)) {
|
||||
perf_debug(cmd, "Allocating heap");
|
||||
|
||||
size_t size = 128 * 1024 * 1024;
|
||||
dev->heap = agx_bo_create(&dev->dev, size, 0, 0, "Geometry heap");
|
||||
|
||||
/* The geometry state buffer is initialized here and then is treated by
|
||||
* the CPU as rodata, even though the GPU uses it for scratch internally.
|
||||
*/
|
||||
off_t off = dev->rodata.heap - dev->rodata.bo->va->addr;
|
||||
struct agx_heap *map = agx_bo_map(dev->rodata.bo) + off;
|
||||
|
||||
*map = (struct agx_heap){
|
||||
.base = dev->heap->va->addr,
|
||||
.size = size,
|
||||
};
|
||||
}
|
||||
util_call_once_data(&dev->heap_init_once, hk_init_heap, cmd);
|
||||
|
||||
/* We need to free all allocations after each command buffer execution */
|
||||
if (!cmd->uses_heap) {
|
||||
|
|
|
|||
|
|
@ -330,6 +330,7 @@ hk_GetDescriptorSetLayoutSupport(
|
|||
uint64_t non_variable_size = 0;
|
||||
uint32_t variable_stride = 0;
|
||||
uint32_t variable_count = 0;
|
||||
bool variable_is_inline_uniform_block = false;
|
||||
uint8_t dynamic_buffer_count = 0;
|
||||
|
||||
for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) {
|
||||
|
|
@ -362,6 +363,10 @@ hk_GetDescriptorSetLayoutSupport(
|
|||
*/
|
||||
variable_count = MAX2(1, binding->descriptorCount);
|
||||
variable_stride = stride;
|
||||
|
||||
variable_is_inline_uniform_block =
|
||||
binding->descriptorType ==
|
||||
VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK;
|
||||
} else {
|
||||
/* Since we're aligning to the maximum and since this is just a
|
||||
* check for whether or not the max buffer size is big enough, we
|
||||
|
|
@ -393,12 +398,21 @@ hk_GetDescriptorSetLayoutSupport(
|
|||
switch (ext->sType) {
|
||||
case VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT: {
|
||||
VkDescriptorSetVariableDescriptorCountLayoutSupport *vs = (void *)ext;
|
||||
uint32_t max_var_count;
|
||||
|
||||
if (variable_stride > 0) {
|
||||
vs->maxVariableDescriptorCount =
|
||||
max_var_count =
|
||||
(max_buffer_size - non_variable_size) / variable_stride;
|
||||
} else {
|
||||
vs->maxVariableDescriptorCount = 0;
|
||||
max_var_count = 0;
|
||||
}
|
||||
|
||||
if (variable_is_inline_uniform_block) {
|
||||
max_var_count =
|
||||
MIN2(max_var_count, HK_MAX_INLINE_UNIFORM_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
vs->maxVariableDescriptorCount = max_var_count;
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -92,6 +92,7 @@ struct hk_device {
|
|||
* expected to be a legitimate problem. If it is, we can rework later.
|
||||
*/
|
||||
struct agx_bo *heap;
|
||||
util_once_flag heap_init_once;
|
||||
|
||||
struct {
|
||||
struct agx_scratch vs, fs, cs;
|
||||
|
|
|
|||
|
|
@ -1424,6 +1424,13 @@ hk_copy_memory_to_image(struct hk_device *device, struct hk_image *dst_image,
|
|||
uint32_t src_height = info->memoryImageHeight ?: extent.height;
|
||||
|
||||
uint32_t blocksize_B = util_format_get_blocksize(layout->format);
|
||||
|
||||
/* Align width and height to block */
|
||||
src_width =
|
||||
DIV_ROUND_UP(src_width, util_format_get_blockwidth(layout->format));
|
||||
src_height =
|
||||
DIV_ROUND_UP(src_height, util_format_get_blockheight(layout->format));
|
||||
|
||||
uint32_t src_pitch = src_width * blocksize_B;
|
||||
|
||||
unsigned start_layer = (dst_image->vk.image_type == VK_IMAGE_TYPE_3D)
|
||||
|
|
@ -1496,6 +1503,13 @@ hk_copy_image_to_memory(struct hk_device *device, struct hk_image *src_image,
|
|||
#endif
|
||||
|
||||
uint32_t blocksize_B = util_format_get_blocksize(layout->format);
|
||||
|
||||
/* Align width and height to block */
|
||||
dst_width =
|
||||
DIV_ROUND_UP(dst_width, util_format_get_blockwidth(layout->format));
|
||||
dst_height =
|
||||
DIV_ROUND_UP(dst_height, util_format_get_blockheight(layout->format));
|
||||
|
||||
uint32_t dst_pitch = dst_width * blocksize_B;
|
||||
|
||||
unsigned start_layer = (src_image->vk.image_type == VK_IMAGE_TYPE_3D)
|
||||
|
|
@ -1649,11 +1663,6 @@ hk_copy_image_to_image_cpu(struct hk_device *device, struct hk_image *src_image,
|
|||
&device->physical_device->ubwc_config);
|
||||
#endif
|
||||
} else {
|
||||
/* Work tile-by-tile, holding the unswizzled tile in a temporary
|
||||
* buffer.
|
||||
*/
|
||||
char temp_tile[16384];
|
||||
|
||||
unsigned src_level = info->srcSubresource.mipLevel;
|
||||
unsigned dst_level = info->dstSubresource.mipLevel;
|
||||
uint32_t block_width = src_layout->tilesize_el[src_level].width_el;
|
||||
|
|
@ -1667,6 +1676,12 @@ hk_copy_image_to_image_cpu(struct hk_device *device, struct hk_image *src_image,
|
|||
}
|
||||
|
||||
uint32_t temp_pitch = block_width * src_block_B;
|
||||
size_t temp_tile_size = temp_pitch * (src_offset.y + extent.height);
|
||||
|
||||
/* Work tile-by-tile, holding the unswizzled tile in a temporary
|
||||
* buffer.
|
||||
*/
|
||||
char *temp_tile = malloc(temp_tile_size);
|
||||
|
||||
for (unsigned by = src_offset.y / block_height;
|
||||
by * block_height < src_offset.y + extent.height; by++) {
|
||||
|
|
@ -1683,14 +1698,14 @@ hk_copy_image_to_image_cpu(struct hk_device *device, struct hk_image *src_image,
|
|||
MIN2((bx + 1) * block_width, src_offset.x + extent.width) -
|
||||
src_x_start;
|
||||
|
||||
assert(height * temp_pitch <= ARRAY_SIZE(temp_tile));
|
||||
|
||||
ail_detile((void *)src, temp_tile, src_layout, src_level,
|
||||
temp_pitch, src_x_start, src_y_start, width, height);
|
||||
ail_tile(dst, temp_tile, dst_layout, dst_level, temp_pitch,
|
||||
dst_x_start, dst_y_start, width, height);
|
||||
}
|
||||
}
|
||||
|
||||
free(temp_tile);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -859,7 +859,7 @@ hk_get_device_properties(const struct agx_device *dev,
|
|||
.maxSubgroupSize = 32,
|
||||
.maxComputeWorkgroupSubgroups = 1024 / 32,
|
||||
.requiredSubgroupSizeStages = 0,
|
||||
.maxInlineUniformBlockSize = 1 << 16,
|
||||
.maxInlineUniformBlockSize = HK_MAX_INLINE_UNIFORM_BLOCK_SIZE,
|
||||
.maxPerStageDescriptorInlineUniformBlocks = 32,
|
||||
.maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 32,
|
||||
.maxDescriptorSetInlineUniformBlocks = 6 * 32,
|
||||
|
|
@ -953,7 +953,7 @@ hk_get_device_properties(const struct agx_device *dev,
|
|||
.robustUniformBufferAccessSizeAlignment = HK_MIN_UBO_ALIGNMENT,
|
||||
|
||||
/* VK_EXT_sample_locations */
|
||||
.sampleLocationSampleCounts = sample_counts,
|
||||
.sampleLocationSampleCounts = sample_counts & ~VK_SAMPLE_COUNT_1_BIT,
|
||||
.maxSampleLocationGridSize = (VkExtent2D){1, 1},
|
||||
.sampleLocationCoordinateRange[0] = 0.0f,
|
||||
.sampleLocationCoordinateRange[1] = 0.9375f,
|
||||
|
|
|
|||
|
|
@ -12,18 +12,19 @@
|
|||
#include "vk_log.h"
|
||||
#include "vk_util.h"
|
||||
|
||||
#define HK_MAX_SETS 8
|
||||
#define HK_MAX_PUSH_SIZE 256
|
||||
#define HK_MAX_DYNAMIC_BUFFERS 64
|
||||
#define HK_MAX_RTS 8
|
||||
#define HK_MIN_SSBO_ALIGNMENT 16
|
||||
#define HK_MIN_TEXEL_BUFFER_ALIGNMENT 16
|
||||
#define HK_MIN_UBO_ALIGNMENT 64
|
||||
#define HK_MAX_VIEWPORTS 16
|
||||
#define HK_MAX_DESCRIPTOR_SIZE 64
|
||||
#define HK_MAX_PUSH_DESCRIPTORS 32
|
||||
#define HK_MAX_DESCRIPTOR_SET_SIZE (1u << 30)
|
||||
#define HK_MAX_DESCRIPTORS (1 << 20)
|
||||
#define HK_MAX_SETS 8
|
||||
#define HK_MAX_PUSH_SIZE 256
|
||||
#define HK_MAX_DYNAMIC_BUFFERS 64
|
||||
#define HK_MAX_RTS 8
|
||||
#define HK_MIN_SSBO_ALIGNMENT 16
|
||||
#define HK_MIN_TEXEL_BUFFER_ALIGNMENT 16
|
||||
#define HK_MIN_UBO_ALIGNMENT 64
|
||||
#define HK_MAX_VIEWPORTS 16
|
||||
#define HK_MAX_DESCRIPTOR_SIZE 64
|
||||
#define HK_MAX_PUSH_DESCRIPTORS 32
|
||||
#define HK_MAX_DESCRIPTOR_SET_SIZE (1u << 30)
|
||||
#define HK_MAX_INLINE_UNIFORM_BLOCK_SIZE (1u << 16)
|
||||
#define HK_MAX_DESCRIPTORS (1 << 20)
|
||||
#define HK_PUSH_DESCRIPTOR_SET_SIZE \
|
||||
(HK_MAX_PUSH_DESCRIPTORS * HK_MAX_DESCRIPTOR_SIZE)
|
||||
#define HK_SSBO_BOUNDS_CHECK_ALIGNMENT 4
|
||||
|
|
|
|||
|
|
@ -812,11 +812,6 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
|
|||
/* Now setup the command structs */
|
||||
struct util_dynarray payload;
|
||||
util_dynarray_init(&payload, NULL);
|
||||
union drm_asahi_cmd *cmds = malloc(sizeof(*cmds) * command_count);
|
||||
if (cmds == NULL) {
|
||||
free(cmds);
|
||||
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
}
|
||||
|
||||
unsigned nr_vdm = 0, nr_cdm = 0;
|
||||
|
||||
|
|
|
|||
|
|
@ -319,14 +319,10 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
|||
case nir_intrinsic_load_base_global_invocation_id:
|
||||
case nir_intrinsic_load_base_workgroup_id:
|
||||
case nir_intrinsic_load_alpha_reference_amd:
|
||||
case nir_intrinsic_load_ubo_uniform_block_intel:
|
||||
case nir_intrinsic_load_ssbo_uniform_block_intel:
|
||||
case nir_intrinsic_load_shared_uniform_block_intel:
|
||||
case nir_intrinsic_load_barycentric_optimize_amd:
|
||||
case nir_intrinsic_load_poly_line_smooth_enabled:
|
||||
case nir_intrinsic_load_rasterization_primitive_amd:
|
||||
case nir_intrinsic_unit_test_uniform_amd:
|
||||
case nir_intrinsic_load_global_constant_uniform_block_intel:
|
||||
case nir_intrinsic_load_debug_log_desc_amd:
|
||||
case nir_intrinsic_load_xfb_state_address_gfx12_amd:
|
||||
case nir_intrinsic_cmat_length:
|
||||
|
|
@ -364,6 +360,24 @@ visit_intrinsic(nir_intrinsic_instr *instr, struct divergence_state *state)
|
|||
is_divergent = false;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ubo_uniform_block_intel:
|
||||
case nir_intrinsic_load_ssbo_uniform_block_intel:
|
||||
case nir_intrinsic_load_shared_uniform_block_intel:
|
||||
case nir_intrinsic_load_global_constant_uniform_block_intel:
|
||||
if (options & (nir_divergence_across_subgroups |
|
||||
nir_divergence_multiple_workgroup_per_compute_subgroup)) {
|
||||
unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
|
||||
for (unsigned i = 0; i < num_srcs; i++) {
|
||||
if (src_divergent(instr->src[i], state)) {
|
||||
is_divergent = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
is_divergent = false;
|
||||
}
|
||||
break;
|
||||
|
||||
/* This is divergent because it specifically loads sequential values into
|
||||
* successive SIMD lanes.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -1069,6 +1069,7 @@ nir_get_io_index_src_number(const nir_intrinsic_instr *instr)
|
|||
IMG_CASE(atomic):
|
||||
IMG_CASE(atomic_swap):
|
||||
IMG_CASE(size):
|
||||
IMG_CASE(levels):
|
||||
IMG_CASE(samples):
|
||||
IMG_CASE(texel_address):
|
||||
IMG_CASE(samples_identical):
|
||||
|
|
|
|||
|
|
@ -1228,8 +1228,16 @@ wrap_instr(nir_builder *b, nir_instr *instr, void *data)
|
|||
static bool
|
||||
wrap_instrs(nir_shader *shader, wrap_instr_callback callback)
|
||||
{
|
||||
return nir_shader_instructions_pass(shader, wrap_instr,
|
||||
nir_metadata_none, callback);
|
||||
bool progress = nir_shader_instructions_pass(shader, wrap_instr,
|
||||
nir_metadata_none, callback);
|
||||
/* Wrapping jump instructions that are located inside ifs can break SSA
|
||||
* invariants because the else block no longer dominates the merge block.
|
||||
* Repair the SSA to make the validator happy again.
|
||||
*/
|
||||
if (progress)
|
||||
nir_repair_ssa(shader);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
|
|||
|
|
@ -4096,9 +4096,9 @@ distribute_src_mods = [
|
|||
(('fneg', ('fmul(is_used_once)', a, b)), ('fmul', ('fneg', a), b)),
|
||||
(('fabs', ('fmul(is_used_once)', a, b)), ('fmul', ('fabs', a), ('fabs', b))),
|
||||
|
||||
(('fneg', ('ffma(is_used_once)', a, b, c)), ('ffma', ('fneg', a), b, ('fneg', c))),
|
||||
(('fneg', ('ffma(is_used_once,nsz)', a, b, c)), ('ffma', ('fneg', a), b, ('fneg', c))),
|
||||
(('fneg', ('flrp(is_used_once)', a, b, c)), ('flrp', ('fneg', a), ('fneg', b), c)),
|
||||
(('fneg', ('~fadd(is_used_once)', a, b)), ('fadd', ('fneg', a), ('fneg', b))),
|
||||
(('fneg', ('fadd(is_used_once,nsz)', a, b)), ('fadd', ('fneg', a), ('fneg', b))),
|
||||
|
||||
# Note that fmin <-> fmax. I don't think there is a way to distribute
|
||||
# fabs() into fmin or fmax.
|
||||
|
|
|
|||
|
|
@ -82,7 +82,9 @@ opt_shrink_store_instr(nir_builder *b, nir_intrinsic_instr *instr, bool shrink_i
|
|||
|
||||
/* Trim the num_components stored according to the write mask. */
|
||||
unsigned write_mask = nir_intrinsic_write_mask(instr);
|
||||
unsigned last_bit = util_last_bit(write_mask);
|
||||
/* Don't trim down to an invalid number of components, though. */
|
||||
unsigned last_bit = nir_round_up_components(util_last_bit(write_mask));
|
||||
|
||||
if (last_bit < instr->num_components) {
|
||||
nir_def *def = nir_trim_vector(b, instr->src[0].ssa, last_bit);
|
||||
nir_src_rewrite(&instr->src[0], def);
|
||||
|
|
|
|||
|
|
@ -652,6 +652,7 @@ nir_precompiled_build_variant(const nir_function *libfunc,
|
|||
|
||||
assert(libfunc->workgroup_size[0] != 0 && "must set workgroup size");
|
||||
|
||||
b.shader->info.workgroup_size_variable = false;
|
||||
b.shader->info.workgroup_size[0] = libfunc->workgroup_size[0];
|
||||
b.shader->info.workgroup_size[1] = libfunc->workgroup_size[1];
|
||||
b.shader->info.workgroup_size[2] = libfunc->workgroup_size[2];
|
||||
|
|
|
|||
|
|
@ -506,8 +506,8 @@ vtn_pointer_dereference(struct vtn_builder *b,
|
|||
type = type->array_element;
|
||||
}
|
||||
tail = nir_build_deref_array(&b->nb, tail, arr_index);
|
||||
tail->arr.in_bounds = deref_chain->in_bounds;
|
||||
}
|
||||
tail->arr.in_bounds = deref_chain->in_bounds;
|
||||
|
||||
access |= type->access;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2300,6 +2300,17 @@ insert_live_out_moves(struct ra_ctx *ctx)
|
|||
insert_file_live_out_moves(ctx, &ctx->shared);
|
||||
}
|
||||
|
||||
static bool
|
||||
has_merge_set_preferred_reg(struct ir3_register *reg)
|
||||
{
|
||||
assert(reg->merge_set);
|
||||
assert(reg->num != INVALID_REG);
|
||||
|
||||
return reg->merge_set->preferred_reg != (physreg_t)~0 &&
|
||||
ra_reg_get_physreg(reg) ==
|
||||
reg->merge_set->preferred_reg + reg->merge_set_offset;
|
||||
}
|
||||
|
||||
static void
|
||||
handle_block(struct ra_ctx *ctx, struct ir3_block *block)
|
||||
{
|
||||
|
|
@ -2338,17 +2349,15 @@ handle_block(struct ra_ctx *ctx, struct ir3_block *block)
|
|||
struct ir3_register *dst = input->dsts[0];
|
||||
assert(dst->num != INVALID_REG);
|
||||
|
||||
physreg_t dst_start = ra_reg_get_physreg(dst);
|
||||
physreg_t dst_end;
|
||||
|
||||
if (dst->merge_set) {
|
||||
if (dst->merge_set && has_merge_set_preferred_reg(dst)) {
|
||||
/* Take the whole merge set into account to prevent its range being
|
||||
* allocated for defs not part of the merge set.
|
||||
*/
|
||||
assert(dst_start >= dst->merge_set_offset);
|
||||
dst_end = dst_start - dst->merge_set_offset + dst->merge_set->size;
|
||||
dst_end = dst->merge_set->preferred_reg + dst->merge_set->size;
|
||||
} else {
|
||||
dst_end = dst_start + reg_size(dst);
|
||||
dst_end = ra_reg_get_physreg(dst) + reg_size(dst);
|
||||
}
|
||||
|
||||
struct ra_file *file = ra_get_file(ctx, dst);
|
||||
|
|
|
|||
|
|
@ -1461,6 +1461,15 @@ r3d_dst_gmem(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
|||
gmem_offset = tu_attachment_gmem_offset(cmd, att, layer);
|
||||
}
|
||||
|
||||
/* On a7xx we must always use FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8. See
|
||||
* blit_base_format().
|
||||
*/
|
||||
if (CHIP >= A7XX && att->format == VK_FORMAT_D24_UNORM_S8_UINT) {
|
||||
RB_MRT_BUF_INFO = pkt_field_set(A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT,
|
||||
RB_MRT_BUF_INFO,
|
||||
FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8);
|
||||
}
|
||||
|
||||
tu_cs_emit_regs(cs,
|
||||
RB_MRT_BUF_INFO(CHIP, 0, .dword = RB_MRT_BUF_INFO),
|
||||
A6XX_RB_MRT_PITCH(0, 0),
|
||||
|
|
@ -1533,7 +1542,8 @@ r3d_setup(struct tu_cmd_buffer *cmd,
|
|||
tu_cs_emit_call(cs, cmd->device->dbg_renderpass_stomp_cs);
|
||||
}
|
||||
|
||||
enum a6xx_format fmt = blit_base_format<CHIP>(dst_format, ubwc, false);
|
||||
enum a6xx_format fmt = blit_base_format<CHIP>(dst_format, ubwc,
|
||||
blit_param & R3D_DST_GMEM);
|
||||
fixup_dst_format(src_format, &dst_format, &fmt);
|
||||
|
||||
if (!cmd->state.pass) {
|
||||
|
|
|
|||
|
|
@ -208,8 +208,8 @@ tu_CreateDescriptorSetLayout(
|
|||
if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
|
||||
set_layout->has_inline_uniforms = true;
|
||||
|
||||
if (variable_flags && binding->binding < variable_flags->bindingCount &&
|
||||
(variable_flags->pBindingFlags[binding->binding] &
|
||||
if (variable_flags && j < variable_flags->bindingCount &&
|
||||
(variable_flags->pBindingFlags[j] &
|
||||
VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)) {
|
||||
assert(!binding->pImmutableSamplers); /* Terribly ill defined how
|
||||
many samplers are valid */
|
||||
|
|
@ -377,7 +377,7 @@ tu_GetDescriptorSetLayoutSupport(
|
|||
uint64_t max_count = MAX_SET_SIZE;
|
||||
unsigned descriptor_count = binding->descriptorCount;
|
||||
if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
|
||||
max_count = MAX_SET_SIZE - size;
|
||||
max_count = MAX_INLINE_UBO_RANGE - size;
|
||||
descriptor_count = descriptor_sz;
|
||||
descriptor_sz = 1;
|
||||
} else if (descriptor_sz) {
|
||||
|
|
@ -388,9 +388,9 @@ tu_GetDescriptorSetLayoutSupport(
|
|||
supported = false;
|
||||
}
|
||||
|
||||
if (variable_flags && binding->binding < variable_flags->bindingCount &&
|
||||
if (variable_flags && i < variable_flags->bindingCount &&
|
||||
variable_count &&
|
||||
(variable_flags->pBindingFlags[binding->binding] &
|
||||
(variable_flags->pBindingFlags[i] &
|
||||
VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT)) {
|
||||
variable_count->maxVariableDescriptorCount =
|
||||
MIN2(UINT32_MAX, max_count);
|
||||
|
|
|
|||
|
|
@ -417,7 +417,8 @@ tu_render_pass_patch_input_gmem(struct tu_render_pass *pass)
|
|||
uint32_t a = subpass->input_attachments[j].attachment;
|
||||
if (a == VK_ATTACHMENT_UNUSED)
|
||||
continue;
|
||||
subpass->input_attachments[j].patch_input_gmem = written[a];
|
||||
subpass->input_attachments[j].patch_input_gmem =
|
||||
written[a] && pass->attachments[a].gmem;
|
||||
}
|
||||
|
||||
for (unsigned j = 0; j < subpass->color_count; j++) {
|
||||
|
|
@ -1126,6 +1127,7 @@ tu_CreateRenderPass2(VkDevice _device,
|
|||
if (!att->gmem) {
|
||||
att->clear_mask = 0;
|
||||
att->load = false;
|
||||
att->load_stencil = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3157,8 +3157,6 @@ tu6_emit_blend(struct tu_cs *cs,
|
|||
|
||||
bool dual_src_blend = tu_blend_state_is_dual_src(cb);
|
||||
|
||||
tu_cs_emit_regs(cs, A6XX_SP_PS_MRT_CNTL(.mrt = num_rts));
|
||||
tu_cs_emit_regs(cs, A6XX_RB_PS_MRT_CNTL(.mrt = num_rts));
|
||||
tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL(.enable_blend = blend_enable_mask,
|
||||
.unk8 = true,
|
||||
.dual_color_in_enable =
|
||||
|
|
@ -3180,10 +3178,12 @@ tu6_emit_blend(struct tu_cs *cs,
|
|||
.alpha_to_one = alpha_to_one_enable,
|
||||
.sample_mask = sample_mask));
|
||||
|
||||
unsigned num_remapped_rts = 0;
|
||||
for (unsigned i = 0; i < num_rts; i++) {
|
||||
if (cal->color_map[i] == MESA_VK_ATTACHMENT_UNUSED)
|
||||
continue;
|
||||
unsigned remapped_idx = cal->color_map[i];
|
||||
num_remapped_rts = MAX2(num_remapped_rts, remapped_idx + 1);
|
||||
const struct vk_color_blend_attachment_state *att = &cb->attachments[i];
|
||||
if ((cb->color_write_enables & (1u << i)) && i < cb->attachment_count) {
|
||||
const enum a3xx_rb_blend_opcode color_op = tu6_blend_op(att->color_blend_op);
|
||||
|
|
@ -3227,6 +3227,8 @@ tu6_emit_blend(struct tu_cs *cs,
|
|||
A6XX_RB_MRT_BLEND_CONTROL(remapped_idx,));
|
||||
}
|
||||
}
|
||||
tu_cs_emit_regs(cs, A6XX_SP_PS_MRT_CNTL(.mrt = num_remapped_rts));
|
||||
tu_cs_emit_regs(cs, A6XX_RB_PS_MRT_CNTL(.mrt = num_remapped_rts));
|
||||
}
|
||||
|
||||
static const enum mesa_vk_dynamic_graphics_state tu_blend_constants_state[] = {
|
||||
|
|
|
|||
|
|
@ -223,9 +223,9 @@ iris_apply_brw_tes_prog_data(struct iris_compiled_shader *shader,
|
|||
|
||||
iris_apply_brw_vue_prog_data(&brw->base, &iris->base);
|
||||
|
||||
iris->partitioning = brw->partitioning;
|
||||
iris->output_topology = brw->output_topology;
|
||||
iris->domain = brw->domain;
|
||||
iris->partitioning = brw_tess_info_partitioning(brw->tess_info);
|
||||
iris->output_topology = brw_tess_info_output_topology(brw->tess_info);
|
||||
iris->domain = brw_tess_info_domain(brw->tess_info);
|
||||
iris->include_primitive_id = brw->include_primitive_id;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1236,44 +1236,6 @@ spec@nv_texture_env_combine4@nv_texture_env_combine4-combine,Fail
|
|||
|
||||
spec@oes_texture_float@oes_texture_float half,Fail
|
||||
|
||||
# Remaining fallout from 9d359c6d10adb1cd2978a0e13714a3f98544aae8
|
||||
spec@arb_texture_compression@fbo-generatemipmap-formats,Fail
|
||||
spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGB,Fail
|
||||
spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGB NPOT,Fail
|
||||
spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA,Fail
|
||||
spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA NPOT,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgb_s3tc_dxt1_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt1_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt3_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt5_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt1_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt3_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt5_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_s3tc_dxt1_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT1_EXT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT1_EXT NPOT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT3_EXT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT3_EXT NPOT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT5_EXT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT5_EXT NPOT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGB_S3TC_DXT1_EXT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGB_S3TC_DXT1_EXT NPOT,Fail
|
||||
spec@ext_texture_compression_s3tc@gen-compressed-teximage,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB NPOT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA NPOT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT NPOT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT NPOT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT NPOT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_S3TC_DXT1_EXT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_S3TC_DXT1_EXT NPOT,Fail
|
||||
|
||||
# uprev Piglit in Mesa
|
||||
spec@!opengl 1.1@teximage-scale-bias,Fail
|
||||
spec@glsl-1.10@execution@glsl-fs-texture2d-mipmap-const-bias-01,Fail
|
||||
|
|
|
|||
|
|
@ -1280,44 +1280,6 @@ spec@nv_texture_env_combine4@nv_texture_env_combine4-combine,Fail
|
|||
|
||||
spec@oes_texture_float@oes_texture_float half,Fail
|
||||
|
||||
# Remaining fallout from 9d359c6d10adb1cd2978a0e13714a3f98544aae8
|
||||
spec@arb_texture_compression@fbo-generatemipmap-formats,Fail
|
||||
spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGB,Fail
|
||||
spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGB NPOT,Fail
|
||||
spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA,Fail
|
||||
spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA NPOT,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgb_s3tc_dxt1_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt1_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt3_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt5_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt1_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt3_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt5_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_s3tc_dxt1_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT1_EXT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT1_EXT NPOT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT3_EXT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT3_EXT NPOT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT5_EXT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT5_EXT NPOT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGB_S3TC_DXT1_EXT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGB_S3TC_DXT1_EXT NPOT,Fail
|
||||
spec@ext_texture_compression_s3tc@gen-compressed-teximage,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB NPOT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA NPOT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT NPOT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT NPOT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT NPOT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_S3TC_DXT1_EXT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_S3TC_DXT1_EXT NPOT,Fail
|
||||
|
||||
# uprev Piglit in Mesa
|
||||
spec@!opengl 1.1@teximage-scale-bias,Fail
|
||||
spec@glsl-1.10@execution@glsl-fs-texture2d-mipmap-const-bias-01,Fail
|
||||
|
|
|
|||
|
|
@ -778,78 +778,6 @@ dEQP-GLES2.functional.texture.mipmap.2d.projected.linear_nearest_repeat,Fail
|
|||
dEQP-GLES2.functional.texture.mipmap.2d.projected.linear_nearest_mirror,Fail
|
||||
dEQP-GLES2.functional.texture.mipmap.2d.projected.linear_nearest_clamp,Fail
|
||||
|
||||
# Remaining fallout from 9d359c6d10adb1cd2978a0e13714a3f98544aae8
|
||||
spec@arb_texture_compression@fbo-generatemipmap-formats,Fail
|
||||
spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE,Fail
|
||||
spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE NPOT,Fail
|
||||
spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_ALPHA,Fail
|
||||
spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_ALPHA NPOT,Fail
|
||||
spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGB,Fail
|
||||
spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGB NPOT,Fail
|
||||
spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA,Fail
|
||||
spec@arb_texture_compression@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA NPOT,Fail
|
||||
spec@ati_texture_compression_3dc@fbo-generatemipmap-formats,Fail
|
||||
spec@ati_texture_compression_3dc@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI,Fail
|
||||
spec@ati_texture_compression_3dc@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI NPOT,Fail
|
||||
spec@ext_texture_compression_latc@fbo-generatemipmap-formats,Fail
|
||||
spec@ext_texture_compression_latc@fbo-generatemipmap-formats-signed,Fail
|
||||
spec@ext_texture_compression_latc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT,Fail
|
||||
spec@ext_texture_compression_latc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT NPOT,Fail
|
||||
spec@ext_texture_compression_latc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT,Fail
|
||||
spec@ext_texture_compression_latc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT NPOT,Fail
|
||||
spec@ext_texture_compression_latc@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT,Fail
|
||||
spec@ext_texture_compression_latc@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT NPOT,Fail
|
||||
spec@ext_texture_compression_latc@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_LATC1_EXT,Fail
|
||||
spec@ext_texture_compression_latc@fbo-generatemipmap-formats@GL_COMPRESSED_LUMINANCE_LATC1_EXT NPOT,Fail
|
||||
spec@ext_texture_compression_rgtc@compressedteximage gl_compressed_red_green_rgtc2_ext,Fail
|
||||
spec@ext_texture_compression_rgtc@compressedteximage gl_compressed_signed_red_green_rgtc2_ext,Fail
|
||||
spec@ext_texture_compression_rgtc@compressedteximage gl_compressed_signed_red_rgtc1_ext,Fail
|
||||
spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats,Fail
|
||||
spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats-signed,Fail
|
||||
spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_RED_RGTC1,Fail
|
||||
spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_RED_RGTC1 NPOT,Fail
|
||||
spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_RG_RGTC2,Fail
|
||||
spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats-signed@GL_COMPRESSED_SIGNED_RG_RGTC2 NPOT,Fail
|
||||
spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RED,Fail
|
||||
spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RED NPOT,Fail
|
||||
spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RED_RGTC1,Fail
|
||||
spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RED_RGTC1 NPOT,Fail
|
||||
spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RG,Fail
|
||||
spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RG_RGTC2,Fail
|
||||
spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RG_RGTC2 NPOT,Fail
|
||||
spec@ext_texture_compression_rgtc@fbo-generatemipmap-formats@GL_COMPRESSED_RG NPOT,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgb_s3tc_dxt1_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt1_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt3_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_rgba_s3tc_dxt5_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt1_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt3_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_alpha_s3tc_dxt5_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@compressedteximage gl_compressed_srgb_s3tc_dxt1_ext,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT1_EXT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT1_EXT NPOT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT3_EXT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT3_EXT NPOT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT5_EXT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGBA_S3TC_DXT5_EXT NPOT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGB_S3TC_DXT1_EXT,Fail
|
||||
spec@ext_texture_compression_s3tc@fbo-generatemipmap-formats@GL_COMPRESSED_RGB_S3TC_DXT1_EXT NPOT,Fail
|
||||
spec@ext_texture_compression_s3tc@gen-compressed-teximage,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB NPOT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA NPOT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT NPOT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT NPOT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT NPOT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_S3TC_DXT1_EXT,Fail
|
||||
spec@ext_texture_srgb@fbo-generatemipmap-formats-s3tc@GL_COMPRESSED_SRGB_S3TC_DXT1_EXT NPOT,Fail
|
||||
|
||||
# uprev Piglit in Mesa
|
||||
spec@!opengl 1.1@teximage-scale-bias,Fail
|
||||
spec@ext_framebuffer_multisample@accuracy all_samples color depthstencil linear,Fail
|
||||
|
|
|
|||
|
|
@ -947,6 +947,32 @@ r300_set_framebuffer_state(struct pipe_context* pipe,
|
|||
util_framebuffer_init(pipe, state, r300->fb_cbufs, &r300->fb_zsbuf);
|
||||
util_copy_framebuffer_state(r300->fb_state.state, state);
|
||||
|
||||
/* DXTC blits require that blocks are 2x1 or 4x1 pixels, but
|
||||
* pipe_surface_width sets the framebuffer width as if blocks were 1x1
|
||||
* pixels. Override the width to correct that.
|
||||
*/
|
||||
if (state->nr_cbufs == 1 && state->cbufs[0].texture &&
|
||||
state->cbufs[0].format == PIPE_FORMAT_R8G8B8A8_UNORM &&
|
||||
util_format_is_compressed(state->cbufs[0].texture->format)) {
|
||||
struct pipe_framebuffer_state *fb =
|
||||
(struct pipe_framebuffer_state*)r300->fb_state.state;
|
||||
const struct util_format_description *desc =
|
||||
util_format_description(state->cbufs[0].texture->format);
|
||||
unsigned width = u_minify(state->cbufs[0].texture->width0,
|
||||
state->cbufs[0].level);
|
||||
|
||||
assert(desc->block.width == 4 && desc->block.height == 4);
|
||||
|
||||
/* Each 64-bit DXT block is 2x1 pixels, and each 128-bit DXT
|
||||
* block is 4x1 pixels when blitting.
|
||||
*/
|
||||
width = align(width, 4); /* align to the DXT block width. */
|
||||
if (desc->block.bits == 64)
|
||||
width = DIV_ROUND_UP(width, 2);
|
||||
|
||||
fb->width = width;
|
||||
}
|
||||
|
||||
/* Remove trailing NULL colorbuffers. */
|
||||
while (current_state->nr_cbufs && !current_state->cbufs[current_state->nr_cbufs-1].texture)
|
||||
current_state->nr_cbufs--;
|
||||
|
|
|
|||
|
|
@ -201,6 +201,7 @@ void r600_draw_rectangle(struct blitter_context *blitter,
|
|||
rctx->b.set_vertex_buffers(&rctx->b, 1, &vbuffer);
|
||||
util_draw_arrays_instanced(&rctx->b, R600_PRIM_RECTANGLE_LIST, 0, 3,
|
||||
0, num_instances);
|
||||
pipe_resource_reference(&buf, NULL);
|
||||
}
|
||||
|
||||
static void r600_dma_emit_wait_idle(struct r600_common_context *rctx)
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@
|
|||
#include "util/u_memory.h"
|
||||
#include "util/u_pack_color.h"
|
||||
#include "util/u_surface.h"
|
||||
#include "util/u_resource.h"
|
||||
#include "util/os_time.h"
|
||||
#include "frontend/winsys_handle.h"
|
||||
#include <errno.h>
|
||||
|
|
@ -442,7 +443,7 @@ static bool r600_texture_get_param(struct pipe_screen *screen,
|
|||
|
||||
switch (param) {
|
||||
case PIPE_RESOURCE_PARAM_NPLANES:
|
||||
*value = 1;
|
||||
*value = util_resource_num(resource);
|
||||
return true;
|
||||
|
||||
case PIPE_RESOURCE_PARAM_STRIDE:
|
||||
|
|
|
|||
|
|
@ -56,6 +56,19 @@ static bool si_update_shaders(struct si_context *sctx)
|
|||
struct si_shader *old_ps = sctx->shader.ps.current;
|
||||
int r;
|
||||
|
||||
if (GFX_VERSION >= GFX9) {
|
||||
/* For merged shaders, mark the next shader as dirty so its previous_stage is updated. */
|
||||
if (is_vs_state_changed) {
|
||||
if (HAS_TESS) {
|
||||
is_tess_state_changed = true;
|
||||
} else if (HAS_GS) {
|
||||
is_gs_state_changed = true;
|
||||
}
|
||||
}
|
||||
if ((sctx->dirty_shaders_mask & BITFIELD_BIT(MESA_SHADER_TESS_EVAL)) && HAS_GS && HAS_TESS)
|
||||
is_gs_state_changed = true;
|
||||
}
|
||||
|
||||
/* Update TCS and TES. */
|
||||
if (HAS_TESS && is_tess_state_changed) {
|
||||
if (!sctx->has_tessellation) {
|
||||
|
|
|
|||
|
|
@ -690,6 +690,7 @@ v3d_get_sand8_fs(struct pipe_context *pctx, int cpp)
|
|||
nir_variable_create(b.shader, nir_var_shader_out,
|
||||
vec4, "f_color");
|
||||
color_out->data.location = FRAG_RESULT_COLOR;
|
||||
b.shader->info.outputs_written |= BITFIELD_BIT(FRAG_RESULT_COLOR);
|
||||
|
||||
nir_variable *pos_in =
|
||||
nir_variable_create(b.shader, nir_var_shader_in, vec4, "pos");
|
||||
|
|
@ -998,6 +999,7 @@ v3d_get_sand30_fs(struct pipe_context *pctx)
|
|||
nir_var_shader_out,
|
||||
glsl_uvec4, "f_color");
|
||||
color_out->data.location = FRAG_RESULT_COLOR;
|
||||
b.shader->info.outputs_written |= BITFIELD_BIT(FRAG_RESULT_COLOR);
|
||||
|
||||
nir_variable *pos_in =
|
||||
nir_variable_create(b.shader, nir_var_shader_in, vec4, "pos");
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# Please include a comment with the log message and a testcase triggering each
|
||||
# VUID at the bottom of the file.
|
||||
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-vkCmdDrawMultiIndexedEXT-format-07753,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-vkCmdDrawMultiIndexedEXT-None-10909,VUID-vkDestroyDevice-device-05137,VUID-vkCmdDrawMultiEXT-None-08879,VUID-VkShaderCreateInfoEXT-pSetLayouts-parameter,VUID-vkCmdDrawMultiIndexedEXT-None-08879,VUID-VkPhysicalDeviceMeshShaderFeaturesEXT-primitiveFragmentShadingRateMeshShader-07033
|
||||
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-vkCmdDrawMultiIndexedEXT-format-07753,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-vkCmdDrawMultiIndexedEXT-None-10909,VUID-vkDestroyDevice-device-05137,VUID-vkCmdDrawMultiEXT-None-08879,VUID-VkShaderCreateInfoEXT-pSetLayouts-parameter,VUID-vkCmdDrawMultiIndexedEXT-None-08879
|
||||
khronos_validation.report_flags = error
|
||||
khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG,VK_DBG_LAYER_ACTION_BREAK
|
||||
VK_LAYER_ENABLES=VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# Please include a comment with the log message and a testcase triggering each
|
||||
# VUID at the bottom of the file.
|
||||
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-vkCmdDrawMultiIndexedEXT-None-10909,VUID-VkPhysicalDeviceMeshShaderFeaturesEXT-primitiveFragmentShadingRateMeshShader-07033
|
||||
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-vkCmdDrawMultiIndexedEXT-None-10909
|
||||
khronos_validation.report_flags = error
|
||||
khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG,VK_DBG_LAYER_ACTION_BREAK
|
||||
VK_LAYER_ENABLES=VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# Please include a comment with the log message and a testcase triggering each
|
||||
# VUID at the bottom of the file.
|
||||
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-vkCmdDrawMultiIndexedEXT-format-07753,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-vkCmdDrawMultiIndexedEXT-None-10909,VUID-vkDestroyDevice-device-05137,VUID-VkPhysicalDeviceMeshShaderFeaturesEXT-primitiveFragmentShadingRateMeshShader-07033
|
||||
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-vkCmdDrawMultiIndexedEXT-format-07753,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-vkCmdDrawMultiIndexedEXT-None-10909,VUID-vkDestroyDevice-device-05137
|
||||
khronos_validation.report_flags = error
|
||||
khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG,VK_DBG_LAYER_ACTION_BREAK
|
||||
VK_LAYER_ENABLES=VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
# Please include a comment with the log message and a testcase triggering each
|
||||
# VUID at the bottom of the file.
|
||||
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-vkCmdDrawMultiEXT-None-02699,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-vkCmdPipelineBarrier2-shaderTileImageColorReadAccess-08718,VUID-VkGraphicsPipelineCreateInfo-flags-06482,VUID-vkCmdPipelineBarrier2-None-08719,VUID-vkCmdDrawMultiEXT-rasterizationSamples-07474,VUID-vkDestroyDevice-device-05137,VUID-VkRectLayerKHR-offset-04864,VUID-vkAcquireNextImageKHR-semaphore-01779,VUID-vkQueueSubmit-pSignalSemaphores-00067,VUID-VkImageMemoryBarrier2-srcAccessMask-07454,UNASSIGNED-GeneralParameterError-RequiredHandle,VUID-VkImageMemoryBarrier2-image-parameter,VUID-vkCmdDrawMultiIndexedEXT-None-10909,VUID-VkPhysicalDeviceMeshShaderFeaturesEXT-primitiveFragmentShadingRateMeshShader-07033
|
||||
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-vkCmdDrawMultiEXT-None-02699,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-vkCmdPipelineBarrier2-shaderTileImageColorReadAccess-08718,VUID-VkGraphicsPipelineCreateInfo-flags-06482,VUID-vkCmdPipelineBarrier2-None-08719,VUID-vkCmdDrawMultiEXT-rasterizationSamples-07474,VUID-vkDestroyDevice-device-05137,VUID-VkRectLayerKHR-offset-04864,VUID-vkAcquireNextImageKHR-semaphore-01779,VUID-vkQueueSubmit-pSignalSemaphores-00067,VUID-VkImageMemoryBarrier2-srcAccessMask-07454,UNASSIGNED-GeneralParameterError-RequiredHandle,VUID-VkImageMemoryBarrier2-image-parameter,VUID-vkCmdDrawMultiIndexedEXT-None-10909
|
||||
khronos_validation.report_flags = error
|
||||
khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG,VK_DBG_LAYER_ACTION_BREAK
|
||||
VK_LAYER_ENABLES=VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
# Please include a comment with the log message and a testcase triggering each
|
||||
# VUID at the bottom of the file.
|
||||
#khronos_validation.message_id_filter = VUID-RuntimeSpirv-Location-06272,VUID-StandaloneSpirv-OpEntryPoint-08721,VUID-vkCmdDrawMultiEXT-dynamicPrimitiveTopologyUnrestricted-07500,VUID-vkCmdDrawMultiEXT-None-08879,VUID-vkCmdDrawMultiIndexedEXT-dynamicPrimitiveTopologyUnrestricted-07500,VUID-vkCmdDrawMultiIndexedEXT-None-08879,VUID-vkDestroyDevice-device-05137,VUID-vkQueueSubmit-pCommandBuffers-00065,VUID-VkShaderCreateInfoEXT-pCode-08737
|
||||
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-StandaloneSpirv-OpEntryPoint-08721,VUID-vkCmdDrawMultiEXT-dynamicRenderingUnusedAttachments-08911,VUID-vkCmdDrawMultiIndexedEXT-None-08879,VUID-vkDestroyDevice-device-05137,VUID-vkQueueSubmit-pCommandBuffers-00065,VUID-VkShaderCreateInfoEXT-pCode-08737,VUID-vkCmdDrawMultiEXT-None-08879,VUID-VkShaderCreateInfoEXT-pSetLayouts-parameter,VUID-vkCmdDrawMultiIndexedEXT-None-10909,UNASSIGNED-Draw-primitiveTopologyPatchListRestart,VUID-VkPhysicalDeviceMeshShaderFeaturesEXT-primitiveFragmentShadingRateMeshShader-07033
|
||||
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-StandaloneSpirv-OpEntryPoint-08721,VUID-vkCmdDrawMultiEXT-dynamicRenderingUnusedAttachments-08911,VUID-vkCmdDrawMultiIndexedEXT-None-08879,VUID-vkDestroyDevice-device-05137,VUID-vkQueueSubmit-pCommandBuffers-00065,VUID-VkShaderCreateInfoEXT-pCode-08737,VUID-vkCmdDrawMultiEXT-None-08879,VUID-VkShaderCreateInfoEXT-pSetLayouts-parameter,VUID-vkCmdDrawMultiIndexedEXT-None-10909,UNASSIGNED-Draw-primitiveTopologyPatchListRestart
|
||||
khronos_validation.report_flags = error
|
||||
khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG,VK_DBG_LAYER_ACTION_BREAK
|
||||
VK_LAYER_ENABLES=VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
# Please include a comment with the log message and a testcase triggering each
|
||||
# VUID at the bottom of the file.
|
||||
#khronos_validation.message_id_filter = VUID-RuntimeSpirv-Location-06272,VUID-StandaloneSpirv-OpEntryPoint-08721,VUID-vkCmdDrawMultiEXT-dynamicPrimitiveTopologyUnrestricted-07500,VUID-vkCmdDrawMultiEXT-None-08879,VUID-vkCmdDrawMultiIndexedEXT-dynamicPrimitiveTopologyUnrestricted-07500,VUID-vkCmdDrawMultiIndexedEXT-None-08879,VUID-vkDestroyDevice-device-05137,VUID-vkQueueSubmit-pCommandBuffers-00065,VUID-VkShaderCreateInfoEXT-pCode-08737
|
||||
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-VkRenderingAttachmentInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-StandaloneSpirv-OpEntryPoint-08721,VUID-vkCmdDrawMultiEXT-dynamicRenderingUnusedAttachments-08911,VUID-vkCmdDrawMultiIndexedEXT-None-08879,VUID-vkDestroyDevice-device-05137,VUID-vkQueueSubmit-pCommandBuffers-00065,VUID-VkShaderCreateInfoEXT-pCode-08737,VUID-vkCmdDrawMultiEXT-None-08879,VUID-VkShaderCreateInfoEXT-pSetLayouts-parameter,VUID-vkCmdDrawMultiIndexedEXT-None-10909,UNASSIGNED-Draw-primitiveTopologyPatchListRestart,VUID-VkPhysicalDeviceMeshShaderFeaturesEXT-primitiveFragmentShadingRateMeshShader-07033
|
||||
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-VkRenderingAttachmentInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-StandaloneSpirv-OpEntryPoint-08721,VUID-vkCmdDrawMultiEXT-dynamicRenderingUnusedAttachments-08911,VUID-vkCmdDrawMultiIndexedEXT-None-08879,VUID-vkDestroyDevice-device-05137,VUID-vkQueueSubmit-pCommandBuffers-00065,VUID-VkShaderCreateInfoEXT-pCode-08737,VUID-vkCmdDrawMultiEXT-None-08879,VUID-VkShaderCreateInfoEXT-pSetLayouts-parameter,VUID-vkCmdDrawMultiIndexedEXT-None-10909,UNASSIGNED-Draw-primitiveTopologyPatchListRestart
|
||||
khronos_validation.report_flags = error
|
||||
khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG,VK_DBG_LAYER_ACTION_BREAK
|
||||
VK_LAYER_ENABLES=VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
# Please include a comment with the log message and a testcase triggering each
|
||||
# VUID at the bottom of the file.
|
||||
#khronos_validation.message_id_filter = VUID-RuntimeSpirv-Location-06272,VUID-StandaloneSpirv-OpEntryPoint-08721,VUID-vkCmdDrawMultiEXT-dynamicPrimitiveTopologyUnrestricted-07500,VUID-vkCmdDrawMultiEXT-None-08879,VUID-vkCmdDrawMultiIndexedEXT-dynamicPrimitiveTopologyUnrestricted-07500,VUID-vkCmdDrawMultiIndexedEXT-None-08879,VUID-vkDestroyDevice-device-05137,VUID-vkQueueSubmit-pCommandBuffers-00065,VUID-VkShaderCreateInfoEXT-pCode-08737
|
||||
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-StandaloneSpirv-OpEntryPoint-08721,VUID-vkCmdDrawMultiEXT-dynamicRenderingUnusedAttachments-08911,VUID-vkCmdDrawMultiIndexedEXT-None-08879,VUID-vkDestroyDevice-device-05137,VUID-vkQueueSubmit-pCommandBuffers-00065,VUID-VkShaderCreateInfoEXT-pCode-08737,VUID-vkCmdDrawMultiEXT-None-08879,VUID-VkShaderCreateInfoEXT-pSetLayouts-parameter,VUID-vkCmdDrawMultiIndexedEXT-None-10909,UNASSIGNED-Draw-primitiveTopologyPatchListRestart,VUID-VkPhysicalDeviceMeshShaderFeaturesEXT-primitiveFragmentShadingRateMeshShader-07033
|
||||
khronos_validation.message_id_filter = VUID-VkPhysicalDeviceProperties2-pNext-pNext,VUID-VkDeviceCreateInfo-pNext-pNext,VUID-RuntimeSpirv-Location-06272,VUID-RuntimeSpirv-OpEntryPoint-08743,VUID-StandaloneSpirv-OpEntryPoint-08721,VUID-vkCmdDrawMultiEXT-dynamicRenderingUnusedAttachments-08911,VUID-vkCmdDrawMultiIndexedEXT-None-08879,VUID-vkDestroyDevice-device-05137,VUID-vkQueueSubmit-pCommandBuffers-00065,VUID-VkShaderCreateInfoEXT-pCode-08737,VUID-vkCmdDrawMultiEXT-None-08879,VUID-VkShaderCreateInfoEXT-pSetLayouts-parameter,VUID-vkCmdDrawMultiIndexedEXT-None-10909,UNASSIGNED-Draw-primitiveTopologyPatchListRestart
|
||||
khronos_validation.report_flags = error
|
||||
khronos_validation.debug_action = VK_DBG_LAYER_ACTION_LOG_MSG,VK_DBG_LAYER_ACTION_BREAK
|
||||
VK_LAYER_ENABLES=VK_VALIDATION_FEATURE_ENABLE_DEBUG_PRINTF_EXT
|
||||
|
|
|
|||
|
|
@ -3368,9 +3368,8 @@ begin_rendering(struct zink_context *ctx, bool check_msaa_expand)
|
|||
VK_TRUE,
|
||||
ctx->gfx_pipeline_state.rast_samples + 1,
|
||||
};
|
||||
ctx->dynamic_fb.info.pNext = ctx->transient_attachments && !ctx->blitting && has_msrtss ? &msrtss : NULL;
|
||||
|
||||
if (has_msrtss && !ctx->blitting)
|
||||
ctx->dynamic_fb.info.pNext = ctx->transient_attachments ? &msrtss : NULL;
|
||||
VKCTX(CmdBeginRendering)(ctx->bs->cmdbuf, &ctx->dynamic_fb.info);
|
||||
ctx->in_rp = true;
|
||||
return clear_buffers;
|
||||
|
|
|
|||
|
|
@ -119,7 +119,9 @@ pipeline_statistic_convert(enum pipe_statistics_query_index idx)
|
|||
[PIPE_STAT_QUERY_PS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT,
|
||||
[PIPE_STAT_QUERY_HS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT,
|
||||
[PIPE_STAT_QUERY_DS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT,
|
||||
[PIPE_STAT_QUERY_CS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT
|
||||
[PIPE_STAT_QUERY_CS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT,
|
||||
[PIPE_STAT_QUERY_MS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_MESH_SHADER_INVOCATIONS_BIT_EXT,
|
||||
[PIPE_STAT_QUERY_TS_INVOCATIONS] = VK_QUERY_PIPELINE_STATISTIC_TASK_SHADER_INVOCATIONS_BIT_EXT,
|
||||
};
|
||||
assert(idx < ARRAY_SIZE(map));
|
||||
return map[idx];
|
||||
|
|
|
|||
|
|
@ -3133,6 +3133,12 @@ init_driver_workarounds(struct zink_screen *screen)
|
|||
screen->info.have_EXT_host_image_copy = false;
|
||||
}
|
||||
|
||||
static void
|
||||
disable_features(struct zink_screen *screen)
|
||||
{
|
||||
screen->info.mesh_feats.primitiveFragmentShadingRateMeshShader = false;
|
||||
}
|
||||
|
||||
static void
|
||||
check_hic_shader_read(struct zink_screen *screen)
|
||||
{
|
||||
|
|
@ -3513,6 +3519,7 @@ zink_internal_create_screen(const struct pipe_screen_config *config, int64_t dev
|
|||
check_hic_shader_read(screen);
|
||||
|
||||
init_driver_workarounds(screen);
|
||||
disable_features(screen);
|
||||
|
||||
screen->dev = zink_create_logical_device(screen);
|
||||
if (!screen->dev)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,14 @@
|
|||
# Copyright © 2017 Dylan Baker
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
libradeonwinsys_deps = [idep_mesautil, dep_libdrm]
|
||||
libradeonwinsys_c_args = []
|
||||
|
||||
if with_gallium_radeonsi
|
||||
libradeonwinsys_deps += [idep_amdgfxregs_h]
|
||||
libradeonwinsys_c_args = ['-DHAVE_GALLIUM_RADEONSI']
|
||||
endif
|
||||
|
||||
libradeonwinsys = static_library(
|
||||
'radeonwinsys',
|
||||
files('radeon_drm_bo.c',
|
||||
|
|
@ -14,5 +22,6 @@ libradeonwinsys = static_library(
|
|||
'radeon_surface.h'),
|
||||
include_directories : [inc_src, inc_include, inc_gallium, inc_gallium_aux],
|
||||
gnu_symbol_visibility : 'hidden',
|
||||
dependencies : [idep_mesautil, dep_libdrm],
|
||||
c_args : libradeonwinsys_c_args,
|
||||
dependencies : libradeonwinsys_deps,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -8,6 +8,10 @@
|
|||
#include "radeon_drm_bo.h"
|
||||
#include "radeon_drm_cs.h"
|
||||
|
||||
#ifdef HAVE_GALLIUM_RADEONSI
|
||||
#include "amdgfxregs.h"
|
||||
#endif
|
||||
|
||||
#include "util/os_file.h"
|
||||
#include "util/simple_mtx.h"
|
||||
#include "util/thread_sched.h"
|
||||
|
|
@ -105,6 +109,73 @@ static bool radeon_get_drm_value(int fd, unsigned request,
|
|||
return true;
|
||||
}
|
||||
|
||||
static void get_hs_info(struct radeon_info *info)
|
||||
{
|
||||
/* This is the size of all TCS outputs in memory per workgroup.
|
||||
* Hawaii can't handle num_workgroups > 256 with 8K per workgroup, so use 4K.
|
||||
*/
|
||||
unsigned max_hs_out_vram_dwords_per_wg = info->family == CHIP_HAWAII ? 4096 : 8192;
|
||||
unsigned max_workgroups_per_se;
|
||||
|
||||
#ifdef HAVE_GALLIUM_RADEONSI /* for gfx6+ register definitions */
|
||||
unsigned max_hs_out_vram_dwords_enum = 0;
|
||||
|
||||
switch (max_hs_out_vram_dwords_per_wg) {
|
||||
case 8192:
|
||||
max_hs_out_vram_dwords_enum = V_03093C_X_8K_DWORDS;
|
||||
break;
|
||||
case 4096:
|
||||
max_hs_out_vram_dwords_enum = V_03093C_X_4K_DWORDS;
|
||||
break;
|
||||
case 2048:
|
||||
max_hs_out_vram_dwords_enum = V_03093C_X_2K_DWORDS;
|
||||
break;
|
||||
case 1024:
|
||||
max_hs_out_vram_dwords_enum = V_03093C_X_1K_DWORDS;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("invalid TCS workgroup size");
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Gfx7 should limit num_workgroups to 508 (127 per SE)
|
||||
* Gfx6 should limit num_workgroups to 126 (63 per SE)
|
||||
*/
|
||||
if (info->gfx_level == GFX7) {
|
||||
max_workgroups_per_se = 127;
|
||||
} else {
|
||||
max_workgroups_per_se = 63;
|
||||
}
|
||||
|
||||
/* Limit to 4 workgroups per CU for TCS, which exhausts LDS if each workgroup occupies 16KB.
|
||||
* Note that the offchip allocation isn't deallocated until the corresponding TES waves finish.
|
||||
*/
|
||||
unsigned num_offchip_wg_per_cu = 4;
|
||||
unsigned num_workgroups_per_se = MIN2(num_offchip_wg_per_cu * info->max_good_cu_per_sa *
|
||||
info->max_sa_per_se, max_workgroups_per_se);
|
||||
unsigned num_workgroups = num_workgroups_per_se * info->max_se;
|
||||
|
||||
#ifdef HAVE_GALLIUM_RADEONSI /* for gfx6+ register definitions */
|
||||
if (info->gfx_level == GFX7) {
|
||||
info->hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(num_workgroups) |
|
||||
S_03093C_OFFCHIP_GRANULARITY_GFX7(max_hs_out_vram_dwords_enum);
|
||||
} else {
|
||||
info->hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(num_workgroups) |
|
||||
S_0089B0_OFFCHIP_GRANULARITY(max_hs_out_vram_dwords_enum);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* The typical size of tess factors of 1 TCS workgroup if all patches are triangles. */
|
||||
unsigned typical_tess_factor_size_per_wg = (192 / 3) * 16;
|
||||
unsigned num_tess_factor_wg_per_cu = 3;
|
||||
|
||||
info->hs_offchip_workgroup_dw_size = max_hs_out_vram_dwords_per_wg;
|
||||
info->tess_offchip_ring_size = num_workgroups * max_hs_out_vram_dwords_per_wg * 4;
|
||||
info->tess_factor_ring_size = typical_tess_factor_size_per_wg * num_tess_factor_wg_per_cu *
|
||||
info->max_good_cu_per_sa * info->max_sa_per_se * info->max_se;
|
||||
info->total_tess_ring_size = info->tess_offchip_ring_size + info->tess_factor_ring_size;
|
||||
}
|
||||
|
||||
/* Helper function to do the ioctls needed for setup and init. */
|
||||
static bool do_winsys_init(struct radeon_drm_winsys *ws)
|
||||
{
|
||||
|
|
@ -639,6 +710,9 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
|
|||
default:;
|
||||
}
|
||||
|
||||
if (ws->gen == DRV_SI)
|
||||
get_hs_info(&ws->info);
|
||||
|
||||
ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL ||
|
||||
strstr(debug_get_option("AMD_DEBUG", ""), "check_vm") != NULL;
|
||||
ws->noop_cs = debug_get_bool_option("RADEON_NOOP", false);
|
||||
|
|
|
|||
|
|
@ -196,6 +196,9 @@ brw_compile_tcs(const struct brw_compiler *compiler,
|
|||
|
||||
brw_prog_data_init(&prog_data->base.base, ¶ms->base);
|
||||
|
||||
brw_fill_tess_info_from_shader_info(&prog_data->tess_info,
|
||||
&nir->info);
|
||||
|
||||
nir->info.outputs_written = key->outputs_written;
|
||||
nir->info.patch_outputs_written = key->patch_outputs_written;
|
||||
|
||||
|
|
@ -221,6 +224,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
|
|||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
|
||||
|
||||
prog_data->input_vertices = key->input_vertices;
|
||||
prog_data->output_vertices = nir->info.tess.tcs_vertices_out;
|
||||
prog_data->patch_count_threshold = get_patch_count_threshold(key->input_vertices);
|
||||
|
||||
if (compiler->use_tcs_multi_patch) {
|
||||
|
|
|
|||
|
|
@ -59,6 +59,22 @@ run_tes(brw_shader &s)
|
|||
return !s.failed;
|
||||
}
|
||||
|
||||
extern "C" void
|
||||
brw_fill_tess_info_from_shader_info(struct brw_tess_info *brw_info,
|
||||
const shader_info *shader_info)
|
||||
{
|
||||
STATIC_ASSERT(INTEL_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1);
|
||||
STATIC_ASSERT(INTEL_TESS_PARTITIONING_ODD_FRACTIONAL ==
|
||||
TESS_SPACING_FRACTIONAL_ODD - 1);
|
||||
STATIC_ASSERT(INTEL_TESS_PARTITIONING_EVEN_FRACTIONAL ==
|
||||
TESS_SPACING_FRACTIONAL_EVEN - 1);
|
||||
|
||||
brw_info->primitive_mode = shader_info->tess._primitive_mode;
|
||||
brw_info->spacing = shader_info->tess.spacing;
|
||||
brw_info->ccw = shader_info->tess.ccw;
|
||||
brw_info->point_mode = shader_info->tess.point_mode;
|
||||
}
|
||||
|
||||
const unsigned *
|
||||
brw_compile_tes(const struct brw_compiler *compiler,
|
||||
brw_compile_tes_params *params)
|
||||
|
|
@ -121,39 +137,8 @@ brw_compile_tes(const struct brw_compiler *compiler,
|
|||
|
||||
prog_data->base.urb_read_length = 0;
|
||||
|
||||
STATIC_ASSERT(INTEL_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1);
|
||||
STATIC_ASSERT(INTEL_TESS_PARTITIONING_ODD_FRACTIONAL ==
|
||||
TESS_SPACING_FRACTIONAL_ODD - 1);
|
||||
STATIC_ASSERT(INTEL_TESS_PARTITIONING_EVEN_FRACTIONAL ==
|
||||
TESS_SPACING_FRACTIONAL_EVEN - 1);
|
||||
|
||||
prog_data->partitioning =
|
||||
(enum intel_tess_partitioning) (nir->info.tess.spacing - 1);
|
||||
|
||||
switch (nir->info.tess._primitive_mode) {
|
||||
case TESS_PRIMITIVE_QUADS:
|
||||
prog_data->domain = INTEL_TESS_DOMAIN_QUAD;
|
||||
break;
|
||||
case TESS_PRIMITIVE_TRIANGLES:
|
||||
prog_data->domain = INTEL_TESS_DOMAIN_TRI;
|
||||
break;
|
||||
case TESS_PRIMITIVE_ISOLINES:
|
||||
prog_data->domain = INTEL_TESS_DOMAIN_ISOLINE;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("invalid domain shader primitive mode");
|
||||
}
|
||||
|
||||
if (nir->info.tess.point_mode) {
|
||||
prog_data->output_topology = INTEL_TESS_OUTPUT_TOPOLOGY_POINT;
|
||||
} else if (nir->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES) {
|
||||
prog_data->output_topology = INTEL_TESS_OUTPUT_TOPOLOGY_LINE;
|
||||
} else {
|
||||
/* Hardware winding order is backwards from OpenGL */
|
||||
prog_data->output_topology =
|
||||
nir->info.tess.ccw ? INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW
|
||||
: INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW;
|
||||
}
|
||||
brw_fill_tess_info_from_shader_info(&prog_data->tess_info,
|
||||
&nir->info);
|
||||
|
||||
if (unlikely(debug_enabled)) {
|
||||
fprintf(stderr, "TES Input ");
|
||||
|
|
|
|||
|
|
@ -1158,13 +1158,26 @@ struct brw_vs_prog_data {
|
|||
uint32_t vf_component_packing[4];
|
||||
};
|
||||
|
||||
struct brw_tess_info {
|
||||
enum tess_primitive_mode primitive_mode:8;
|
||||
uint8_t spacing:2;
|
||||
bool ccw:1;
|
||||
bool point_mode:1;
|
||||
uint32_t pad:20;
|
||||
};
|
||||
|
||||
struct brw_tcs_prog_data
|
||||
{
|
||||
struct brw_vue_prog_data base;
|
||||
|
||||
struct brw_tess_info tess_info;
|
||||
|
||||
/** Number of input vertices, 0 means dynamic */
|
||||
unsigned input_vertices;
|
||||
|
||||
/** Number of output vertices */
|
||||
unsigned output_vertices;
|
||||
|
||||
/** Should the non-SINGLE_PATCH payload provide primitive ID? */
|
||||
bool include_primitive_id;
|
||||
|
||||
|
|
@ -1187,14 +1200,12 @@ struct brw_tcs_prog_data
|
|||
unsigned tess_config_param;
|
||||
};
|
||||
|
||||
|
||||
struct brw_tes_prog_data
|
||||
{
|
||||
struct brw_vue_prog_data base;
|
||||
|
||||
enum intel_tess_partitioning partitioning;
|
||||
enum intel_tess_output_topology output_topology;
|
||||
enum intel_tess_domain domain;
|
||||
struct brw_tess_info tess_info;
|
||||
|
||||
bool include_primitive_id;
|
||||
|
||||
/**
|
||||
|
|
@ -1367,6 +1378,64 @@ DEFINE_PROG_DATA_DOWNCAST(mesh, prog_data->stage == MESA_SHADER_MESH)
|
|||
|
||||
#undef DEFINE_PROG_DATA_DOWNCAST
|
||||
|
||||
static inline struct brw_tess_info
|
||||
brw_merge_tess_info(struct brw_tess_info tcs_info,
|
||||
struct brw_tess_info tes_info)
|
||||
{
|
||||
/* Just merge by OR'ing the raw bits */
|
||||
uint32_t x, y;
|
||||
|
||||
assert(sizeof(x) == sizeof(tcs_info));
|
||||
|
||||
memcpy(&x, &tcs_info, sizeof(x));
|
||||
memcpy(&y, &tes_info, sizeof(y));
|
||||
|
||||
x |= y;
|
||||
|
||||
struct brw_tess_info out;
|
||||
memcpy(&out, &x, sizeof(out));
|
||||
return out;
|
||||
}
|
||||
|
||||
static inline enum intel_tess_partitioning
|
||||
brw_tess_info_partitioning(struct brw_tess_info info)
|
||||
{
|
||||
return (enum intel_tess_partitioning)(info.spacing - 1);
|
||||
}
|
||||
|
||||
static inline enum intel_tess_domain
|
||||
brw_tess_info_domain(struct brw_tess_info info)
|
||||
{
|
||||
switch (info.primitive_mode) {
|
||||
case TESS_PRIMITIVE_QUADS:
|
||||
return INTEL_TESS_DOMAIN_QUAD;
|
||||
break;
|
||||
case TESS_PRIMITIVE_TRIANGLES:
|
||||
return INTEL_TESS_DOMAIN_TRI;
|
||||
break;
|
||||
case TESS_PRIMITIVE_ISOLINES:
|
||||
return INTEL_TESS_DOMAIN_ISOLINE;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("invalid primitive mode");
|
||||
}
|
||||
}
|
||||
|
||||
static inline enum intel_tess_output_topology
|
||||
brw_tess_info_output_topology(struct brw_tess_info info)
|
||||
{
|
||||
if (info.point_mode) {
|
||||
return INTEL_TESS_OUTPUT_TOPOLOGY_POINT;
|
||||
} else if (info.primitive_mode == TESS_PRIMITIVE_ISOLINES) {
|
||||
return INTEL_TESS_OUTPUT_TOPOLOGY_LINE;
|
||||
} else {
|
||||
/* Hardware winding order is backwards from OpenGL */
|
||||
return info.ccw ?
|
||||
INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW :
|
||||
INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW;
|
||||
}
|
||||
}
|
||||
|
||||
/** @} */
|
||||
|
||||
struct brw_compiler *
|
||||
|
|
|
|||
|
|
@ -2126,6 +2126,7 @@ flag_fused_eu_disable_instr(nir_builder *b, nir_instr *instr, void *data)
|
|||
case nir_intrinsic_bindless_image_atomic:
|
||||
case nir_intrinsic_bindless_image_atomic_swap: {
|
||||
int src_idx = nir_get_io_index_src_number(intrin);
|
||||
assert(src_idx >= 0);
|
||||
if (nir_src_is_divergent(&intrin->src[src_idx])) {
|
||||
nir_intrinsic_set_access(intrin,
|
||||
nir_intrinsic_access(intrin) |
|
||||
|
|
@ -2181,7 +2182,11 @@ brw_postprocess_nir_opts(nir_shader *nir, const struct brw_compiler *compiler,
|
|||
if (OPT(nir_lower_tex, &tex_options))
|
||||
OPT(nir_lower_tex, &tex_options);
|
||||
|
||||
OPT(brw_nir_lower_mcs_fetch, devinfo);
|
||||
/* MCS lowering can introduce u2u16 conversions. We need to lower those to
|
||||
* make constant offsets detectable by brw_nir_texture_backend_opcode().
|
||||
*/
|
||||
if (OPT(brw_nir_lower_mcs_fetch, devinfo))
|
||||
OPT(nir_opt_constant_folding);
|
||||
|
||||
const struct brw_nir_lower_texture_opts brw_tex_options = {
|
||||
.combined_lod_and_array_index = compiler->devinfo->ver >= 20,
|
||||
|
|
|
|||
|
|
@ -36,6 +36,10 @@ extern "C" {
|
|||
|
||||
extern const struct nir_shader_compiler_options brw_scalar_nir_options;
|
||||
|
||||
void
|
||||
brw_fill_tess_info_from_shader_info(struct brw_tess_info *brw_info,
|
||||
const shader_info *shader_info);
|
||||
|
||||
int type_size_vec4(const struct glsl_type *type, bool bindless);
|
||||
int type_size_dvec4(const struct glsl_type *type, bool bindless);
|
||||
|
||||
|
|
|
|||
|
|
@ -235,6 +235,7 @@ lower_ray_query_intrinsic(nir_builder *b,
|
|||
brw_nir_rt_sync_stack_addr(b, state->globals.base_mem_addr,
|
||||
state->globals.num_dss_rt_stacks);
|
||||
nir_def *stack_addr = shadow_stack_addr ? shadow_stack_addr : hw_stack_addr;
|
||||
mesa_shader_stage stage = b->shader->info.stage;
|
||||
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_rq_initialize: {
|
||||
|
|
@ -447,11 +448,27 @@ lower_ray_query_intrinsic(nir_builder *b,
|
|||
break;
|
||||
|
||||
case nir_ray_query_value_intersection_object_ray_direction:
|
||||
sysval = world_ray_in.dir;
|
||||
if (stage == MESA_SHADER_CLOSEST_HIT) {
|
||||
struct brw_nir_rt_bvh_instance_leaf_defs leaf;
|
||||
brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr,
|
||||
state->devinfo);
|
||||
sysval = brw_nir_build_vec3_mat_mult_col_major(
|
||||
b, world_ray_in.dir, leaf.world_to_object, false);
|
||||
} else {
|
||||
sysval = object_ray_in.dir;
|
||||
}
|
||||
break;
|
||||
|
||||
case nir_ray_query_value_intersection_object_ray_origin:
|
||||
sysval = world_ray_in.orig;
|
||||
if (stage == MESA_SHADER_CLOSEST_HIT) {
|
||||
struct brw_nir_rt_bvh_instance_leaf_defs leaf;
|
||||
brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr,
|
||||
state->devinfo);
|
||||
sysval = brw_nir_build_vec3_mat_mult_col_major(
|
||||
b, world_ray_in.orig, leaf.world_to_object, true);
|
||||
} else {
|
||||
sysval = object_ray_in.orig;
|
||||
}
|
||||
break;
|
||||
|
||||
case nir_ray_query_value_intersection_object_to_world: {
|
||||
|
|
|
|||
|
|
@ -24,24 +24,6 @@
|
|||
#include "brw_nir_rt.h"
|
||||
#include "brw_nir_rt_builder.h"
|
||||
|
||||
static nir_def *
|
||||
nir_build_vec3_mat_mult_col_major(nir_builder *b, nir_def *vec,
|
||||
nir_def *matrix[], bool translation)
|
||||
{
|
||||
nir_def *result_components[3] = {
|
||||
nir_channel(b, matrix[3], 0),
|
||||
nir_channel(b, matrix[3], 1),
|
||||
nir_channel(b, matrix[3], 2),
|
||||
};
|
||||
for (unsigned i = 0; i < 3; ++i) {
|
||||
for (unsigned j = 0; j < 3; ++j) {
|
||||
nir_def *v = nir_fmul(b, nir_channels(b, vec, 1 << j), nir_channels(b, matrix[j], 1 << i));
|
||||
result_components[i] = (translation || j) ? nir_fadd(b, result_components[i], v) : v;
|
||||
}
|
||||
}
|
||||
return nir_vec(b, result_components, 3);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
build_leaf_is_procedural(nir_builder *b, struct brw_nir_rt_mem_hit_defs *hit)
|
||||
{
|
||||
|
|
@ -193,7 +175,7 @@ lower_rt_intrinsics_impl(nir_function_impl *impl,
|
|||
brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr,
|
||||
devinfo);
|
||||
|
||||
sysval = nir_build_vec3_mat_mult_col_major(
|
||||
sysval = brw_nir_build_vec3_mat_mult_col_major(
|
||||
b, world_ray_in.orig, leaf.world_to_object, true);
|
||||
} else {
|
||||
sysval = object_ray_in.orig;
|
||||
|
|
@ -206,7 +188,7 @@ lower_rt_intrinsics_impl(nir_function_impl *impl,
|
|||
brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr,
|
||||
devinfo);
|
||||
|
||||
sysval = nir_build_vec3_mat_mult_col_major(
|
||||
sysval = brw_nir_build_vec3_mat_mult_col_major(
|
||||
b, world_ray_in.dir, leaf.world_to_object, false);
|
||||
} else {
|
||||
sysval = object_ray_in.dir;
|
||||
|
|
|
|||
|
|
@ -84,6 +84,24 @@ nir_shader *
|
|||
brw_nir_create_null_ahs_shader(const struct brw_compiler *compiler,
|
||||
void *mem_ctx);
|
||||
|
||||
static inline nir_def *
|
||||
brw_nir_build_vec3_mat_mult_col_major(nir_builder *b, nir_def *vec,
|
||||
nir_def *matrix[], bool translation)
|
||||
{
|
||||
nir_def *result_components[3] = {
|
||||
nir_channel(b, matrix[3], 0),
|
||||
nir_channel(b, matrix[3], 1),
|
||||
nir_channel(b, matrix[3], 2),
|
||||
};
|
||||
for (unsigned i = 0; i < 3; ++i) {
|
||||
for (unsigned j = 0; j < 3; ++j) {
|
||||
nir_def *v = nir_fmul(b, nir_channels(b, vec, 1 << j), nir_channels(b, matrix[j], 1 << i));
|
||||
result_components[i] = (translation || j) ? nir_fadd(b, result_components[i], v) : v;
|
||||
}
|
||||
}
|
||||
return nir_vec(b, result_components, 3);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1128,8 +1128,26 @@ has_cross_lane_access(const brw_inst *inst)
|
|||
|
||||
for (unsigned s = 0; s < inst->sources; s++) {
|
||||
if (inst->src[s].file == VGRF) {
|
||||
if (inst->src[s].stride == 0)
|
||||
/* The instruction reads a particular lane (only relevant with non
|
||||
* scalar values, otherwise this is just the way we read uniform
|
||||
* values produced in reduced SIMD size).
|
||||
*/
|
||||
if (!inst->src[s].is_scalar && inst->src[s].stride == 0)
|
||||
return true;
|
||||
} else if (inst->src[s].file == ARF &&
|
||||
inst->src[s].nr >= BRW_ARF_FLAG &&
|
||||
inst->src[s].nr < BRW_ARF_MASK) {
|
||||
/* The instruction reads the flag register which represents states
|
||||
* from all the lanes.
|
||||
*
|
||||
* Note that although this prevents moving instructions reading the
|
||||
* flag registers past a HALT kind of instruction, this doesn't
|
||||
* prevent the instructions that generated the flag value from moving
|
||||
* on either side of the HALT instruction. So it's possible for
|
||||
* ballot instructions to produce incorrect values when used in a
|
||||
* shader with HALT.
|
||||
*/
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1387,6 +1387,10 @@ VkResult anv_CreateDescriptorPool(
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static void anv_descriptor_set_destroy(struct anv_device *device,
|
||||
struct anv_descriptor_pool *pool,
|
||||
struct anv_descriptor_set *set);
|
||||
|
||||
void anv_DestroyDescriptorPool(
|
||||
VkDevice _device,
|
||||
VkDescriptorPool _pool,
|
||||
|
|
@ -1400,9 +1404,16 @@ void anv_DestroyDescriptorPool(
|
|||
|
||||
ANV_RMV(resource_destroy, device, pool);
|
||||
|
||||
/* From the Vulkan spec, vkDestroyDescriptorPool:
|
||||
*
|
||||
* "When a pool is destroyed, all descriptor sets allocated from the
|
||||
* pool are implicitly freed and become invalid. Descriptor sets
|
||||
* allocated from a given pool do not need to be freed before destroying
|
||||
* that descriptor pool."
|
||||
*/
|
||||
list_for_each_entry_safe(struct anv_descriptor_set, set,
|
||||
&pool->desc_sets, pool_link) {
|
||||
vk_descriptor_set_layout_unref(&device->vk, &set->layout->vk);
|
||||
anv_descriptor_set_destroy(device, pool, set);
|
||||
}
|
||||
|
||||
util_vma_heap_finish(&pool->host_heap);
|
||||
|
|
|
|||
|
|
@ -480,6 +480,9 @@ anv_CopyImageToMemory(
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
/* This functions copies from one image to another through an intermediate
|
||||
* linear buffer.
|
||||
*/
|
||||
static void
|
||||
copy_image_to_image(struct anv_device *device,
|
||||
struct anv_image *src_image,
|
||||
|
|
@ -505,14 +508,11 @@ copy_image_to_image(struct anv_device *device,
|
|||
isl_surf_get_tile_info(src_surf, &src_tile);
|
||||
isl_surf_get_tile_info(dst_surf, &dst_tile);
|
||||
|
||||
uint32_t tile_width_B;
|
||||
uint32_t tile_width_el, tile_height_el;
|
||||
if (src_tile.phys_extent_B.w > dst_tile.phys_extent_B.w) {
|
||||
tile_width_B = src_tile.phys_extent_B.w;
|
||||
tile_width_el = src_tile.logical_extent_el.w;
|
||||
tile_height_el = src_tile.logical_extent_el.h;
|
||||
} else {
|
||||
tile_width_B = dst_tile.phys_extent_B.w;
|
||||
tile_width_el = dst_tile.logical_extent_el.w;
|
||||
tile_height_el = dst_tile.logical_extent_el.h;
|
||||
}
|
||||
|
|
@ -527,14 +527,18 @@ copy_image_to_image(struct anv_device *device,
|
|||
VkExtent3D extent_el =
|
||||
vk_extent3d_to_el(src_surf->format, region->extent);
|
||||
|
||||
uint32_t linear_stride_B;
|
||||
/* linear-to-linear case */
|
||||
if (tile_width_el == 1 && tile_height_el == 1) {
|
||||
tile_width_el = MIN2(4096 / (src_tile.format_bpb / 8),
|
||||
extent_el.width);
|
||||
tile_height_el = 4096 / (tile_width_el * (src_tile.format_bpb / 8));
|
||||
tile_width_B = tile_width_el * src_tile.format_bpb / 8;
|
||||
linear_stride_B = tile_width_el * src_tile.format_bpb / 8;
|
||||
} else {
|
||||
linear_stride_B = src_tile.logical_extent_el.w * src_tile.format_bpb / 8;
|
||||
}
|
||||
|
||||
|
||||
uint32_t layer_count =
|
||||
vk_image_subresource_layer_count(&src_image->vk, ®ion->srcSubresource);
|
||||
for (uint32_t a = 0; a < layer_count; a++) {
|
||||
|
|
@ -559,7 +563,7 @@ copy_image_to_image(struct anv_device *device,
|
|||
src_binding,
|
||||
src_anv_surf->memory_range.offset,
|
||||
tmp_map,
|
||||
tile_width_B,
|
||||
linear_stride_B,
|
||||
&src_offset, &extent,
|
||||
region->srcSubresource.mipLevel,
|
||||
region->srcSubresource.baseArrayLayer,
|
||||
|
|
@ -570,7 +574,7 @@ copy_image_to_image(struct anv_device *device,
|
|||
dst_binding,
|
||||
dst_anv_surf->memory_range.offset,
|
||||
tmp_map,
|
||||
tile_width_B,
|
||||
linear_stride_B,
|
||||
&dst_offset, &extent,
|
||||
region->dstSubresource.mipLevel,
|
||||
region->dstSubresource.baseArrayLayer,
|
||||
|
|
|
|||
|
|
@ -2161,8 +2161,15 @@ struct anv_gfx_dynamic_state {
|
|||
uint32_t SampleMask;
|
||||
} sm;
|
||||
|
||||
/* 3DSTATE_DS */
|
||||
struct {
|
||||
bool ComputeWCoordinateEnable;
|
||||
} ds;
|
||||
|
||||
/* 3DSTATE_TE */
|
||||
struct {
|
||||
uint32_t TEDomain;
|
||||
uint32_t Partitioning;
|
||||
uint32_t OutputTopology;
|
||||
uint32_t TessellationDistributionMode;
|
||||
} te;
|
||||
|
|
@ -2251,7 +2258,7 @@ struct anv_gfx_dynamic_state {
|
|||
uint32_t BackfaceStencilPassDepthPassOp;
|
||||
uint32_t BackfaceStencilPassDepthFailOp;
|
||||
uint32_t BackfaceStencilTestFunction;
|
||||
} ds;
|
||||
} wm_ds;
|
||||
|
||||
/* 3DSTATE_TBIMR_TILE_PASS_INFO */
|
||||
struct {
|
||||
|
|
|
|||
|
|
@ -1054,6 +1054,7 @@ anv_shader_compile_bs(struct anv_device *device,
|
|||
.should_remat_callback = should_remat_cb,
|
||||
};
|
||||
|
||||
NIR_PASS(_, nir, brw_nir_lower_rt_intrinsics_pre_trace);
|
||||
NIR_PASS(_, nir, nir_lower_shader_calls, &opts,
|
||||
&resume_shaders, &num_resume_shaders, mem_ctx);
|
||||
NIR_PASS(_, nir, brw_nir_lower_shader_calls, &lowering_state);
|
||||
|
|
|
|||
|
|
@ -490,7 +490,12 @@ anv_raster_polygon_mode(const struct anv_cmd_graphics_state *gfx,
|
|||
}
|
||||
UNREACHABLE("Unsupported GS output topology");
|
||||
} else if (gfx->shaders[MESA_SHADER_TESS_EVAL] != NULL) {
|
||||
switch (get_gfx_tes_prog_data(gfx)->output_topology) {
|
||||
struct brw_tess_info tess_info =
|
||||
brw_merge_tess_info(
|
||||
get_gfx_tcs_prog_data(gfx)->tess_info,
|
||||
get_gfx_tes_prog_data(gfx)->tess_info);
|
||||
|
||||
switch (brw_tess_info_output_topology(tess_info)) {
|
||||
case INTEL_TESS_OUTPUT_TOPOLOGY_POINT:
|
||||
return VK_POLYGON_MODE_POINT;
|
||||
|
||||
|
|
@ -500,8 +505,10 @@ anv_raster_polygon_mode(const struct anv_cmd_graphics_state *gfx,
|
|||
case INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW:
|
||||
case INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW:
|
||||
return polygon_mode;
|
||||
|
||||
default:
|
||||
UNREACHABLE("Unsupported TCS output topology");
|
||||
}
|
||||
UNREACHABLE("Unsupported TCS output topology");
|
||||
} else {
|
||||
switch (primitive_topology) {
|
||||
case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
|
||||
|
|
@ -1317,6 +1324,22 @@ update_cps(struct anv_gfx_dynamic_state *hw_state,
|
|||
}
|
||||
#endif
|
||||
|
||||
ALWAYS_INLINE static void
|
||||
update_ds(struct anv_gfx_dynamic_state *hw_state,
|
||||
const struct anv_cmd_graphics_state *gfx)
|
||||
{
|
||||
const struct brw_tes_prog_data *tes_prog_data = get_gfx_tes_prog_data(gfx);
|
||||
|
||||
if (tes_prog_data) {
|
||||
struct brw_tess_info tess_info =
|
||||
brw_merge_tess_info(get_gfx_tcs_prog_data(gfx)->tess_info,
|
||||
tes_prog_data->tess_info);
|
||||
|
||||
SET(DS, ds.ComputeWCoordinateEnable,
|
||||
brw_tess_info_domain(tess_info) == INTEL_TESS_DOMAIN_TRI);
|
||||
}
|
||||
}
|
||||
|
||||
ALWAYS_INLINE static void
|
||||
update_te(struct anv_gfx_dynamic_state *hw_state,
|
||||
const struct anv_device *device,
|
||||
|
|
@ -1326,16 +1349,28 @@ update_te(struct anv_gfx_dynamic_state *hw_state,
|
|||
const struct brw_tes_prog_data *tes_prog_data = get_gfx_tes_prog_data(gfx);
|
||||
|
||||
if (tes_prog_data) {
|
||||
struct brw_tess_info tess_info =
|
||||
brw_merge_tess_info(get_gfx_tcs_prog_data(gfx)->tess_info,
|
||||
tes_prog_data->tess_info);
|
||||
|
||||
SET(TE, te.TEDomain, brw_tess_info_domain(tess_info));
|
||||
SET(TE, te.Partitioning, brw_tess_info_partitioning(tess_info));
|
||||
if (dyn->ts.domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) {
|
||||
SET(TE, te.OutputTopology, tes_prog_data->output_topology);
|
||||
SET(TE, te.OutputTopology, brw_tess_info_output_topology(tess_info));
|
||||
} else {
|
||||
/* When the origin is upper-left, we have to flip the winding order */
|
||||
if (tes_prog_data->output_topology == OUTPUT_TRI_CCW) {
|
||||
/* When the origin is upper-left, we have to flip the winding order */
|
||||
enum intel_tess_output_topology output_topology =
|
||||
brw_tess_info_output_topology(tess_info);
|
||||
switch (output_topology) {
|
||||
case OUTPUT_TRI_CCW:
|
||||
SET(TE, te.OutputTopology, OUTPUT_TRI_CW);
|
||||
} else if (tes_prog_data->output_topology == OUTPUT_TRI_CW) {
|
||||
break;
|
||||
case OUTPUT_TRI_CW:
|
||||
SET(TE, te.OutputTopology, OUTPUT_TRI_CCW);
|
||||
} else {
|
||||
SET(TE, te.OutputTopology, tes_prog_data->output_topology);
|
||||
break;
|
||||
default:
|
||||
SET(TE, te.OutputTopology, output_topology);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1595,48 +1630,48 @@ update_wm_depth_stencil(struct anv_gfx_dynamic_state *hw_state,
|
|||
struct vk_depth_stencil_state opt_ds = dyn->ds;
|
||||
vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
|
||||
|
||||
SET(WM_DEPTH_STENCIL, ds.DoubleSidedStencilEnable, true);
|
||||
SET(WM_DEPTH_STENCIL, wm_ds.DoubleSidedStencilEnable, true);
|
||||
|
||||
SET(WM_DEPTH_STENCIL, ds.StencilTestMask,
|
||||
SET(WM_DEPTH_STENCIL, wm_ds.StencilTestMask,
|
||||
opt_ds.stencil.front.compare_mask & 0xff);
|
||||
SET(WM_DEPTH_STENCIL, ds.StencilWriteMask,
|
||||
SET(WM_DEPTH_STENCIL, wm_ds.StencilWriteMask,
|
||||
opt_ds.stencil.front.write_mask & 0xff);
|
||||
|
||||
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilTestMask, opt_ds.stencil.back.compare_mask & 0xff);
|
||||
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilWriteMask, opt_ds.stencil.back.write_mask & 0xff);
|
||||
SET(WM_DEPTH_STENCIL, wm_ds.BackfaceStencilTestMask, opt_ds.stencil.back.compare_mask & 0xff);
|
||||
SET(WM_DEPTH_STENCIL, wm_ds.BackfaceStencilWriteMask, opt_ds.stencil.back.write_mask & 0xff);
|
||||
|
||||
SET(WM_DEPTH_STENCIL, ds.StencilReferenceValue,
|
||||
SET(WM_DEPTH_STENCIL, wm_ds.StencilReferenceValue,
|
||||
opt_ds.stencil.front.reference & 0xff);
|
||||
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilReferenceValue,
|
||||
SET(WM_DEPTH_STENCIL, wm_ds.BackfaceStencilReferenceValue,
|
||||
opt_ds.stencil.back.reference & 0xff);
|
||||
|
||||
SET(WM_DEPTH_STENCIL, ds.DepthTestEnable, opt_ds.depth.test_enable);
|
||||
SET(WM_DEPTH_STENCIL, ds.DepthBufferWriteEnable, opt_ds.depth.write_enable);
|
||||
SET(WM_DEPTH_STENCIL, ds.DepthTestFunction,
|
||||
SET(WM_DEPTH_STENCIL, wm_ds.DepthTestEnable, opt_ds.depth.test_enable);
|
||||
SET(WM_DEPTH_STENCIL, wm_ds.DepthBufferWriteEnable, opt_ds.depth.write_enable);
|
||||
SET(WM_DEPTH_STENCIL, wm_ds.DepthTestFunction,
|
||||
vk_to_intel_compare_op[opt_ds.depth.compare_op]);
|
||||
SET(WM_DEPTH_STENCIL, ds.StencilTestEnable, opt_ds.stencil.test_enable);
|
||||
SET(WM_DEPTH_STENCIL, ds.StencilBufferWriteEnable,
|
||||
SET(WM_DEPTH_STENCIL, wm_ds.StencilTestEnable, opt_ds.stencil.test_enable);
|
||||
SET(WM_DEPTH_STENCIL, wm_ds.StencilBufferWriteEnable,
|
||||
opt_ds.stencil.write_enable);
|
||||
SET(WM_DEPTH_STENCIL, ds.StencilFailOp,
|
||||
SET(WM_DEPTH_STENCIL, wm_ds.StencilFailOp,
|
||||
vk_to_intel_stencil_op[opt_ds.stencil.front.op.fail]);
|
||||
SET(WM_DEPTH_STENCIL, ds.StencilPassDepthPassOp,
|
||||
SET(WM_DEPTH_STENCIL, wm_ds.StencilPassDepthPassOp,
|
||||
vk_to_intel_stencil_op[opt_ds.stencil.front.op.pass]);
|
||||
SET(WM_DEPTH_STENCIL, ds.StencilPassDepthFailOp,
|
||||
SET(WM_DEPTH_STENCIL, wm_ds.StencilPassDepthFailOp,
|
||||
vk_to_intel_stencil_op[
|
||||
opt_ds.stencil.front.op.depth_fail]);
|
||||
SET(WM_DEPTH_STENCIL, ds.StencilTestFunction,
|
||||
SET(WM_DEPTH_STENCIL, wm_ds.StencilTestFunction,
|
||||
vk_to_intel_compare_op[
|
||||
opt_ds.stencil.front.op.compare]);
|
||||
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilFailOp,
|
||||
SET(WM_DEPTH_STENCIL, wm_ds.BackfaceStencilFailOp,
|
||||
vk_to_intel_stencil_op[
|
||||
opt_ds.stencil.back.op.fail]);
|
||||
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilPassDepthPassOp,
|
||||
SET(WM_DEPTH_STENCIL, wm_ds.BackfaceStencilPassDepthPassOp,
|
||||
vk_to_intel_stencil_op[
|
||||
opt_ds.stencil.back.op.pass]);
|
||||
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilPassDepthFailOp,
|
||||
SET(WM_DEPTH_STENCIL, wm_ds.BackfaceStencilPassDepthFailOp,
|
||||
vk_to_intel_stencil_op[
|
||||
opt_ds.stencil.back.op.depth_fail]);
|
||||
SET(WM_DEPTH_STENCIL, ds.BackfaceStencilTestFunction,
|
||||
SET(WM_DEPTH_STENCIL, wm_ds.BackfaceStencilTestFunction,
|
||||
vk_to_intel_compare_op[
|
||||
opt_ds.stencil.back.op.compare]);
|
||||
|
||||
|
|
@ -2335,11 +2370,14 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state,
|
|||
update_cps(hw_state, device, dyn);
|
||||
#endif /* GFX_VER >= 11 */
|
||||
|
||||
if (gfx->dirty & (ANV_CMD_DIRTY_HS | ANV_CMD_DIRTY_DS))
|
||||
update_ds(hw_state, gfx);
|
||||
|
||||
if (
|
||||
#if GFX_VERx10 >= 125
|
||||
(gfx->dirty & ANV_CMD_DIRTY_PRERASTER_SHADERS) ||
|
||||
#else
|
||||
(gfx->dirty & ANV_CMD_DIRTY_DS) ||
|
||||
(gfx->dirty & (ANV_CMD_DIRTY_HS | ANV_CMD_DIRTY_DS)) ||
|
||||
#endif
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN))
|
||||
update_te(hw_state, device, dyn, gfx);
|
||||
|
|
@ -2506,10 +2544,14 @@ cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state,
|
|||
((gfx->dirty & (ANV_CMD_DIRTY_HS | ANV_CMD_DIRTY_DS)) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS))) {
|
||||
assert(tcs_prog_data != NULL && tes_prog_data != NULL);
|
||||
struct brw_tess_info tess_info =
|
||||
brw_merge_tess_info(tcs_prog_data->tess_info,
|
||||
tes_prog_data->tess_info);
|
||||
|
||||
SET(TESS_CONFIG, tess_config,
|
||||
intel_tess_config(dyn->ts.patch_control_points,
|
||||
tcs_prog_data->instances,
|
||||
tes_prog_data->domain,
|
||||
tcs_prog_data->output_vertices,
|
||||
brw_tess_info_domain(tess_info),
|
||||
tcs_prog_data->base.vue_map.num_per_patch_slots,
|
||||
tcs_prog_data->base.vue_map.num_per_vertex_slots,
|
||||
tcs_prog_data->base.vue_map.builtins_slot_offset));
|
||||
|
|
@ -2975,6 +3017,8 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
|
|||
if (anv_gfx_has_stage(gfx, MESA_SHADER_TESS_EVAL)) {
|
||||
anv_gfx_pack_merge(te, GENX(3DSTATE_TE),
|
||||
MESA_SHADER_TESS_EVAL, ds.te, te) {
|
||||
SET(te, te, TEDomain);
|
||||
SET(te, te, Partitioning);
|
||||
SET(te, te, OutputTopology);
|
||||
#if GFX_VERx10 >= 125
|
||||
SET(te, te, TessellationDistributionMode);
|
||||
|
|
@ -2986,27 +3030,27 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
|
|||
}
|
||||
|
||||
if (IS_DIRTY(WM_DEPTH_STENCIL)) {
|
||||
anv_gfx_pack(wm_ds, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
|
||||
SET(ds, ds, DoubleSidedStencilEnable);
|
||||
SET(ds, ds, StencilTestMask);
|
||||
SET(ds, ds, StencilWriteMask);
|
||||
SET(ds, ds, BackfaceStencilTestMask);
|
||||
SET(ds, ds, BackfaceStencilWriteMask);
|
||||
SET(ds, ds, StencilReferenceValue);
|
||||
SET(ds, ds, BackfaceStencilReferenceValue);
|
||||
SET(ds, ds, DepthTestEnable);
|
||||
SET(ds, ds, DepthBufferWriteEnable);
|
||||
SET(ds, ds, DepthTestFunction);
|
||||
SET(ds, ds, StencilTestEnable);
|
||||
SET(ds, ds, StencilBufferWriteEnable);
|
||||
SET(ds, ds, StencilFailOp);
|
||||
SET(ds, ds, StencilPassDepthPassOp);
|
||||
SET(ds, ds, StencilPassDepthFailOp);
|
||||
SET(ds, ds, StencilTestFunction);
|
||||
SET(ds, ds, BackfaceStencilFailOp);
|
||||
SET(ds, ds, BackfaceStencilPassDepthPassOp);
|
||||
SET(ds, ds, BackfaceStencilPassDepthFailOp);
|
||||
SET(ds, ds, BackfaceStencilTestFunction);
|
||||
anv_gfx_pack(wm_ds, GENX(3DSTATE_WM_DEPTH_STENCIL), wm_ds) {
|
||||
SET(wm_ds, wm_ds, DoubleSidedStencilEnable);
|
||||
SET(wm_ds, wm_ds, StencilTestMask);
|
||||
SET(wm_ds, wm_ds, StencilWriteMask);
|
||||
SET(wm_ds, wm_ds, BackfaceStencilTestMask);
|
||||
SET(wm_ds, wm_ds, BackfaceStencilWriteMask);
|
||||
SET(wm_ds, wm_ds, StencilReferenceValue);
|
||||
SET(wm_ds, wm_ds, BackfaceStencilReferenceValue);
|
||||
SET(wm_ds, wm_ds, DepthTestEnable);
|
||||
SET(wm_ds, wm_ds, DepthBufferWriteEnable);
|
||||
SET(wm_ds, wm_ds, DepthTestFunction);
|
||||
SET(wm_ds, wm_ds, StencilTestEnable);
|
||||
SET(wm_ds, wm_ds, StencilBufferWriteEnable);
|
||||
SET(wm_ds, wm_ds, StencilFailOp);
|
||||
SET(wm_ds, wm_ds, StencilPassDepthPassOp);
|
||||
SET(wm_ds, wm_ds, StencilPassDepthFailOp);
|
||||
SET(wm_ds, wm_ds, StencilTestFunction);
|
||||
SET(wm_ds, wm_ds, BackfaceStencilFailOp);
|
||||
SET(wm_ds, wm_ds, BackfaceStencilPassDepthPassOp);
|
||||
SET(wm_ds, wm_ds, BackfaceStencilPassDepthFailOp);
|
||||
SET(wm_ds, wm_ds, BackfaceStencilTestFunction);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -3230,8 +3274,12 @@ cmd_buffer_repack_gfx_state(struct anv_gfx_dynamic_state *hw_state,
|
|||
if (IS_DIRTY(HS))
|
||||
anv_gfx_copy_protected(hs, GENX(3DSTATE_HS), MESA_SHADER_TESS_CTRL, hs.hs);
|
||||
|
||||
if (IS_DIRTY(DS))
|
||||
anv_gfx_copy_protected(ds, GENX(3DSTATE_DS), MESA_SHADER_TESS_EVAL, ds.ds);
|
||||
if (IS_DIRTY(DS)) {
|
||||
anv_gfx_pack_merge_protected(ds, GENX(3DSTATE_DS),
|
||||
MESA_SHADER_TESS_EVAL, ds.ds, ds) {
|
||||
SET(ds, ds, ComputeWCoordinateEnable);
|
||||
}
|
||||
}
|
||||
|
||||
if (IS_DIRTY(GS)) {
|
||||
anv_gfx_pack_merge_protected(gs, GENX(3DSTATE_GS),
|
||||
|
|
|
|||
|
|
@ -695,8 +695,6 @@ emit_ds_shader(struct anv_batch *batch,
|
|||
|
||||
anv_shader_emit(batch, shader, ds.te, GENX(3DSTATE_TE), te) {
|
||||
te.TEEnable = true;
|
||||
te.Partitioning = tes_prog_data->partitioning;
|
||||
te.TEDomain = tes_prog_data->domain;
|
||||
te.MaximumTessellationFactorOdd = 63.0;
|
||||
te.MaximumTessellationFactorNotOdd = 64.0;
|
||||
#if GFX_VERx10 >= 125
|
||||
|
|
@ -731,9 +729,6 @@ emit_ds_shader(struct anv_batch *batch,
|
|||
ds.BindingTableEntryCount = shader->bind_map.surface_count;
|
||||
ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;
|
||||
|
||||
ds.ComputeWCoordinateEnable =
|
||||
tes_prog_data->domain == INTEL_TESS_DOMAIN_TRI;
|
||||
|
||||
ds.PatchURBEntryReadLength = tes_prog_data->base.urb_read_length;
|
||||
ds.PatchURBEntryReadOffset = 0;
|
||||
ds.DispatchGRFStartRegisterForURBData =
|
||||
|
|
|
|||
|
|
@ -588,8 +588,9 @@ impl RegLatencySM80 {
|
|||
| FP16 | FP16_Alu | FP16_F32 => 1,
|
||||
HFMA2_MMA | RedirectedFP64 => pred(has_pred, 3, 3),
|
||||
Clmad => pred(has_pred, 5, 3),
|
||||
IMMA_88 | MMA_1x_collect => pred(has_pred, 8, 1),
|
||||
MMA_2x_collect => pred(has_pred, 12, 1),
|
||||
IMMA_88 => pred(has_pred, 8, 1),
|
||||
MMA_1x_collect => pred(has_pred, 11, 1),
|
||||
MMA_2x_collect => pred(has_pred, 19, 1),
|
||||
DMMA => pred(has_pred, 20, 1),
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -603,8 +604,9 @@ impl RegLatencySM80 {
|
|||
| IMADWideWriteDH | FP16 | FP16_Alu | FP16_F32 => 1,
|
||||
HFMA2_MMA | RedirectedFP64 => pred(has_pred, 3, 1),
|
||||
Clmad => pred(has_pred, 5, 1),
|
||||
IMMA_88 | MMA_1x_collect => 8,
|
||||
MMA_2x_collect => 12,
|
||||
IMMA_88 => 8,
|
||||
MMA_1x_collect => 11,
|
||||
MMA_2x_collect => 19,
|
||||
DMMA => 20,
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -620,8 +622,9 @@ impl RegLatencySM80 {
|
|||
IMADWideWriteDH => pred(has_pred, 1, 1),
|
||||
HFMA2_MMA | RedirectedFP64 => pred(has_pred, 3, 3),
|
||||
Clmad => pred(has_pred, 5, 3),
|
||||
IMMA_88 | MMA_1x_collect => pred(has_pred, 8, 1),
|
||||
MMA_2x_collect => pred(has_pred, 12, 1),
|
||||
IMMA_88 => pred(has_pred, 8, 1),
|
||||
MMA_1x_collect => pred(has_pred, 11, 1),
|
||||
MMA_2x_collect => pred(has_pred, 19, 1),
|
||||
DMMA => pred(has_pred, 20, 1),
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -639,8 +642,9 @@ impl RegLatencySM80 {
|
|||
FP16 | FP16_Alu | FP16_F32 => pred(has_pred, 1, 2),
|
||||
HFMA2_MMA | RedirectedFP64 => pred(has_pred, 5, 3),
|
||||
Clmad => pred(has_pred, 5, 5),
|
||||
IMMA_88 | MMA_1x_collect => pred(has_pred, 8, 3),
|
||||
MMA_2x_collect => pred(has_pred, 12, 3),
|
||||
IMMA_88 => pred(has_pred, 8, 3),
|
||||
MMA_1x_collect => pred(has_pred, 11, 3),
|
||||
MMA_2x_collect => pred(has_pred, 19, 3),
|
||||
DMMA => pred(has_pred, 20, 3),
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -657,8 +661,9 @@ impl RegLatencySM80 {
|
|||
| FP16_F32 => 1,
|
||||
HFMA2_MMA | RedirectedFP64 => pred(has_pred, 5, 1),
|
||||
Clmad => pred(has_pred, 5, 3),
|
||||
IMMA_88 | MMA_1x_collect => pred(has_pred, 8, 1),
|
||||
MMA_2x_collect => pred(has_pred, 12, 1),
|
||||
IMMA_88 => pred(has_pred, 8, 1),
|
||||
MMA_1x_collect => pred(has_pred, 11, 1),
|
||||
MMA_2x_collect => pred(has_pred, 19, 1),
|
||||
DMMA => pred(has_pred, 20, 1),
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -675,8 +680,9 @@ impl RegLatencySM80 {
|
|||
| FP16_F32 => 1,
|
||||
HFMA2_MMA | RedirectedFP64 => pred(has_pred, 3, 3),
|
||||
Clmad => pred(has_pred, 5, 3),
|
||||
IMMA_88 | MMA_1x_collect => pred(has_pred, 8, 1),
|
||||
MMA_2x_collect => pred(has_pred, 12, 1),
|
||||
IMMA_88 => pred(has_pred, 8, 1),
|
||||
MMA_1x_collect => pred(has_pred, 11, 1),
|
||||
MMA_2x_collect => pred(has_pred, 19, 1),
|
||||
DMMA => pred(has_pred, 20, 1),
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -690,8 +696,9 @@ impl RegLatencySM80 {
|
|||
| IMADWideWriteDH | FP16 | FP16_Alu | FP16_F32 => 1,
|
||||
HFMA2_MMA | RedirectedFP64 => pred(has_pred, 3, 2),
|
||||
Clmad => pred(has_pred, 5, 2),
|
||||
IMMA_88 | MMA_1x_collect => 8,
|
||||
MMA_2x_collect => 12,
|
||||
IMMA_88 => 8,
|
||||
MMA_1x_collect => 11,
|
||||
MMA_2x_collect => 19,
|
||||
DMMA => 20,
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -706,8 +713,9 @@ impl RegLatencySM80 {
|
|||
HFMA2_MMA => 2,
|
||||
RedirectedFP64 => 3,
|
||||
Clmad => pred(has_pred, 5, 1),
|
||||
IMMA_88 | MMA_1x_collect => 8,
|
||||
MMA_2x_collect => 12,
|
||||
IMMA_88 => 8,
|
||||
MMA_1x_collect => 11,
|
||||
MMA_2x_collect => 19,
|
||||
DMMA => 20,
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -722,8 +730,9 @@ impl RegLatencySM80 {
|
|||
HFMA2_MMA => 2,
|
||||
RedirectedFP64 => 2,
|
||||
Clmad => pred(has_pred, 4, 2),
|
||||
IMMA_88 | MMA_1x_collect => 7,
|
||||
MMA_2x_collect => 11,
|
||||
IMMA_88 => 7,
|
||||
MMA_1x_collect => 10,
|
||||
MMA_2x_collect => 18,
|
||||
DMMA => 19,
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -736,8 +745,9 @@ impl RegLatencySM80 {
|
|||
CoupledAlu | CoupledDisp64 | CoupledFMA | IMADWideWriteDL
|
||||
| IMADWideWriteDH | FP16 | FP16_Alu | FP16_F32 | HFMA2_MMA
|
||||
| RedirectedFP64 | Clmad => 2,
|
||||
IMMA_88 | MMA_1x_collect => 7,
|
||||
MMA_2x_collect => 11,
|
||||
IMMA_88 => 7,
|
||||
MMA_1x_collect => 10,
|
||||
MMA_2x_collect => 18,
|
||||
DMMA => 19,
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -750,8 +760,9 @@ impl RegLatencySM80 {
|
|||
CoupledAlu | CoupledDisp64 | CoupledFMA | IMADWideWriteDL
|
||||
| IMADWideWriteDH | FP16 | FP16_Alu | FP16_F32 | HFMA2_MMA
|
||||
| RedirectedFP64 | Clmad => 2,
|
||||
IMMA_88 | MMA_1x_collect => 4,
|
||||
MMA_2x_collect => 8,
|
||||
IMMA_88 => 4,
|
||||
MMA_1x_collect => 8,
|
||||
MMA_2x_collect => 16,
|
||||
DMMA => 17,
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -764,8 +775,9 @@ impl RegLatencySM80 {
|
|||
CoupledAlu | CoupledDisp64 | CoupledFMA | IMADWideWriteDL
|
||||
| IMADWideWriteDH | FP16 | FP16_Alu | FP16_F32 | HFMA2_MMA
|
||||
| RedirectedFP64 | Clmad => 2,
|
||||
IMMA_88 | MMA_1x_collect => 4,
|
||||
MMA_2x_collect => 8,
|
||||
IMMA_88 => 4,
|
||||
MMA_1x_collect => 8,
|
||||
MMA_2x_collect => 16,
|
||||
DMMA => 16,
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -781,8 +793,9 @@ impl RegLatencySM80 {
|
|||
}
|
||||
HFMA2_MMA | RedirectedFP64 => pred(has_pred, 1, 9),
|
||||
Clmad => pred(has_pred, 1, 11),
|
||||
IMMA_88 | MMA_1x_collect => pred(has_pred, 7, 6),
|
||||
MMA_2x_collect => pred(has_pred, 11, 6),
|
||||
IMMA_88 => pred(has_pred, 7, 6),
|
||||
MMA_1x_collect => pred(has_pred, 10, 5),
|
||||
MMA_2x_collect => pred(has_pred, 18, 5),
|
||||
DMMA => pred(has_pred, 19, 6),
|
||||
Cbu => 1,
|
||||
Decoupled => 1,
|
||||
|
|
@ -801,15 +814,25 @@ impl RegLatencySM80 {
|
|||
use RegLatencySM80::*;
|
||||
match writer {
|
||||
CoupledAlu | CoupledDisp64 | CoupledFMA | IMADWideWriteDL
|
||||
| IMADWideWriteDH | FP16 | FP16_Alu | FP16_F32 | HFMA2_MMA
|
||||
| RedirectedFP64 => match reader {
|
||||
MMA_2x_collect => 7,
|
||||
_ => 1,
|
||||
},
|
||||
Clmad | IMMA_88 | MMA_1x_collect | MMA_2x_collect | DMMA | Cbu
|
||||
| IMADWideWriteDH | FP16 | FP16_Alu | FP16_F32 | HFMA2_MMA => {
|
||||
match reader {
|
||||
MMA_2x_collect => 7,
|
||||
_ => 1,
|
||||
}
|
||||
}
|
||||
RedirectedFP64 => 1,
|
||||
Clmad | IMMA_88 | MMA_1x_collect | MMA_2x_collect | DMMA
|
||||
| Decoupled | DecoupledAgu => match reader {
|
||||
CoupledAlu | CoupledDisp64 | CoupledFMA | IMADWideReadAB
|
||||
| IMADWideReadCL | IMADWideReadCH => 2,
|
||||
| IMADWideReadCL | IMADWideReadCH | FP16 | FP16_Alu
|
||||
| FP16_F32 | HFMA2_MMA => 2,
|
||||
_ => 1,
|
||||
},
|
||||
Cbu => match reader {
|
||||
CoupledAlu | CoupledDisp64 | CoupledFMA | IMADWideReadAB
|
||||
| IMADWideReadCL | IMADWideReadCH | FP16 | FP16_Alu
|
||||
| FP16_F32 | HFMA2_MMA => 2,
|
||||
MMA_2x_collect => 7,
|
||||
_ => 1,
|
||||
},
|
||||
_ => {
|
||||
|
|
|
|||
|
|
@ -671,6 +671,43 @@ try_lower_cmat_load_to_ldsm(nir_builder *b, nir_intrinsic_instr *intr)
|
|||
.matrix_layout = layout);
|
||||
}
|
||||
|
||||
static nir_deref_instr*
|
||||
get_cmat_component_deref(nir_builder *b, nir_intrinsic_instr *intr,
|
||||
nir_def *lane_id, unsigned idx)
|
||||
{
|
||||
unsigned deref_src = intr->intrinsic == nir_intrinsic_cmat_store ? 0 : 1;
|
||||
unsigned cmat_src = intr->intrinsic == nir_intrinsic_cmat_store ? 1 : 0;
|
||||
|
||||
const struct glsl_cmat_description desc = cmat_src_desc(intr->src[cmat_src]);
|
||||
nir_deref_instr *deref = nir_def_as_deref(intr->src[deref_src].ssa);
|
||||
|
||||
const enum glsl_matrix_layout layout = nir_intrinsic_matrix_layout(intr);
|
||||
nir_def *stride = intr->src[2].ssa;
|
||||
|
||||
nir_def *col_offset;
|
||||
nir_def *row_offset;
|
||||
compute_matrix_offsets(b, desc, layout, lane_id, idx,
|
||||
&col_offset, &row_offset);
|
||||
|
||||
row_offset = nir_imul(b, row_offset, stride);
|
||||
col_offset = nir_u2uN(b, col_offset, deref->def.bit_size);
|
||||
row_offset = nir_u2uN(b, row_offset, deref->def.bit_size);
|
||||
|
||||
/* We have to ignore the incoming stride, but have to choose the type of
|
||||
* the pointer as the declared stride is in multiple of the pointer type */
|
||||
deref = nir_build_deref_cast(
|
||||
b, &deref->def, deref->modes,
|
||||
deref->type,
|
||||
glsl_get_vector_elements(deref->type) * glsl_get_bit_size(deref->type) / 8
|
||||
);
|
||||
deref = nir_build_deref_ptr_as_array(b, deref, row_offset);
|
||||
deref = nir_build_deref_cast(
|
||||
b, &deref->def, deref->modes,
|
||||
glsl_scalar_type(desc.element_type),
|
||||
glsl_base_type_bit_size(desc.element_type) / 8);
|
||||
return nir_build_deref_ptr_as_array(b, deref, col_offset);
|
||||
}
|
||||
|
||||
static void
|
||||
lower_cmat_load(nir_builder *b, nir_intrinsic_instr *intr)
|
||||
{
|
||||
|
|
@ -682,10 +719,6 @@ lower_cmat_load(nir_builder *b, nir_intrinsic_instr *intr)
|
|||
|
||||
const struct glsl_cmat_description desc = cmat_src_desc(intr->src[0]);
|
||||
const unsigned length = get_cmat_length(desc);
|
||||
const enum glsl_matrix_layout layout = nir_intrinsic_matrix_layout(intr);
|
||||
|
||||
nir_deref_instr *deref = nir_def_as_deref(intr->src[1].ssa);
|
||||
nir_def *stride = intr->src[2].ssa;
|
||||
|
||||
nir_def *vars[NIR_MAX_VEC_COMPONENTS];
|
||||
for (unsigned i = 0; i < length; ++i)
|
||||
|
|
@ -694,26 +727,8 @@ lower_cmat_load(nir_builder *b, nir_intrinsic_instr *intr)
|
|||
nir_def *lane_id = nir_load_subgroup_invocation(b);
|
||||
|
||||
for (unsigned idx = 0; idx < length; idx++) {
|
||||
nir_def *col_offset;
|
||||
nir_def *row_offset;
|
||||
|
||||
compute_matrix_offsets(b, desc, layout, lane_id, idx,
|
||||
&col_offset, &row_offset);
|
||||
|
||||
row_offset = nir_imul(b, row_offset, stride);
|
||||
|
||||
col_offset = nir_u2uN(b, col_offset, deref->def.bit_size);
|
||||
row_offset = nir_u2uN(b, row_offset, deref->def.bit_size);
|
||||
|
||||
nir_deref_instr *iter_deref =
|
||||
nir_build_deref_ptr_as_array(b, deref, row_offset);
|
||||
iter_deref = nir_build_deref_cast(
|
||||
b, &iter_deref->def, deref->modes,
|
||||
glsl_scalar_type(desc.element_type),
|
||||
glsl_base_type_bit_size(desc.element_type) / 8);
|
||||
iter_deref =
|
||||
nir_build_deref_ptr_as_array(b, iter_deref, col_offset);
|
||||
|
||||
get_cmat_component_deref(b, intr, lane_id, idx);
|
||||
vars[idx] = nir_load_deref(b, iter_deref);
|
||||
}
|
||||
|
||||
|
|
@ -764,11 +779,6 @@ lower_cmat_instr(nir_builder *b,
|
|||
}
|
||||
|
||||
case nir_intrinsic_cmat_store: {
|
||||
enum glsl_matrix_layout layout = nir_intrinsic_matrix_layout(intr);
|
||||
|
||||
nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
|
||||
nir_def *stride = intr->src[2].ssa;
|
||||
|
||||
const struct glsl_cmat_description desc = cmat_src_desc(intr->src[1]);
|
||||
const unsigned length = get_cmat_length(desc);
|
||||
nir_def *src = load_cmat_src(b, intr->src[1]);
|
||||
|
|
@ -780,26 +790,8 @@ lower_cmat_instr(nir_builder *b,
|
|||
nir_def *lane_id = nir_load_subgroup_invocation(b);
|
||||
|
||||
for (unsigned idx = 0; idx < length; idx++) {
|
||||
nir_def *col_offset;
|
||||
nir_def *row_offset;
|
||||
|
||||
compute_matrix_offsets(b, desc, layout, lane_id, idx,
|
||||
&col_offset, &row_offset);
|
||||
|
||||
row_offset = nir_imul(b, row_offset, stride);
|
||||
|
||||
col_offset = nir_u2uN(b, col_offset, deref->def.bit_size);
|
||||
row_offset = nir_u2uN(b, row_offset, deref->def.bit_size);
|
||||
|
||||
nir_deref_instr *iter_deref =
|
||||
nir_build_deref_ptr_as_array(b, deref, row_offset);
|
||||
iter_deref = nir_build_deref_cast(
|
||||
b, &iter_deref->def, deref->modes,
|
||||
glsl_scalar_type(desc.element_type),
|
||||
glsl_base_type_bit_size(desc.element_type) / 8);
|
||||
iter_deref =
|
||||
nir_build_deref_ptr_as_array(b, iter_deref, col_offset);
|
||||
|
||||
get_cmat_component_deref(b, intr, lane_id, idx);
|
||||
nir_store_deref(b, iter_deref, vars[idx], 1);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,10 @@
|
|||
#include "nvk_queue.h"
|
||||
#include "nvkmd/nvkmd.h"
|
||||
|
||||
#define NVK_BUFFER_CREATE_CAPTURE_REPLAY_BITS \
|
||||
(VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT | \
|
||||
VK_BUFFER_CREATE_DESCRIPTOR_BUFFER_CAPTURE_REPLAY_BIT_EXT)
|
||||
|
||||
static uint32_t
|
||||
nvk_get_buffer_alignment(const struct nvk_physical_device *pdev,
|
||||
VkBufferUsageFlags2KHR usage_flags,
|
||||
|
|
@ -32,7 +36,7 @@ nvk_get_buffer_alignment(const struct nvk_physical_device *pdev,
|
|||
alignment = MAX2(alignment, NVK_DGC_ALIGN);
|
||||
|
||||
if (create_flags & (VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
|
||||
VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT))
|
||||
NVK_BUFFER_CREATE_CAPTURE_REPLAY_BITS))
|
||||
alignment = MAX2(alignment, pdev->nvkmd->bind_align_B);
|
||||
|
||||
return alignment;
|
||||
|
|
@ -70,6 +74,22 @@ nvk_get_bda_replay_addr(const VkBufferCreateInfo *pCreateInfo)
|
|||
break;
|
||||
}
|
||||
|
||||
case VK_STRUCTURE_TYPE_OPAQUE_CAPTURE_DESCRIPTOR_DATA_CREATE_INFO_EXT: {
|
||||
const VkOpaqueCaptureDescriptorDataCreateInfoEXT *dd = (void *)ext;
|
||||
if (dd->opaqueCaptureDescriptorData != NULL) {
|
||||
uint64_t dd_addr = 0;
|
||||
memcpy(&dd_addr, dd->opaqueCaptureDescriptorData, sizeof(dd_addr));
|
||||
|
||||
#ifdef NDEBUG
|
||||
return dd_addr;
|
||||
#else
|
||||
assert(addr == 0 || dd_addr == addr);
|
||||
addr = dd_addr;
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
@ -98,7 +118,7 @@ nvk_CreateBuffer(VkDevice device,
|
|||
|
||||
if (buffer->vk.size > 0 &&
|
||||
(buffer->vk.create_flags & (VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
|
||||
VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT))) {
|
||||
NVK_BUFFER_CREATE_CAPTURE_REPLAY_BITS))) {
|
||||
const uint32_t alignment =
|
||||
nvk_get_buffer_alignment(nvk_device_physical(dev),
|
||||
buffer->vk.usage,
|
||||
|
|
@ -111,7 +131,7 @@ nvk_CreateBuffer(VkDevice device,
|
|||
va_flags |= NVKMD_VA_SPARSE;
|
||||
|
||||
uint64_t fixed_addr = 0;
|
||||
if (buffer->vk.create_flags & VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT) {
|
||||
if (buffer->vk.create_flags & NVK_BUFFER_CREATE_CAPTURE_REPLAY_BITS) {
|
||||
va_flags |= NVKMD_VA_REPLAY;
|
||||
|
||||
fixed_addr = nvk_get_bda_replay_addr(pCreateInfo);
|
||||
|
|
@ -327,5 +347,10 @@ nvk_GetBufferOpaqueCaptureDescriptorDataEXT(
|
|||
const VkBufferCaptureDescriptorDataInfoEXT *pInfo,
|
||||
void *pData)
|
||||
{
|
||||
VK_FROM_HANDLE(nvk_buffer, buffer, pInfo->buffer);
|
||||
const uint64_t addr = vk_buffer_address(&buffer->vk, 0);
|
||||
|
||||
memcpy(pData, &addr, sizeof(addr));
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -531,6 +531,21 @@ nvk_cmd_flush_wait_dep(struct nvk_cmd_buffer *cmd,
|
|||
{
|
||||
enum nvk_barrier barriers = 0;
|
||||
|
||||
/* For asymmetric, we don't know what the access flags will be yet.
|
||||
* Handle this by setting access to everything.
|
||||
*/
|
||||
if (dep->dependencyFlags & VK_DEPENDENCY_ASYMMETRIC_EVENT_BIT_KHR) {
|
||||
/* VUID-vkCmdSetEvent2-dependencyFlags-10785, 10786, 10787 */
|
||||
assert(dep->memoryBarrierCount == 1 &&
|
||||
dep->bufferMemoryBarrierCount == 0 &&
|
||||
dep->imageMemoryBarrierCount == 0);
|
||||
|
||||
const VkMemoryBarrier2 *bar = &dep->pMemoryBarriers[0];
|
||||
barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
|
||||
VK_ACCESS_2_MEMORY_READ_BIT |
|
||||
VK_ACCESS_2_MEMORY_WRITE_BIT);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < dep->memoryBarrierCount; i++) {
|
||||
const VkMemoryBarrier2 *bar = &dep->pMemoryBarriers[i];
|
||||
barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
|
||||
|
|
|
|||
|
|
@ -1822,17 +1822,28 @@ nvk_flush_vi_state(struct nvk_cmd_buffer *cmd)
|
|||
|
||||
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI) ||
|
||||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDINGS_VALID)) {
|
||||
u_foreach_bit(a, dyn->vi->attributes_valid) {
|
||||
const struct nvk_va_format *fmt =
|
||||
nvk_get_va_format(pdev, dyn->vi->attributes[a].format);
|
||||
P_MTHD(p, NV9097, SET_VERTEX_ATTRIBUTE_A(0));
|
||||
for (uint32_t a = 0; a < 32; a++) {
|
||||
if (dyn->vi->attributes_valid & BITFIELD_BIT(a)) {
|
||||
const struct nvk_va_format *fmt =
|
||||
nvk_get_va_format(pdev, dyn->vi->attributes[a].format);
|
||||
|
||||
P_IMMD(p, NV9097, SET_VERTEX_ATTRIBUTE_A(a), {
|
||||
.stream = dyn->vi->attributes[a].binding,
|
||||
.offset = dyn->vi->attributes[a].offset,
|
||||
.component_bit_widths = fmt->bit_widths,
|
||||
.numerical_type = fmt->type,
|
||||
.swap_r_and_b = fmt->swap_rb,
|
||||
});
|
||||
P_NV9097_SET_VERTEX_ATTRIBUTE_A(p, a, {
|
||||
.stream = dyn->vi->attributes[a].binding,
|
||||
.source = SOURCE_ACTIVE,
|
||||
.offset = dyn->vi->attributes[a].offset,
|
||||
.component_bit_widths = fmt->bit_widths,
|
||||
.numerical_type = fmt->type,
|
||||
.swap_r_and_b = fmt->swap_rb,
|
||||
});
|
||||
} else {
|
||||
P_NV9097_SET_VERTEX_ATTRIBUTE_A(p, a, {
|
||||
.source = SOURCE_INACTIVE,
|
||||
/* Using RGBA32 gives us (0, 0, 0, 0) for inactive attributes. */
|
||||
.component_bit_widths = COMPONENT_BIT_WIDTHS_R32_G32_B32_A32,
|
||||
.numerical_type = NUMERICAL_TYPE_NUM_FLOAT,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
u_foreach_bit(b, dyn->vi->bindings_valid) {
|
||||
|
|
|
|||
|
|
@ -1193,11 +1193,11 @@ nvk_GetDescriptorEXT(VkDevice _device,
|
|||
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
|
||||
struct nvk_addr_range addr_range = { };
|
||||
if (pDescriptorInfo->data.pUniformBuffer != NULL &&
|
||||
pDescriptorInfo->data.pUniformBuffer->address != 0) {
|
||||
if (pDescriptorInfo->data.pStorageBuffer != NULL &&
|
||||
pDescriptorInfo->data.pStorageBuffer->address != 0) {
|
||||
addr_range = (const struct nvk_addr_range) {
|
||||
.addr = pDescriptorInfo->data.pUniformBuffer->address,
|
||||
.range = pDescriptorInfo->data.pUniformBuffer->range,
|
||||
.addr = pDescriptorInfo->data.pStorageBuffer->address,
|
||||
.range = pDescriptorInfo->data.pStorageBuffer->range,
|
||||
};
|
||||
}
|
||||
union nvk_buffer_descriptor desc = ssbo_desc(addr_range);
|
||||
|
|
|
|||
|
|
@ -474,9 +474,6 @@ nvk_GetDescriptorSetLayoutSupport(VkDevice device,
|
|||
assert(stride <= UINT8_MAX);
|
||||
assert(util_is_power_of_two_nonzero(alignment));
|
||||
|
||||
variable_is_inline_uniform_block =
|
||||
binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK;
|
||||
|
||||
if (flags & VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT) {
|
||||
/* From the Vulkan 1.3.256 spec:
|
||||
*
|
||||
|
|
@ -486,6 +483,9 @@ nvk_GetDescriptorSetLayoutSupport(VkDevice device,
|
|||
*/
|
||||
variable_count = MAX2(1, binding->descriptorCount);
|
||||
variable_stride = stride;
|
||||
|
||||
variable_is_inline_uniform_block =
|
||||
binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK;
|
||||
} else {
|
||||
/* Since we're aligning to the maximum and since this is just a
|
||||
* check for whether or not the max buffer size is big enough, we
|
||||
|
|
@ -507,8 +507,6 @@ nvk_GetDescriptorSetLayoutSupport(VkDevice device,
|
|||
if (pCreateInfo->flags &
|
||||
VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)
|
||||
max_buffer_size = NVK_MAX_PUSH_DESCRIPTORS * nvk_max_descriptor_size(&pdev->info);
|
||||
else if (variable_is_inline_uniform_block)
|
||||
max_buffer_size = NVK_MAX_INLINE_UNIFORM_BLOCK_SIZE;
|
||||
else
|
||||
max_buffer_size = NVK_MAX_DESCRIPTOR_SET_SIZE;
|
||||
|
||||
|
|
@ -519,12 +517,21 @@ nvk_GetDescriptorSetLayoutSupport(VkDevice device,
|
|||
switch (ext->sType) {
|
||||
case VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT: {
|
||||
VkDescriptorSetVariableDescriptorCountLayoutSupport *vs = (void *)ext;
|
||||
uint32_t max_var_count;
|
||||
|
||||
if (variable_stride > 0) {
|
||||
vs->maxVariableDescriptorCount =
|
||||
max_var_count =
|
||||
(max_buffer_size - non_variable_size) / variable_stride;
|
||||
} else {
|
||||
vs->maxVariableDescriptorCount = 0;
|
||||
max_var_count = 0;
|
||||
}
|
||||
|
||||
if (variable_is_inline_uniform_block) {
|
||||
max_var_count =
|
||||
MIN2(max_var_count, NVK_MAX_INLINE_UNIFORM_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
vs->maxVariableDescriptorCount = max_var_count;
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1045,7 +1045,7 @@ nvk_get_device_properties(const struct nvk_instance *instance,
|
|||
.maxSamplerDescriptorBufferBindings = 32,
|
||||
.maxEmbeddedImmutableSamplerBindings = 32,
|
||||
.maxEmbeddedImmutableSamplers = 4000,
|
||||
.bufferCaptureReplayDescriptorDataSize = 0,
|
||||
.bufferCaptureReplayDescriptorDataSize = sizeof(uint64_t),
|
||||
.imageCaptureReplayDescriptorDataSize = 0,
|
||||
.imageViewCaptureReplayDescriptorDataSize =
|
||||
sizeof(struct nvk_image_view_capture),
|
||||
|
|
@ -1141,8 +1141,13 @@ nvk_get_device_properties(const struct nvk_instance *instance,
|
|||
.robustStorageBufferAccessSizeAlignment = NVK_SSBO_BOUNDS_CHECK_ALIGNMENT,
|
||||
.robustUniformBufferAccessSizeAlignment = nvk_min_cbuf_alignment(info),
|
||||
|
||||
/* VK_EXT_sample_locations */
|
||||
.sampleLocationSampleCounts = sample_counts,
|
||||
/* VK_EXT_sample_locations
|
||||
*
|
||||
* There's a weird HW issue with per-sample interpolation for 1x. It
|
||||
* always interpolates at (0.5, 0.5) so we just disable custom sample
|
||||
* locations for 1x.
|
||||
*/
|
||||
.sampleLocationSampleCounts = sample_counts & ~VK_SAMPLE_COUNT_1_BIT,
|
||||
.maxSampleLocationGridSize = (VkExtent2D){ 1, 1 },
|
||||
.sampleLocationCoordinateRange[0] = 0.0f,
|
||||
.sampleLocationCoordinateRange[1] = 0.9375f,
|
||||
|
|
@ -1267,6 +1272,9 @@ nvk_physical_device_init_pipeline_cache(struct nvk_physical_device *pdev)
|
|||
_mesa_sha1_update(&sha_ctx, instance->driver_build_sha,
|
||||
sizeof(instance->driver_build_sha));
|
||||
|
||||
_mesa_sha1_update(&sha_ctx, &pdev->info.chipset,
|
||||
sizeof(pdev->info.chipset));
|
||||
|
||||
const uint64_t compiler_flags = nvk_physical_device_compiler_flags(pdev);
|
||||
_mesa_sha1_update(&sha_ctx, &compiler_flags, sizeof(compiler_flags));
|
||||
|
||||
|
|
|
|||
|
|
@ -100,6 +100,12 @@ compile(void *memctx, const uint32_t *spirv, size_t spirv_size, unsigned arch)
|
|||
nir_shader *nir =
|
||||
spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL,
|
||||
"library", &spirv_options, nir_options);
|
||||
/* Workgroup size may be different between different entrypoints, so we
|
||||
* mark it as variable to prevent it from being lowered to a constant while
|
||||
* we are still processing all entrypoints together. This is tempoary,
|
||||
* nir_precompiled_build_variant will set the fixed workgroup size for each
|
||||
* entrypoint and set workgroup_size_variable back to false. */
|
||||
nir->info.workgroup_size_variable = true;
|
||||
nir_validate_shader(nir, "after spirv_to_nir");
|
||||
nir_validate_ssa_dominance(nir, "after spirv_to_nir");
|
||||
ralloc_steal(memctx, nir);
|
||||
|
|
|
|||
|
|
@ -2557,7 +2557,8 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
|
|||
uint32_t patch_attribs =
|
||||
cmdbuf->state.gfx.vi.attribs_changing_on_base_instance;
|
||||
uint32_t vs_res_table_size =
|
||||
panvk_shader_res_table_count(&cmdbuf->state.gfx.vs.desc);
|
||||
panvk_shader_res_table_count(&cmdbuf->state.gfx.vs.desc) *
|
||||
pan_size(RESOURCE);
|
||||
bool patch_faus = shader_uses_sysval(vs, graphics, vs.first_vertex) ||
|
||||
shader_uses_sysval(vs, graphics, vs.base_instance);
|
||||
struct cs_index draw_params_addr = cs_scratch_reg64(b, 0);
|
||||
|
|
@ -2583,6 +2584,9 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
|
|||
if (patch_faus)
|
||||
cs_move64_to(b, vs_fau_addr, cmdbuf->state.gfx.vs.push_uniforms);
|
||||
|
||||
if (patch_attribs != 0)
|
||||
cs_move64_to(b, vs_drv_set, vs_desc_state->driver_set.dev_addr);
|
||||
|
||||
cs_move64_to(b, draw_params_addr, draw->indirect.buffer_dev_addr);
|
||||
cs_move32_to(b, draw_id, 0);
|
||||
|
||||
|
|
@ -2610,8 +2614,6 @@ panvk_cmd_draw_indirect(struct panvk_cmd_buffer *cmdbuf,
|
|||
}
|
||||
|
||||
if (patch_attribs != 0) {
|
||||
cs_move64_to(b, vs_drv_set, vs_desc_state->driver_set.dev_addr);
|
||||
|
||||
/* If firstInstance=0, skip the offset adjustment. */
|
||||
cs_if(b, MALI_CS_CONDITION_NEQUAL,
|
||||
cs_sr_reg32(b, IDVS, INSTANCE_OFFSET)) {
|
||||
|
|
|
|||
|
|
@ -922,12 +922,11 @@ panvk_per_arch(get_physical_device_properties)(
|
|||
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
|
||||
.maxInlineUniformTotalSize =
|
||||
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS * MAX_INLINE_UNIFORM_BLOCK_SIZE,
|
||||
.integerDotProduct8BitUnsignedAccelerated = true,
|
||||
.integerDotProduct8BitSignedAccelerated = true,
|
||||
.integerDotProduct8BitUnsignedAccelerated = false,
|
||||
.integerDotProduct8BitSignedAccelerated = false,
|
||||
.integerDotProduct8BitMixedSignednessAccelerated = false,
|
||||
.integerDotProduct4x8BitPackedUnsignedAccelerated = true,
|
||||
.integerDotProduct4x8BitPackedSignedAccelerated = true,
|
||||
.integerDotProduct4x8BitPackedSignedAccelerated = false,
|
||||
.integerDotProduct4x8BitPackedUnsignedAccelerated = PAN_ARCH >= 9,
|
||||
.integerDotProduct4x8BitPackedSignedAccelerated = PAN_ARCH >= 9,
|
||||
.integerDotProduct16BitUnsignedAccelerated = false,
|
||||
.integerDotProduct16BitSignedAccelerated = false,
|
||||
.integerDotProduct16BitMixedSignednessAccelerated = false,
|
||||
|
|
@ -940,8 +939,8 @@ panvk_per_arch(get_physical_device_properties)(
|
|||
.integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false,
|
||||
.integerDotProductAccumulatingSaturating8BitSignedAccelerated = false,
|
||||
.integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false,
|
||||
.integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = false,
|
||||
.integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false,
|
||||
.integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = PAN_ARCH >= 9,
|
||||
.integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = PAN_ARCH >= 9,
|
||||
.integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = false,
|
||||
.integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false,
|
||||
.integerDotProductAccumulatingSaturating16BitSignedAccelerated = false,
|
||||
|
|
|
|||
|
|
@ -1551,10 +1551,12 @@ shader_desc_info_deserialize(struct panvk_device *dev,
|
|||
#if PAN_ARCH < 9
|
||||
shader->desc_info.dyn_ubos.count = blob_read_uint32(blob);
|
||||
blob_copy_bytes(blob, shader->desc_info.dyn_ubos.map,
|
||||
shader->desc_info.dyn_ubos.count);
|
||||
sizeof(*shader->desc_info.dyn_ubos.map) *
|
||||
shader->desc_info.dyn_ubos.count);
|
||||
shader->desc_info.dyn_ssbos.count = blob_read_uint32(blob);
|
||||
blob_copy_bytes(blob, shader->desc_info.dyn_ssbos.map,
|
||||
shader->desc_info.dyn_ssbos.count);
|
||||
sizeof(*shader->desc_info.dyn_ssbos.map) *
|
||||
shader->desc_info.dyn_ssbos.count);
|
||||
|
||||
uint32_t others_count = 0;
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(shader->desc_info.others.count); i++) {
|
||||
|
|
@ -1582,6 +1584,7 @@ shader_desc_info_deserialize(struct panvk_device *dev,
|
|||
blob_copy_bytes(blob, shader->desc_info.dyn_bufs.map,
|
||||
sizeof(*shader->desc_info.dyn_bufs.map) *
|
||||
shader->desc_info.dyn_bufs.count);
|
||||
shader->desc_info.max_varying_loads = blob_read_uint32(blob);
|
||||
#endif
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
|
@ -1714,6 +1717,7 @@ shader_desc_info_serialize(struct blob *blob,
|
|||
blob_write_bytes(blob, shader->desc_info.dyn_bufs.map,
|
||||
sizeof(*shader->desc_info.dyn_bufs.map) *
|
||||
shader->desc_info.dyn_bufs.count);
|
||||
blob_write_uint32(blob, shader->desc_info.max_varying_loads);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1087,6 +1087,9 @@ TODO: document the other workarounds.
|
|||
<application name="Jusant" executable="ASC-Win64-Shipping.exe">
|
||||
<option name="force_vk_vendor" value="-1"/>
|
||||
</application>
|
||||
<application name="Wuthering Waves" executable="Client-Win64-Shipping.exe">
|
||||
<option name="force_vk_vendor" value="-1"/>
|
||||
</application>
|
||||
<application name="DIRT 5" executable="DIRT5.exe">
|
||||
<option name="fp64_workaround_enabled" value="true" />
|
||||
</application>
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@
|
|||
#include <stddef.h>
|
||||
|
||||
#include "detect_arch.h"
|
||||
#include "u_cpu_detect.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
|
@ -44,7 +45,13 @@ util_has_cache_ops(void)
|
|||
return false;
|
||||
#endif
|
||||
|
||||
return DETECT_ARCH_X86 || DETECT_ARCH_X86_64 || DETECT_ARCH_AARCH64;
|
||||
#if DETECT_ARCH_X86
|
||||
return util_get_cpu_caps()->has_sse2;
|
||||
#elif DETECT_ARCH_X86_64 || DETECT_ARCH_AARCH64
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
/** Returns the cache granularity
|
||||
|
|
|
|||
|
|
@ -188,7 +188,14 @@ libmesa_util_links = []
|
|||
if host_machine.cpu_family() == 'aarch64' and cc.get_id() != 'msvc'
|
||||
files_mesa_util += files('cache_ops_aarch64.c')
|
||||
elif host_machine.cpu_family() in ['x86', 'x86_64'] and cc.get_id() != 'msvc'
|
||||
files_mesa_util += files('cache_ops_x86.c')
|
||||
libmesa_util_clflush = static_library(
|
||||
'mesa_util_clflush',
|
||||
['cache_ops_x86.c'],
|
||||
include_directories : [inc_util],
|
||||
c_args : [no_override_init_args, sse2_args],
|
||||
gnu_symbol_visibility : 'hidden',
|
||||
)
|
||||
libmesa_util_links += [libmesa_util_clflush]
|
||||
if with_clflushopt
|
||||
libmesa_util_clflushopt = static_library(
|
||||
'mesa_util_clflushopt',
|
||||
|
|
@ -197,7 +204,7 @@ elif host_machine.cpu_family() in ['x86', 'x86_64'] and cc.get_id() != 'msvc'
|
|||
c_args : [no_override_init_args] + clflushopt_args,
|
||||
gnu_symbol_visibility : 'hidden',
|
||||
)
|
||||
libmesa_util_links += libmesa_util_clflushopt
|
||||
libmesa_util_links += [libmesa_util_clflushopt]
|
||||
endif
|
||||
else
|
||||
files_mesa_util += files('cache_ops_null.c')
|
||||
|
|
|
|||
|
|
@ -288,7 +288,7 @@ os_same_file_description(int fd1, int fd2)
|
|||
if (efd < 0)
|
||||
return -1;
|
||||
|
||||
struct epoll_event evt = {};
|
||||
struct epoll_event evt = {0};
|
||||
/* Get a new file descriptor number for fd1. */
|
||||
int tmp = os_dupfd_cloexec(fd1);
|
||||
/* Add it to evt. */
|
||||
|
|
|
|||
|
|
@ -1087,6 +1087,7 @@ vk_get_command_buffer_inheritance_as_rendering_resume(
|
|||
&subpass->color_attachments[i];
|
||||
if (sp_att->attachment == VK_ATTACHMENT_UNUSED) {
|
||||
attachments[i] = (VkRenderingAttachmentInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
|
||||
.imageView = VK_NULL_HANDLE,
|
||||
};
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -757,6 +757,7 @@ wsi_create_native_image_mem(const struct wsi_swapchain *chain,
|
|||
* handling implict sync ourselves.
|
||||
*/
|
||||
.implicit_sync = !info->explicit_sync && !chain->dma_buf_semaphore,
|
||||
.dma_buf_sync_file = chain->dma_buf_semaphore,
|
||||
};
|
||||
const VkExportMemoryAllocateInfo memory_export_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
|
||||
|
|
|
|||
|
|
@ -201,9 +201,6 @@ struct wsi_swapchain {
|
|||
VkAllocationCallbacks alloc;
|
||||
VkFence* fences;
|
||||
VkPresentModeKHR present_mode;
|
||||
VkPresentGravityFlagsEXT present_gravity_x;
|
||||
VkPresentGravityFlagsEXT present_gravity_y;
|
||||
|
||||
/**
|
||||
* Timeline for presents completing according to VK_KHR_present_wait. The
|
||||
* present should complete as close as possible (before or after!) to the
|
||||
|
|
|
|||
|
|
@ -925,6 +925,7 @@ struct Colorspace {
|
|||
enum wp_color_manager_v1_primaries primaries;
|
||||
enum wp_color_manager_v1_transfer_function tf;
|
||||
bool should_use_hdr_metadata;
|
||||
bool needs_extended_range;
|
||||
};
|
||||
struct Colorspace colorspace_mapping[] = {
|
||||
{
|
||||
|
|
@ -932,48 +933,56 @@ struct Colorspace colorspace_mapping[] = {
|
|||
.primaries = WP_COLOR_MANAGER_V1_PRIMARIES_SRGB,
|
||||
.tf = WP_COLOR_MANAGER_V1_TRANSFER_FUNCTION_SRGB,
|
||||
.should_use_hdr_metadata = false,
|
||||
.needs_extended_range = false,
|
||||
},
|
||||
{
|
||||
.colorspace = VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT,
|
||||
.primaries = WP_COLOR_MANAGER_V1_PRIMARIES_DISPLAY_P3,
|
||||
.tf = WP_COLOR_MANAGER_V1_TRANSFER_FUNCTION_SRGB,
|
||||
.should_use_hdr_metadata = false,
|
||||
.needs_extended_range = false,
|
||||
},
|
||||
{
|
||||
.colorspace = VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT,
|
||||
.primaries = WP_COLOR_MANAGER_V1_PRIMARIES_SRGB,
|
||||
.tf = WP_COLOR_MANAGER_V1_TRANSFER_FUNCTION_EXT_LINEAR,
|
||||
.should_use_hdr_metadata = true,
|
||||
.needs_extended_range = true,
|
||||
},
|
||||
{
|
||||
.colorspace = VK_COLOR_SPACE_DISPLAY_P3_LINEAR_EXT,
|
||||
.primaries = WP_COLOR_MANAGER_V1_PRIMARIES_DISPLAY_P3,
|
||||
.tf = WP_COLOR_MANAGER_V1_TRANSFER_FUNCTION_EXT_LINEAR,
|
||||
.should_use_hdr_metadata = false,
|
||||
.needs_extended_range = false,
|
||||
},
|
||||
{
|
||||
.colorspace = VK_COLOR_SPACE_BT709_LINEAR_EXT,
|
||||
.primaries = WP_COLOR_MANAGER_V1_PRIMARIES_SRGB,
|
||||
.tf = WP_COLOR_MANAGER_V1_TRANSFER_FUNCTION_EXT_LINEAR,
|
||||
.should_use_hdr_metadata = false,
|
||||
.needs_extended_range = false,
|
||||
},
|
||||
{
|
||||
.colorspace = VK_COLOR_SPACE_BT709_NONLINEAR_EXT,
|
||||
.primaries = WP_COLOR_MANAGER_V1_PRIMARIES_SRGB,
|
||||
.tf = WP_COLOR_MANAGER_V1_TRANSFER_FUNCTION_BT1886,
|
||||
.should_use_hdr_metadata = false,
|
||||
.needs_extended_range = false,
|
||||
},
|
||||
{
|
||||
.colorspace = VK_COLOR_SPACE_BT2020_LINEAR_EXT,
|
||||
.primaries = WP_COLOR_MANAGER_V1_PRIMARIES_BT2020,
|
||||
.tf = WP_COLOR_MANAGER_V1_TRANSFER_FUNCTION_EXT_LINEAR,
|
||||
.should_use_hdr_metadata = false,
|
||||
.needs_extended_range = false,
|
||||
},
|
||||
{
|
||||
.colorspace = VK_COLOR_SPACE_HDR10_ST2084_EXT,
|
||||
.primaries = WP_COLOR_MANAGER_V1_PRIMARIES_BT2020,
|
||||
.tf = WP_COLOR_MANAGER_V1_TRANSFER_FUNCTION_ST2084_PQ,
|
||||
.should_use_hdr_metadata = true,
|
||||
.needs_extended_range = false,
|
||||
},
|
||||
/* VK_COLOR_SPACE_DOLBYVISION_EXT is left out because it's deprecated */
|
||||
{
|
||||
|
|
@ -981,22 +990,21 @@ struct Colorspace colorspace_mapping[] = {
|
|||
.primaries = WP_COLOR_MANAGER_V1_PRIMARIES_BT2020,
|
||||
.tf = WP_COLOR_MANAGER_V1_TRANSFER_FUNCTION_HLG,
|
||||
.should_use_hdr_metadata = true,
|
||||
.needs_extended_range = false,
|
||||
},
|
||||
{
|
||||
.colorspace = VK_COLOR_SPACE_ADOBERGB_LINEAR_EXT,
|
||||
.primaries = WP_COLOR_MANAGER_V1_PRIMARIES_ADOBE_RGB,
|
||||
.tf = WP_COLOR_MANAGER_V1_TRANSFER_FUNCTION_EXT_LINEAR,
|
||||
.should_use_hdr_metadata = false,
|
||||
.needs_extended_range = false,
|
||||
},
|
||||
/* VK_COLOR_SPACE_ADOBERGB_NONLINEAR_EXT is left out because there's no
|
||||
* exactly matching transfer function in the Wayland protocol */
|
||||
/* VK_COLOR_SPACE_PASS_THROUGH_EXT is handled elsewhere */
|
||||
{
|
||||
.colorspace = VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT,
|
||||
.primaries = WP_COLOR_MANAGER_V1_PRIMARIES_SRGB,
|
||||
.tf = WP_COLOR_MANAGER_V1_TRANSFER_FUNCTION_EXT_SRGB,
|
||||
.should_use_hdr_metadata = true,
|
||||
},
|
||||
/* VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT is intentionally not added
|
||||
* as it's a bit unclear how exactly it should be used
|
||||
* and whether or not the transfer function should be gamma 2.2 or piece-wise */
|
||||
/* VK_COLOR_SPACE_DISPLAY_NATIVE_AMD isn't supported */
|
||||
/* VK_COLORSPACE_SRGB_NONLINEAR_KHR is just an alias */
|
||||
/* VK_COLOR_SPACE_DCI_P3_LINEAR_EXT is just an alias */
|
||||
|
|
@ -1033,6 +1041,9 @@ wsi_wl_display_determine_colorspaces(struct wsi_wl_display *display)
|
|||
continue;
|
||||
if (!vector_contains(tfs, colorspace_mapping[i].tf))
|
||||
continue;
|
||||
if (!display->color_features.extended_target_volume &&
|
||||
colorspace_mapping[i].needs_extended_range)
|
||||
continue;
|
||||
VkColorSpaceKHR *new_cs = u_vector_add(&display->colorspaces);
|
||||
if (!new_cs)
|
||||
return -1;
|
||||
|
|
|
|||
|
|
@ -810,10 +810,11 @@ x11_surface_get_capabilities2(VkIcdSurfaceBase *icd_surface,
|
|||
}
|
||||
|
||||
case VK_STRUCTURE_TYPE_SURFACE_PRESENT_SCALING_CAPABILITIES_EXT: {
|
||||
/* Unsupported. */
|
||||
VkSurfacePresentScalingCapabilitiesEXT *scaling = (void *)ext;
|
||||
scaling->supportedPresentScaling = VK_PRESENT_SCALING_ONE_TO_ONE_BIT_EXT;
|
||||
scaling->supportedPresentGravityX = VK_PRESENT_GRAVITY_MIN_BIT_EXT | VK_PRESENT_GRAVITY_MAX_BIT_EXT | VK_PRESENT_GRAVITY_CENTERED_BIT_EXT;
|
||||
scaling->supportedPresentGravityY = VK_PRESENT_GRAVITY_MIN_BIT_EXT | VK_PRESENT_GRAVITY_MAX_BIT_EXT | VK_PRESENT_GRAVITY_CENTERED_BIT_EXT;
|
||||
scaling->supportedPresentScaling = 0;
|
||||
scaling->supportedPresentGravityX = 0;
|
||||
scaling->supportedPresentGravityY = 0;
|
||||
scaling->minScaledImageExtent = caps->surfaceCapabilities.minImageExtent;
|
||||
scaling->maxScaledImageExtent = caps->surfaceCapabilities.maxImageExtent;
|
||||
break;
|
||||
|
|
@ -1458,46 +1459,7 @@ x11_present_to_x11_dri3(struct x11_swapchain *chain, uint32_t image_index,
|
|||
.serial = serial,
|
||||
};
|
||||
|
||||
int16_t x_off = 0;
|
||||
int16_t y_off = 0;
|
||||
|
||||
xcb_get_geometry_reply_t *geometry =
|
||||
xcb_get_geometry_reply(chain->conn, xcb_get_geometry(chain->conn, chain->window), NULL);
|
||||
|
||||
if (geometry) {
|
||||
switch (chain->base.present_gravity_x) {
|
||||
case VK_PRESENT_GRAVITY_MIN_BIT_EXT:
|
||||
x_off = 0;
|
||||
break;
|
||||
case VK_PRESENT_GRAVITY_MAX_BIT_EXT:
|
||||
x_off = geometry->width - chain->extent.width;
|
||||
break;
|
||||
case VK_PRESENT_GRAVITY_CENTERED_BIT_EXT:
|
||||
x_off = (geometry->width / 2) - (chain->extent.width / 2);
|
||||
break;
|
||||
default:
|
||||
x_off = 0;
|
||||
}
|
||||
|
||||
switch (chain->base.present_gravity_y) {
|
||||
case VK_PRESENT_GRAVITY_MIN_BIT_EXT:
|
||||
y_off = 0;
|
||||
break;
|
||||
case VK_PRESENT_GRAVITY_MAX_BIT_EXT:
|
||||
y_off = geometry->height - chain->extent.height;
|
||||
break;
|
||||
case VK_PRESENT_GRAVITY_CENTERED_BIT_EXT:
|
||||
y_off = (geometry->height / 2) - (chain->extent.height / 2);
|
||||
break;
|
||||
default:
|
||||
y_off = 0;
|
||||
}
|
||||
|
||||
free(geometry);
|
||||
}
|
||||
|
||||
xcb_void_cookie_t cookie;
|
||||
|
||||
#ifdef HAVE_DRI3_EXPLICIT_SYNC
|
||||
if (chain->base.image_info.explicit_sync) {
|
||||
uint64_t acquire_point = image->base.explicit_sync[WSI_ES_ACQUIRE].timeline;
|
||||
|
|
@ -1509,8 +1471,8 @@ x11_present_to_x11_dri3(struct x11_swapchain *chain, uint32_t image_index,
|
|||
serial,
|
||||
0, /* valid */
|
||||
image->update_area, /* update */
|
||||
x_off, /* x_off */
|
||||
y_off, /* y_off */
|
||||
0, /* x_off */
|
||||
0, /* y_off */
|
||||
XCB_NONE, /* target_crtc */
|
||||
image->dri3_syncobj[WSI_ES_ACQUIRE], /* acquire_syncobj */
|
||||
image->dri3_syncobj[WSI_ES_RELEASE], /* release_syncobj */
|
||||
|
|
@ -1529,8 +1491,8 @@ x11_present_to_x11_dri3(struct x11_swapchain *chain, uint32_t image_index,
|
|||
serial,
|
||||
0, /* valid */
|
||||
image->update_area, /* update */
|
||||
x_off, /* x_off */
|
||||
y_off, /* y_off */
|
||||
0, /* x_off */
|
||||
0, /* y_off */
|
||||
XCB_NONE, /* target_crtc */
|
||||
XCB_NONE,
|
||||
image->sync_fence,
|
||||
|
|
@ -2788,14 +2750,6 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
|
|||
chain->has_mit_shm = wsi_conn->has_mit_shm;
|
||||
chain->has_async_may_tear = present_caps & XCB_PRESENT_CAPABILITY_ASYNC_MAY_TEAR;
|
||||
|
||||
const VkSwapchainPresentScalingCreateInfoEXT* scaling_info =
|
||||
vk_find_struct_const(pCreateInfo->pNext, SWAPCHAIN_PRESENT_SCALING_CREATE_INFO_EXT);
|
||||
|
||||
if (scaling_info) {
|
||||
chain->base.present_gravity_x = scaling_info->presentGravityX;
|
||||
chain->base.present_gravity_y = scaling_info->presentGravityY;
|
||||
}
|
||||
|
||||
/* When images in the swapchain don't fit the window, X can still present them, but it won't
|
||||
* happen by flip, only by copy. So this is a suboptimal copy, because if the client would change
|
||||
* the chain extents X may be able to flip
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue