diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index e3af0854173..fbed317d2df 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -2761,3 +2761,12 @@ intrinsic("uvsw_write_pco", src_comp=[1, 0], bit_sizes=[32]) # load_vtxin_pco(offset) intrinsic("load_vtxin_pco", src_comp=[1], dest_comp=0, bit_sizes=[32]) + +# load_coeff_pco(offset) +intrinsic("load_coeff_pco", src_comp=[1], dest_comp=0, bit_sizes=[32]) + +# dma_ld_pco(address) +intrinsic("dma_ld_pco", src_comp=[2], dest_comp=0, flags=[CAN_ELIMINATE], bit_sizes=[32]) + +# dma_st_pco(address_data) +intrinsic("dma_st_pco", src_comp=[0], bit_sizes=[32]) diff --git a/src/compiler/nir/nir_lower_alu_width.c b/src/compiler/nir/nir_lower_alu_width.c index 64d6e469cb9..e23325fd5fb 100644 --- a/src/compiler/nir/nir_lower_alu_width.c +++ b/src/compiler/nir/nir_lower_alu_width.c @@ -273,6 +273,7 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data) case nir_op_unpack_snorm_2x16: case nir_op_mqsad_4x8: case nir_op_uadd64_32: + case nir_op_umad64_32: /* There is no scalar version of these ops, unless we were to break it * down to bitshifts and math (which is definitely not intended). */ diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index a99815da021..dafc9cdfb55 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -645,6 +645,12 @@ dst.x = sum & 0xffffffff; dst.y = sum >> 32; """) +opcode("umad64_32", 2, tuint32, [1, 1, 1, 1], [tuint32, tuint32, tuint32, tuint32], False, "", """ +uint64_t sum = ((uint64_t)src0.x * (uint64_t)src1.x) + ((uint64_t)src3.x << 32 | (uint64_t)src2.x); +dst.x = sum & 0xffffffff; +dst.y = sum >> 32; +""") + binop("fsub", tfloat, "", """ if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) { if (bit_size == 64) diff --git a/src/imagination/common/pvr_iface.h b/src/imagination/common/pvr_iface.h new file mode 100644 index 00000000000..73ebb102197 --- /dev/null +++ b/src/imagination/common/pvr_iface.h @@ -0,0 +1,61 @@ +/* + * Copyright © 2025 Imagination Technologies Ltd. + * SPDX-License-Identifier: MIT + */ + +#ifndef PVR_IFACE_H +#define PVR_IFACE_H + +/** + * \file pvr_iface.h + * + * \brief USC program interface. + */ + +/** Query availability shader data; shared registers. */ +enum pvr_query_availability_data { + PVR_QUERY_AVAILABILITY_DATA_INDEX_COUNT, + + PVR_QUERY_AVAILABILITY_DATA_INDEX_BO_LO, + PVR_QUERY_AVAILABILITY_DATA_INDEX_BO_HI, + + PVR_QUERY_AVAILABILITY_DATA_BO_LO, + PVR_QUERY_AVAILABILITY_DATA_BO_HI, + + _PVR_QUERY_AVAILABILITY_DATA_COUNT, +}; + +/** Query copy shader data; shared registers. */ +enum pvr_query_copy_data { + PVR_QUERY_COPY_DATA_INDEX_COUNT, + + PVR_QUERY_COPY_DATA_DEST_BO_LO, + PVR_QUERY_COPY_DATA_DEST_BO_HI, + + PVR_QUERY_COPY_DATA_AVAILABILITY_BO_LO, + PVR_QUERY_COPY_DATA_AVAILABILITY_BO_HI, + + PVR_QUERY_COPY_DATA_RESULT_BO_LO, + PVR_QUERY_COPY_DATA_RESULT_BO_HI, + + PVR_QUERY_COPY_DATA_DEST_STRIDE, + + PVR_QUERY_COPY_DATA_FLAGS, + + _PVR_QUERY_COPY_DATA_COUNT, +}; + +/** Query reset shader data; shared registers. 
*/ +enum pvr_query_reset_data { + PVR_QUERY_RESET_DATA_INDEX_COUNT, + + PVR_QUERY_RESET_DATA_RESULT_BO_LO, + PVR_QUERY_RESET_DATA_RESULT_BO_HI, + + PVR_QUERY_RESET_DATA_AVAILABILITY_BO_LO, + PVR_QUERY_RESET_DATA_AVAILABILITY_BO_HI, + + _PVR_QUERY_RESET_DATA_COUNT, +}; + +#endif /* PVR_IFACE_H */ diff --git a/src/imagination/pco/pco_trans_nir.c b/src/imagination/pco/pco_trans_nir.c index bf28b054554..cdf125b54b4 100644 --- a/src/imagination/pco/pco_trans_nir.c +++ b/src/imagination/pco/pco_trans_nir.c @@ -263,10 +263,11 @@ static pco_instr *trans_uvsw_write(trans_ctx *tctx, return pco_uvsw_write(&tctx->b, data_src, vtxout_addr, .rpt = chans); } -static pco_instr *trans_load_vtxin(trans_ctx *tctx, - nir_intrinsic_instr *intr, - pco_ref dest, - UNUSED pco_ref offset_src) +static pco_instr *trans_load_reg(trans_ctx *tctx, + nir_intrinsic_instr *intr, + pco_ref dest, + UNUSED pco_ref offset_src, + enum pco_reg_class class) { unsigned chans = pco_ref_get_chans(dest); @@ -274,7 +275,7 @@ static pco_instr *trans_load_vtxin(trans_ctx *tctx, /* TODO: support indexed source offset. */ assert(nir_src_is_const(*noffset_src)); unsigned offset = nir_src_as_uint(*noffset_src); - pco_ref src = pco_ref_hwreg_vec(offset, PCO_REG_CLASS_VTXIN, chans); + pco_ref src = pco_ref_hwreg_vec(offset, class, chans); return pco_mov(&tctx->b, dest, src, .rpt = chans); } @@ -1691,7 +1692,11 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr) break; case nir_intrinsic_load_vtxin_pco: - instr = trans_load_vtxin(tctx, intr, dest, src[0]); + instr = trans_load_reg(tctx, intr, dest, src[0], PCO_REG_CLASS_VTXIN); + break; + + case nir_intrinsic_load_coeff_pco: + instr = trans_load_reg(tctx, intr, dest, src[0], PCO_REG_CLASS_COEFF); break; case nir_intrinsic_load_output: @@ -1796,6 +1801,35 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr) instr = trans_scratch(tctx, dest, src[1], src[0]); break; + case nir_intrinsic_dma_ld_pco: { + unsigned chans = pco_ref_get_chans(dest); + + instr = pco_ld(&tctx->b, + dest, + pco_ref_drc(PCO_DRC_0), + pco_ref_imm8(chans), + src[0]); + + break; + } + + case nir_intrinsic_dma_st_pco: { + unsigned chans = pco_ref_get_chans(src[0]) - 2; + + pco_ref data_comp = + pco_ref_new_ssa(tctx->func, pco_ref_get_bits(src[0]), chans); + pco_comp(&tctx->b, data_comp, src[0], pco_ref_val16(2)); + + instr = pco_st32(&tctx->b, + data_comp, + pco_ref_drc(PCO_DRC_0), + pco_ref_imm8(chans), + src[0], + pco_ref_null()); + + break; + } + /* Vertex sysvals. */ case nir_intrinsic_load_vertex_id: case nir_intrinsic_load_instance_id: @@ -2675,6 +2709,28 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu) break; } + case nir_op_umad64_32: { + pco_ref dest_comps[2] = { + [0] = pco_ref_new_ssa32(tctx->func), + [1] = pco_ref_new_ssa32(tctx->func), + }; + + pco_imadd64(&tctx->b, + dest_comps[0], + dest_comps[1], + src[0], + src[1], + src[2], + src[3], + pco_ref_null()); + + /* TODO: mark this vec as being non-contiguous, + * add pass for expanding. + */ + instr = pco_trans_nir_vec(tctx, dest, 2, dest_comps); + break; + } + case nir_op_imul: instr = pco_imul32(&tctx->b, dest, src[0], src[1], pco_ref_null()); break; @@ -2713,6 +2769,16 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu) pco_ref_null()); break; + case nir_op_imad: + instr = pco_imadd32(&tctx->b, + dest, + src[0], + src[1], + src[2], + pco_ref_null(), + .s = true); + break; + /* Set-on (float) comparisons. 
*/ case nir_op_slt: case nir_op_sge: diff --git a/src/imagination/pco/usclib/common.cl b/src/imagination/pco/usclib/common.cl index 67327db08ed..186a59c80f9 100644 --- a/src/imagination/pco/usclib/common.cl +++ b/src/imagination/pco/usclib/common.cl @@ -4,6 +4,7 @@ */ #include "libcl.h" +#include "compiler/libcl/libcl_vk.h" KERNEL(1) vs_nop_common(void) @@ -38,3 +39,115 @@ vs_passthrough_rta_common(void) vs_passthrough_common(); nir_uvsw_write_pco(4, nir_load_vtxin_pco(1, 3)); } + +/* TODO: uint index = cl_global_id.x; + * instead of this function once things + * are properly hooked up. +*/ +static inline uint +query_calc_global_id(void) +{ + uint local_invoc_index = nir_load_vtxin_pco(1, 0); + local_invoc_index &= get_local_size(0) - 1; + uint wg_id = nir_load_coeff_pco(1, 0); + return nir_imad(wg_id, get_local_size(0), local_invoc_index); +} + +/* TODO: support parameter passing. */ +/* TODO: switch to common implementation. */ +KERNEL(32) +cs_query_availability_common(void) +{ + uint index_count = nir_load_preamble(1, PVR_QUERY_AVAILABILITY_DATA_INDEX_COUNT, 0); + + uint index_base_addr_lo = nir_load_preamble(1, PVR_QUERY_AVAILABILITY_DATA_INDEX_BO_LO, 0); + uint index_base_addr_hi = nir_load_preamble(1, PVR_QUERY_AVAILABILITY_DATA_INDEX_BO_HI, 0); + + uint avail_base_addr_lo = nir_load_preamble(1, PVR_QUERY_AVAILABILITY_DATA_BO_LO, 0); + uint avail_base_addr_hi = nir_load_preamble(1, PVR_QUERY_AVAILABILITY_DATA_BO_HI, 0); + + uint index = query_calc_global_id(); + + if (index < index_count) { + uint2 index_addr = nir_uadd64_32(index_base_addr_lo, index_base_addr_hi, index * sizeof(uint32_t)); + uint offset = nir_dma_ld_pco(1, index_addr); + + uint2 avail_addr = nir_uadd64_32(avail_base_addr_lo, avail_base_addr_hi, offset * sizeof(uint32_t)); + + nir_dma_st_pco(avail_addr, ~0U); + } +} + +KERNEL(32) +cs_query_copy_common(void) +{ + uint index_count = nir_load_preamble(1, PVR_QUERY_COPY_DATA_INDEX_COUNT, 0); + + uint dest_base_addr_lo = nir_load_preamble(1, PVR_QUERY_COPY_DATA_DEST_BO_LO, 0); + uint dest_base_addr_hi = nir_load_preamble(1, PVR_QUERY_COPY_DATA_DEST_BO_HI, 0); + + uint avail_base_addr_lo = nir_load_preamble(1, PVR_QUERY_COPY_DATA_AVAILABILITY_BO_LO, 0); + uint avail_base_addr_hi = nir_load_preamble(1, PVR_QUERY_COPY_DATA_AVAILABILITY_BO_HI, 0); + + uint result_base_addr_lo = nir_load_preamble(1, PVR_QUERY_COPY_DATA_RESULT_BO_LO, 0); + uint result_base_addr_hi = nir_load_preamble(1, PVR_QUERY_COPY_DATA_RESULT_BO_HI, 0); + + uint dest_stride = nir_load_preamble(1, PVR_QUERY_COPY_DATA_DEST_STRIDE, 0); + + uint flags = nir_load_preamble(1, PVR_QUERY_COPY_DATA_FLAGS, 0); + + uint index = query_calc_global_id(); + + if (index < index_count) { + uint2 avail_addr = nir_uadd64_32(avail_base_addr_lo, avail_base_addr_hi, index * sizeof(uint32_t)); + uint available = nir_dma_ld_pco(1, avail_addr); + + uint2 dest_addr = nir_umad64_32(dest_stride, index, dest_base_addr_lo, dest_base_addr_hi); + + if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT)) { + uint2 result_addr = nir_uadd64_32(result_base_addr_lo, result_base_addr_hi, index * sizeof(uint32_t)); + uint result = nir_dma_ld_pco(1, result_addr); + + /* TODO: for 64/32-bit writes, just prep the 64-bit one and set the burst-length variably. 
*/ + if (flags & VK_QUERY_RESULT_64_BIT) { + /* TODO: check if data should be (result, 0) or (0, result) */ + nir_dma_st_pco(dest_addr, result, 0); + } else { + nir_dma_st_pco(dest_addr, result); + } + } + + if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) { + if (flags & VK_QUERY_RESULT_64_BIT) { + dest_addr = nir_uadd64_32(dest_addr.x, dest_addr.y, sizeof(uint64_t)); + /* TODO: check if data should be (available, 0) or (0, available) */ + nir_dma_st_pco(dest_addr, available, 0); + } else { + dest_addr = nir_uadd64_32(dest_addr.x, dest_addr.y, sizeof(uint32_t)); + nir_dma_st_pco(dest_addr, available); + } + } + } +} + +KERNEL(32) +cs_query_reset_common(void) +{ + uint index_count = nir_load_preamble(1, PVR_QUERY_RESET_DATA_INDEX_COUNT, 0); + + uint result_base_addr_lo = nir_load_preamble(1, PVR_QUERY_RESET_DATA_RESULT_BO_LO, 0); + uint result_base_addr_hi = nir_load_preamble(1, PVR_QUERY_RESET_DATA_RESULT_BO_HI, 0); + + uint avail_base_addr_lo = nir_load_preamble(1, PVR_QUERY_RESET_DATA_AVAILABILITY_BO_LO, 0); + uint avail_base_addr_hi = nir_load_preamble(1, PVR_QUERY_RESET_DATA_AVAILABILITY_BO_HI, 0); + + uint index = query_calc_global_id(); + + if (index < index_count) { + uint2 result_addr = nir_uadd64_32(result_base_addr_lo, result_base_addr_hi, index * sizeof(uint32_t)); + nir_dma_st_pco(result_addr, 0); + + uint2 avail_addr = nir_uadd64_32(avail_base_addr_lo, avail_base_addr_hi, index * sizeof(uint32_t)); + nir_dma_st_pco(avail_addr, 0); + } +} diff --git a/src/imagination/pco/usclib/libcl.h b/src/imagination/pco/usclib/libcl.h index d70df379afc..214706c0ded 100644 --- a/src/imagination/pco/usclib/libcl.h +++ b/src/imagination/pco/usclib/libcl.h @@ -6,6 +6,7 @@ #ifndef PCO_LIBCL_H #define PCO_LIBCL_H +#include "common/pvr_iface.h" #include "compiler/libcl/libcl.h" #include "compiler/shader_enums.h" #include "pco/pco_common.h" @@ -58,4 +59,39 @@ uint3 nir_load_vtxin_pco__3(uint offset); uint4 nir_load_vtxin_pco__4(uint offset); #define nir_load_vtxin_pco(n, ...) CAT2(nir_load_vtxin_pco__, n)(__VA_ARGS__) + +uint nir_load_coeff_pco__1(uint offset); +uint2 nir_load_coeff_pco__2(uint offset); +uint3 nir_load_coeff_pco__3(uint offset); +uint4 nir_load_coeff_pco__4(uint offset); + +#define nir_load_coeff_pco(n, ...) CAT2(nir_load_coeff_pco__, n)(__VA_ARGS__) + +uint nir_load_preamble__1(uint base, uint preamble_class); +uint4 nir_load_preamble__4(uint base, uint preamble_class); + +#define nir_load_preamble(n, ...) CAT2(nir_load_preamble__, n)(__VA_ARGS__) + +void nir_store_preamble(uint data, uint base, uint preamble_class); + +uint nir_dma_ld_pco__1(uint2 addr); +uint2 nir_dma_ld_pco__2(uint2 addr); +uint3 nir_dma_ld_pco__3(uint2 addr); +uint4 nir_dma_ld_pco__4(uint2 addr); + +#define nir_dma_ld_pco(n, ...) CAT2(nir_dma_ld_pco__, n)(__VA_ARGS__) + +void nir_dma_st_pco__1(uint3 addr_data); +void nir_dma_st_pco__2(uint4 addr_data); + +#define SELECT_ARGS_ST(addr, ...) \ + ((CAT2(uint, NUM_ARGS_PLUS_2(__VA_ARGS__)))(addr, __VA_ARGS__)) + +/* clang-format off */ +#define nir_dma_st_pco(addr, ...) 
SELECT_NAME(nir_dma_st_pco, __, __VA_ARGS__)SELECT_ARGS_ST(addr, __VA_ARGS__) +/* clang-format on */ + +uint2 nir_uadd64_32(uint lo, uint hi, uint offset); +uint nir_imad(uint a, uint b, uint c); +uint2 nir_umad64_32(uint a, uint b, uint lo, uint hi); #endif /* PCO_LIBCL_H */ diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c index e5452b7b0bd..e2543624d56 100644 --- a/src/imagination/vulkan/pvr_cmd_buffer.c +++ b/src/imagination/vulkan/pvr_cmd_buffer.c @@ -104,7 +104,7 @@ static void pvr_cmd_buffer_free_sub_cmd(struct pvr_cmd_buffer *cmd_buffer, break; case PVR_SUB_CMD_TYPE_COMPUTE: - case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY: + case PVR_SUB_CMD_TYPE_QUERY: pvr_csb_finish(&sub_cmd->compute.control_stream); break; @@ -286,9 +286,9 @@ static void pvr_cmd_buffer_update_barriers(struct pvr_cmd_buffer *cmd_buffer, barriers = PVR_PIPELINE_STAGE_COMPUTE_BIT; break; - case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY: + case PVR_SUB_CMD_TYPE_QUERY: case PVR_SUB_CMD_TYPE_TRANSFER: - /* Compute jobs are used for occlusion queries but to copy the results we + /* Compute jobs are used for queries but to copy the results we * have to sync with transfer jobs because vkCmdCopyQueryPoolResults() is * deemed as a transfer operation by the spec. */ @@ -674,7 +674,8 @@ static VkResult pvr_setup_texture_state_words( pvr_csb_pack (&descriptor->sampler.words[1], TEXSTATE_SAMPLER_WORD1, - sampler) {} + sampler) { + } return VK_SUCCESS; } @@ -1086,7 +1087,7 @@ static void pvr_setup_pbe_state( break; } -#define PVR_DEC_IF_NOT_ZERO(_v) (((_v) > 0) ? (_v) - 1 : 0) +#define PVR_DEC_IF_NOT_ZERO(_v) (((_v) > 0) ? (_v)-1 : 0) render_params.min_x_clip = MAX2(0, render_area->offset.x); render_params.min_y_clip = MAX2(0, render_area->offset.y); @@ -2205,7 +2206,7 @@ VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer) query_pool = gfx_sub_cmd->query_pool; } - gfx_sub_cmd->has_occlusion_query = true; + gfx_sub_cmd->has_query = true; util_dynarray_clear(&state->query_indices); } @@ -2256,7 +2257,7 @@ VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer) break; } - case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY: + case PVR_SUB_CMD_TYPE_QUERY: case PVR_SUB_CMD_TYPE_COMPUTE: { struct pvr_sub_cmd_compute *const compute_sub_cmd = &sub_cmd->compute; @@ -2331,7 +2332,7 @@ VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer) .type = PVR_EVENT_TYPE_BARRIER, .barrier = { .wait_for_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT, - .wait_at_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT, + .wait_at_stage_mask = PVR_PIPELINE_STAGE_QUERY_BIT, }, }; @@ -2487,7 +2488,7 @@ VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer, util_dynarray_init(&sub_cmd->gfx.sec_query_indices, NULL); break; - case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY: + case PVR_SUB_CMD_TYPE_QUERY: case PVR_SUB_CMD_TYPE_COMPUTE: pvr_csb_init(device, PVR_CMD_STREAM_TYPE_COMPUTE, @@ -3895,7 +3896,8 @@ static VkResult pvr_setup_descriptor_mappings( pvr_csb_pack (&point_sampler_words[1], TEXSTATE_SAMPLER_WORD1, - sampler) {} + sampler) { + } struct pvr_suballoc_bo *point_sampler_bo; result = pvr_cmd_buffer_upload_general(cmd_buffer, @@ -3930,7 +3932,8 @@ static VkResult pvr_setup_descriptor_mappings( pvr_csb_pack (&ia_sampler_words[1], TEXSTATE_SAMPLER_WORD1, - sampler) {} + sampler) { + } struct pvr_suballoc_bo *ia_sampler_bo; result = pvr_cmd_buffer_upload_general(cmd_buffer, @@ -7121,7 +7124,7 @@ static VkResult pvr_execute_sub_cmd(struct pvr_cmd_buffer *cmd_buffer, primary_sub_cmd->gfx = 
sec_sub_cmd->gfx; break; - case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY: + case PVR_SUB_CMD_TYPE_QUERY: case PVR_SUB_CMD_TYPE_COMPUTE: primary_sub_cmd->compute = sec_sub_cmd->compute; break; diff --git a/src/imagination/vulkan/pvr_common.h b/src/imagination/vulkan/pvr_common.h index 9113b3c88a7..753af462c77 100644 --- a/src/imagination/vulkan/pvr_common.h +++ b/src/imagination/vulkan/pvr_common.h @@ -88,7 +88,7 @@ enum pvr_sub_cmd_type { PVR_SUB_CMD_TYPE_GRAPHICS, PVR_SUB_CMD_TYPE_COMPUTE, PVR_SUB_CMD_TYPE_TRANSFER, - PVR_SUB_CMD_TYPE_OCCLUSION_QUERY, + PVR_SUB_CMD_TYPE_QUERY, PVR_SUB_CMD_TYPE_EVENT, }; @@ -110,7 +110,7 @@ enum pvr_job_type { PVR_JOB_TYPE_FRAG, PVR_JOB_TYPE_COMPUTE, PVR_JOB_TYPE_TRANSFER, - PVR_JOB_TYPE_OCCLUSION_QUERY, + PVR_JOB_TYPE_QUERY, PVR_JOB_TYPE_MAX }; @@ -128,8 +128,7 @@ enum pvr_pipeline_stage_bits { /* Note that this doesn't map to VkPipelineStageFlagBits so be careful with * this. */ - PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT = - BITFIELD_BIT(PVR_JOB_TYPE_OCCLUSION_QUERY), + PVR_PIPELINE_STAGE_QUERY_BIT = BITFIELD_BIT(PVR_JOB_TYPE_QUERY), }; #define PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS \ diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h index 8ec2549f7e0..f748a23afd0 100644 --- a/src/imagination/vulkan/pvr_private.h +++ b/src/imagination/vulkan/pvr_private.h @@ -196,8 +196,8 @@ struct pvr_device { /* Compute shaders for queries. */ struct pvr_compute_query_shader availability_shader; - struct pvr_compute_query_shader *copy_results_shaders; - struct pvr_compute_query_shader *reset_queries_shaders; + struct pvr_compute_query_shader reset_queries_shader; + struct pvr_compute_query_shader copy_results_shader; struct pvr_suballocator suballoc_general; struct pvr_suballocator suballoc_pds; @@ -510,7 +510,7 @@ struct pvr_sub_cmd_gfx { */ bool frag_uses_texture_rw; - bool has_occlusion_query; + bool has_query; bool wait_on_previous_transfer; diff --git a/src/imagination/vulkan/pvr_query.c b/src/imagination/vulkan/pvr_query.c index 7ea7798ee27..77c6f8b9a35 100644 --- a/src/imagination/vulkan/pvr_query.c +++ b/src/imagination/vulkan/pvr_query.c @@ -288,7 +288,7 @@ void pvr_CmdResetQueryPool(VkCommandBuffer commandBuffer, .type = PVR_EVENT_TYPE_BARRIER, .barrier = { .wait_for_stage_mask = PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS, - .wait_at_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT, + .wait_at_stage_mask = PVR_PIPELINE_STAGE_QUERY_BIT, }, }; @@ -307,7 +307,7 @@ void pvr_CmdResetQueryPool(VkCommandBuffer commandBuffer, cmd_buffer->state.current_sub_cmd->event = (struct pvr_sub_cmd_event){ .type = PVR_EVENT_TYPE_BARRIER, .barrier = { - .wait_for_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT, + .wait_for_stage_mask = PVR_PIPELINE_STAGE_QUERY_BIT, .wait_at_stage_mask = PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS, }, }; @@ -371,7 +371,7 @@ void pvr_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, .type = PVR_EVENT_TYPE_BARRIER, .barrier = { .wait_for_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT, - .wait_at_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT, + .wait_at_stage_mask = PVR_PIPELINE_STAGE_QUERY_BIT, }, }; @@ -388,7 +388,7 @@ void pvr_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, cmd_buffer->state.current_sub_cmd->event = (struct pvr_sub_cmd_event){ .type = PVR_EVENT_TYPE_BARRIER, .barrier = { - .wait_for_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT, + .wait_for_stage_mask = PVR_PIPELINE_STAGE_QUERY_BIT, .wait_at_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT, }, }; diff --git 
a/src/imagination/vulkan/pvr_query_compute.c b/src/imagination/vulkan/pvr_query_compute.c index c06feef915f..e253e42a71c 100644 --- a/src/imagination/vulkan/pvr_query_compute.c +++ b/src/imagination/vulkan/pvr_query_compute.c @@ -28,19 +28,21 @@ #include #include +#include "common/pvr_iface.h" #include "hwdef/rogue_hw_utils.h" +#include "pco_uscgen_programs.h" #include "pvr_bo.h" #include "pvr_formats.h" #include "pvr_pds.h" #include "pvr_private.h" -#include "usc/programs/pvr_shader_factory.h" -#include "usc/programs/pvr_static_shaders.h" #include "pvr_tex_state.h" #include "pvr_types.h" #include "vk_alloc.h" #include "vk_command_pool.h" #include "vk_util.h" +/* TODO: multicore support/awareness. */ + static inline void pvr_init_primary_compute_pds_program( struct pvr_pds_compute_shader_program *program) { @@ -52,10 +54,10 @@ static inline void pvr_init_primary_compute_pds_program( program->kick_usc = true; } -static VkResult pvr_create_compute_secondary_prog( - struct pvr_device *device, - const struct pvr_shader_factory_info *shader_factory_info, - struct pvr_compute_query_shader *query_prog) +static VkResult +pvr_create_compute_secondary_prog(struct pvr_device *device, + unsigned const_shared_regs, + struct pvr_compute_query_shader *query_prog) { const size_t size = pvr_pds_get_max_descriptor_upload_const_map_size_in_bytes(); @@ -79,8 +81,8 @@ static VkResult pvr_create_compute_secondary_prog( .buffer_id = 0, .source_offset = 0, .type = PVR_BUFFER_TYPE_COMPILE_TIME, - .size_in_dwords = shader_factory_info->const_shared_regs, - .destination = shader_factory_info->explicit_const_start_offset, + .size_in_dwords = const_shared_regs, + .destination = 0, } }, }; @@ -133,26 +135,24 @@ pvr_destroy_compute_secondary_prog(struct pvr_device *device, vk_free(&device->vk.alloc, program->info.entries); } -static VkResult pvr_create_compute_query_program( +static VkResult pvr_create_compute_query_precomp_program( struct pvr_device *device, - const struct pvr_shader_factory_info *shader_factory_info, + enum pco_usclib_program common_program_index, + unsigned const_shared_regs, struct pvr_compute_query_shader *query_prog) { const uint32_t cache_line_size = rogue_get_slc_cache_line_size(&device->pdevice->dev_info); struct pvr_pds_compute_shader_program pds_primary_prog = { 0 }; + const pco_precomp_data *precomp_data; VkResult result; memset(query_prog, 0, sizeof(*query_prog)); - /* No support for query constant calc program. */ - assert(shader_factory_info->const_calc_prog_inst_bytes == 0); - /* No support for query coefficient update program. 
*/ - assert(shader_factory_info->coeff_update_prog_start == PVR_INVALID_INST); - + precomp_data = (pco_precomp_data *)pco_usclib_common[common_program_index]; result = pvr_gpu_upload_usc(device, - shader_factory_info->shader_code, - shader_factory_info->code_size, + precomp_data->binary, + precomp_data->size_dwords * sizeof(uint32_t), cache_line_size, &query_prog->usc_bo); if (result != VK_SUCCESS) @@ -162,7 +162,7 @@ static VkResult pvr_create_compute_query_program( pvr_pds_setup_doutu(&pds_primary_prog.usc_task_control, query_prog->usc_bo->dev_addr.addr, - shader_factory_info->temps_required, + precomp_data->temps, ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE, false); @@ -176,9 +176,8 @@ static VkResult pvr_create_compute_query_program( query_prog->primary_data_size_dw = pds_primary_prog.data_size; query_prog->primary_num_temps = pds_primary_prog.temps_used; - result = pvr_create_compute_secondary_prog(device, - shader_factory_info, - query_prog); + result = + pvr_create_compute_secondary_prog(device, const_shared_regs, query_prog); if (result != VK_SUCCESS) goto err_free_pds_prim_code_bo; @@ -224,7 +223,9 @@ static VkResult pvr_write_compute_query_pds_data_section( * not needed. If it's needed we should probably be using LITERAL entries for * this instead. */ +#if !defined(NDEBUG) memset(dword_buffer, 0xFE, PVR_DW_TO_BYTES(info->data_size_in_dwords)); +#endif /* !defined(NDEBUG) */ pipeline->pds_shared_update_data_size_dw = info->data_size_in_dwords; @@ -321,7 +322,7 @@ static void pvr_write_private_compute_dispatch( 1, }; - assert(sub_cmd->type == PVR_SUB_CMD_TYPE_OCCLUSION_QUERY); + assert(sub_cmd->type == PVR_SUB_CMD_TYPE_QUERY); pvr_compute_update_shared_private(cmd_buffer, &sub_cmd->compute, pipeline); pvr_compute_update_kernel_private(cmd_buffer, @@ -340,90 +341,41 @@ pvr_destroy_compute_query_program(struct pvr_device *device, pvr_bo_suballoc_free(program->usc_bo); } -static VkResult pvr_create_multibuffer_compute_query_program( - struct pvr_device *device, - const struct pvr_shader_factory_info *const *shader_factory_info, - struct pvr_compute_query_shader *query_programs) -{ - const uint32_t core_count = device->pdevice->dev_runtime_info.core_count; - VkResult result; - uint32_t i; - - for (i = 0; i < core_count; i++) { - result = pvr_create_compute_query_program(device, - shader_factory_info[i], - &query_programs[i]); - if (result != VK_SUCCESS) - goto err_destroy_compute_query_program; - } - - return VK_SUCCESS; - -err_destroy_compute_query_program: - for (uint32_t j = 0; j < i; j++) - pvr_destroy_compute_query_program(device, &query_programs[j]); - - return result; -} - VkResult pvr_device_create_compute_query_programs(struct pvr_device *device) { - const uint32_t core_count = device->pdevice->dev_runtime_info.core_count; VkResult result; - result = pvr_create_compute_query_program(device, - &availability_query_write_info, - &device->availability_shader); + result = pvr_create_compute_query_precomp_program( + device, + CS_QUERY_AVAILABILITY_COMMON, + _PVR_QUERY_AVAILABILITY_DATA_COUNT, + &device->availability_shader); + if (result != VK_SUCCESS) return result; - device->copy_results_shaders = - vk_alloc(&device->vk.alloc, - sizeof(*device->copy_results_shaders) * core_count, - 8, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (!device->copy_results_shaders) { - result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + result = + pvr_create_compute_query_precomp_program(device, + CS_QUERY_COPY_COMMON, + _PVR_QUERY_COPY_DATA_COUNT, + &device->copy_results_shader); + + if (result != 
VK_SUCCESS) goto err_destroy_availability_query_program; - } - result = pvr_create_multibuffer_compute_query_program( - device, - copy_query_results_collection, - device->copy_results_shaders); + result = + pvr_create_compute_query_precomp_program(device, + CS_QUERY_RESET_COMMON, + _PVR_QUERY_RESET_DATA_COUNT, + &device->reset_queries_shader); + if (result != VK_SUCCESS) - goto err_vk_free_copy_results_shaders; - - device->reset_queries_shaders = - vk_alloc(&device->vk.alloc, - sizeof(*device->reset_queries_shaders) * core_count, - 8, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - if (!device->reset_queries_shaders) { - result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - goto err_destroy_copy_results_query_programs; - } - - result = pvr_create_multibuffer_compute_query_program( - device, - reset_query_collection, - device->reset_queries_shaders); - if (result != VK_SUCCESS) - goto err_vk_free_reset_queries_shaders; + goto err_destroy_copy_results_query_program; return VK_SUCCESS; -err_vk_free_reset_queries_shaders: - vk_free(&device->vk.alloc, device->reset_queries_shaders); - -err_destroy_copy_results_query_programs: - for (uint32_t i = 0; i < core_count; i++) { - pvr_destroy_compute_query_program(device, - &device->copy_results_shaders[i]); - } - -err_vk_free_copy_results_shaders: - vk_free(&device->vk.alloc, device->copy_results_shaders); +err_destroy_copy_results_query_program: + pvr_destroy_compute_query_program(device, &device->copy_results_shader); err_destroy_availability_query_program: pvr_destroy_compute_query_program(device, &device->availability_shader); @@ -433,53 +385,9 @@ err_destroy_availability_query_program: void pvr_device_destroy_compute_query_programs(struct pvr_device *device) { - const uint32_t core_count = device->pdevice->dev_runtime_info.core_count; - pvr_destroy_compute_query_program(device, &device->availability_shader); - - for (uint32_t i = 0; i < core_count; i++) { - pvr_destroy_compute_query_program(device, - &device->copy_results_shaders[i]); - pvr_destroy_compute_query_program(device, - &device->reset_queries_shaders[i]); - } - - vk_free(&device->vk.alloc, device->copy_results_shaders); - vk_free(&device->vk.alloc, device->reset_queries_shaders); -} - -static void pvr_init_tex_info(const struct pvr_device_info *dev_info, - struct pvr_texture_state_info *tex_info, - uint32_t width, - pvr_dev_addr_t addr) -{ - const VkFormat vk_format = VK_FORMAT_R32_UINT; - const uint8_t *swizzle_arr = pvr_get_format_swizzle(vk_format); - bool is_view_1d = !PVR_HAS_FEATURE(dev_info, tpu_extended_integer_lookup) && - !PVR_HAS_FEATURE(dev_info, tpu_image_state_v2); - - *tex_info = (struct pvr_texture_state_info){ - .format = vk_format, - .mem_layout = PVR_MEMLAYOUT_LINEAR, - .flags = PVR_TEXFLAGS_INDEX_LOOKUP, - .type = is_view_1d ? VK_IMAGE_VIEW_TYPE_1D : VK_IMAGE_VIEW_TYPE_2D, - .is_cube = false, - .tex_state_type = PVR_TEXTURE_STATE_SAMPLE, - .extent = { .width = width, .height = 1, .depth = 0 }, - .array_size = 1, - .base_level = 0, - .mip_levels = 1, - .mipmaps_present = false, - .sample_count = 1, - .stride = width, - .offset = 0, - .swizzle = { [0] = swizzle_arr[0], - [1] = swizzle_arr[1], - [2] = swizzle_arr[2], - [3] = swizzle_arr[3] }, - .addr = addr, - - }; + pvr_destroy_compute_query_program(device, &device->copy_results_shader); + pvr_destroy_compute_query_program(device, &device->reset_queries_shader); } /* TODO: Split this function into per program type functions. 
*/ @@ -487,33 +395,16 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer, const struct pvr_query_info *query_info) { struct pvr_device *device = cmd_buffer->device; - const uint32_t core_count = device->pdevice->dev_runtime_info.core_count; - const struct pvr_device_info *dev_info = &device->pdevice->dev_info; - const struct pvr_shader_factory_info *shader_factory_info; - uint64_t sampler_state[ROGUE_NUM_TEXSTATE_SAMPLER_WORDS]; - struct pvr_image_descriptor image_descriptor; const struct pvr_compute_query_shader *query_prog; struct pvr_private_compute_pipeline pipeline; - const uint32_t buffer_count = core_count; - struct pvr_texture_state_info tex_info; uint32_t num_query_indices; uint32_t *const_buffer; struct pvr_suballoc_bo *pvr_bo; VkResult result; - pvr_csb_pack (&sampler_state[0U], TEXSTATE_SAMPLER_WORD0, reg) { - reg.addrmode_u = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE; - reg.addrmode_v = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE; - reg.addrmode_w = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE; - reg.minfilter = ROGUE_TEXSTATE_FILTER_POINT; - reg.magfilter = ROGUE_TEXSTATE_FILTER_POINT; - reg.non_normalized_coords = true; - reg.dadjust = ROGUE_TEXSTATE_DADJUST_ZERO_UINT; - } - - /* clang-format off */ - pvr_csb_pack (&sampler_state[1], TEXSTATE_SAMPLER_WORD1, sampler_word1) {} - /* clang-format on */ + result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_QUERY); + if (result != VK_SUCCESS) + return result; switch (query_info->type) { case PVR_QUERY_TYPE_AVAILABILITY_WRITE: @@ -521,33 +412,28 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer, * value in availability_bo at every index in index_bo. */ query_prog = &device->availability_shader; - shader_factory_info = &availability_query_write_info; num_query_indices = query_info->availability_write.num_query_indices; + pipeline.const_shared_regs_count = _PVR_QUERY_AVAILABILITY_DATA_COUNT; break; case PVR_QUERY_TYPE_COPY_QUERY_RESULTS: /* Adds a compute shader to copy availability and query value data. */ - query_prog = &device->copy_results_shaders[buffer_count - 1]; - shader_factory_info = copy_query_results_collection[buffer_count - 1]; + query_prog = &device->copy_results_shader; num_query_indices = query_info->copy_query_results.query_count; + pipeline.const_shared_regs_count = _PVR_QUERY_COPY_DATA_COUNT; break; case PVR_QUERY_TYPE_RESET_QUERY_POOL: /* Adds a compute shader to reset availability and query value data. */ - query_prog = &device->reset_queries_shaders[buffer_count - 1]; - shader_factory_info = reset_query_collection[buffer_count - 1]; + query_prog = &device->reset_queries_shader; num_query_indices = query_info->reset_query_pool.query_count; + pipeline.const_shared_regs_count = _PVR_QUERY_RESET_DATA_COUNT; break; default: UNREACHABLE("Invalid query type"); } - result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, - PVR_SUB_CMD_TYPE_OCCLUSION_QUERY); - if (result != VK_SUCCESS) - return result; - pipeline.pds_code_offset = query_prog->pds_prim_code.code_offset; pipeline.pds_data_offset = query_prog->pds_prim_code.data_offset; @@ -556,82 +442,34 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer, pipeline.pds_data_size_dw = query_prog->primary_data_size_dw; pipeline.pds_temps_used = query_prog->primary_num_temps; - pipeline.coeff_regs_count = shader_factory_info->coeff_regs; - pipeline.unified_store_regs_count = shader_factory_info->input_regs; - pipeline.const_shared_regs_count = shader_factory_info->const_shared_regs; + /* TODO: set properly. 
*/ + pipeline.coeff_regs_count = 3; + pipeline.unified_store_regs_count = 8; - const_buffer = - vk_alloc(&cmd_buffer->vk.pool->alloc, - PVR_DW_TO_BYTES(shader_factory_info->const_shared_regs), - 8, - VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + const_buffer = vk_alloc(&cmd_buffer->vk.pool->alloc, + PVR_DW_TO_BYTES(pipeline.const_shared_regs_count), + 8, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); if (!const_buffer) { return vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY); } - /* clang-format off */ -#define DRIVER_CONST(index) \ - assert(shader_factory_info->driver_const_location_map[index] < \ - shader_factory_info->const_shared_regs); \ - const_buffer[shader_factory_info->driver_const_location_map[index]] - /* clang-format on */ - switch (query_info->type) { case PVR_QUERY_TYPE_AVAILABILITY_WRITE: { - uint64_t image_sampler_state[3][ROGUE_NUM_TEXSTATE_SAMPLER_WORDS]; - uint32_t image_sampler_idx = 0; + uint64_t index_addr = + query_info->availability_write.index_bo->dev_addr.addr; - memcpy(&image_sampler_state[image_sampler_idx][0], - &sampler_state[0], - sizeof(sampler_state)); - image_sampler_idx++; + uint64_t avail_addr = + query_info->availability_write.availability_bo->dev_addr.addr; - pvr_init_tex_info(dev_info, - &tex_info, - num_query_indices, - query_info->availability_write.index_bo->dev_addr); + const_buffer[PVR_QUERY_AVAILABILITY_DATA_INDEX_COUNT] = num_query_indices; + const_buffer[PVR_QUERY_AVAILABILITY_DATA_INDEX_BO_LO] = index_addr & + 0xffffffff; + const_buffer[PVR_QUERY_AVAILABILITY_DATA_INDEX_BO_HI] = index_addr >> 32; + const_buffer[PVR_QUERY_AVAILABILITY_DATA_BO_LO] = avail_addr & 0xffffffff; + const_buffer[PVR_QUERY_AVAILABILITY_DATA_BO_HI] = avail_addr >> 32; - result = pvr_pack_tex_state(device, &tex_info, &image_descriptor); - memcpy(&image_sampler_state[image_sampler_idx][0], - image_descriptor.words, - sizeof(image_descriptor.words)); - - if (result != VK_SUCCESS) { - vk_free(&cmd_buffer->vk.pool->alloc, const_buffer); - return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result); - } - - image_sampler_idx++; - - pvr_init_tex_info( - dev_info, - &tex_info, - query_info->availability_write.num_queries, - query_info->availability_write.availability_bo->dev_addr); - - result = pvr_pack_tex_state(device, &tex_info, &image_descriptor); - memcpy(&image_sampler_state[image_sampler_idx][0], - image_descriptor.words, - sizeof(image_descriptor.words)); - - if (result != VK_SUCCESS) { - vk_free(&cmd_buffer->vk.pool->alloc, const_buffer); - return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result); - } - - image_sampler_idx++; - - memcpy(&const_buffer[0], - &image_sampler_state[0][0], - sizeof(image_sampler_state)); - - /* Only PVR_QUERY_AVAILABILITY_WRITE_COUNT driver consts allowed. 
*/ - assert(shader_factory_info->num_driver_consts == - PVR_QUERY_AVAILABILITY_WRITE_COUNT); - - DRIVER_CONST(PVR_QUERY_AVAILABILITY_WRITE_INDEX_COUNT) = - num_query_indices; break; } @@ -642,94 +480,44 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer, PVR_FROM_HANDLE(pvr_buffer, buffer, query_info->copy_query_results.dst_buffer); - const uint32_t image_sampler_state_arr_size = - (buffer_count + 2) * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS; - uint32_t image_sampler_idx = 0; - pvr_dev_addr_t addr; - uint64_t offset; - STACK_ARRAY(uint64_t, image_sampler_state, image_sampler_state_arr_size); - if (!image_sampler_state) { - vk_free(&cmd_buffer->vk.pool->alloc, const_buffer); - - return vk_command_buffer_set_error(&cmd_buffer->vk, - VK_ERROR_OUT_OF_HOST_MEMORY); - } - -#define SAMPLER_ARR_2D(_arr, _i, _j) \ - _arr[_i * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS + _j] - - memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0), - &sampler_state[0], - sizeof(sampler_state)); - image_sampler_idx++; - - offset = query_info->copy_query_results.first_query * sizeof(uint32_t); - - addr = PVR_DEV_ADDR_OFFSET(pool->availability_buffer->dev_addr, offset); - - pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr); - - result = pvr_pack_tex_state(device, &tex_info, &image_descriptor); - memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0), - image_descriptor.words, - sizeof(image_descriptor.words)); - - if (result != VK_SUCCESS) { - vk_free(&cmd_buffer->vk.pool->alloc, const_buffer); - return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result); - } - - image_sampler_idx++; - - for (uint32_t i = 0; i < buffer_count; i++) { - addr = PVR_DEV_ADDR_OFFSET(pool->result_buffer->dev_addr, - offset + i * pool->result_stride); - - pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr); - - result = pvr_pack_tex_state(device, &tex_info, &image_descriptor); - memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0), - image_descriptor.words, - sizeof(image_descriptor.words)); - if (result != VK_SUCCESS) { - vk_free(&cmd_buffer->vk.pool->alloc, const_buffer); - return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result); - } - - image_sampler_idx++; - } - - memcpy(&const_buffer[0], - &SAMPLER_ARR_2D(image_sampler_state, 0, 0), - image_sampler_state_arr_size * sizeof(image_sampler_state[0])); - - STACK_ARRAY_FINISH(image_sampler_state); - - /* Only PVR_COPY_QUERY_POOL_RESULTS_COUNT driver consts allowed. */ - assert(shader_factory_info->num_driver_consts == - PVR_COPY_QUERY_POOL_RESULTS_COUNT); + pvr_dev_addr_t dev_addr; /* Assert if no memory is bound to destination buffer. 
*/ assert(buffer->dev_addr.addr); - addr = buffer->dev_addr; - addr.addr += query_info->copy_query_results.dst_offset; + uint64_t offset = + query_info->copy_query_results.first_query * sizeof(uint32_t); - DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_INDEX_COUNT) = num_query_indices; - DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_LOW) = addr.addr & - 0xFFFFFFFF; - DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_HIGH) = addr.addr >> - 32; - DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_DEST_STRIDE) = + dev_addr = PVR_DEV_ADDR_OFFSET(buffer->dev_addr, + query_info->copy_query_results.dst_offset); + uint64_t dest_addr = dev_addr.addr; + + dev_addr = + PVR_DEV_ADDR_OFFSET(pool->availability_buffer->dev_addr, offset); + uint64_t avail_addr = dev_addr.addr; + + dev_addr = PVR_DEV_ADDR_OFFSET(pool->result_buffer->dev_addr, offset); + uint64_t result_addr = dev_addr.addr; + + const_buffer[PVR_QUERY_COPY_DATA_INDEX_COUNT] = num_query_indices; + + const_buffer[PVR_QUERY_COPY_DATA_DEST_BO_LO] = dest_addr & 0xffffffff; + const_buffer[PVR_QUERY_COPY_DATA_DEST_BO_HI] = dest_addr >> 32; + + const_buffer[PVR_QUERY_COPY_DATA_AVAILABILITY_BO_LO] = avail_addr & + 0xffffffff; + const_buffer[PVR_QUERY_COPY_DATA_AVAILABILITY_BO_HI] = avail_addr >> 32; + + const_buffer[PVR_QUERY_COPY_DATA_RESULT_BO_LO] = result_addr & 0xffffffff; + const_buffer[PVR_QUERY_COPY_DATA_RESULT_BO_HI] = result_addr >> 32; + + const_buffer[PVR_QUERY_COPY_DATA_DEST_STRIDE] = query_info->copy_query_results.stride; - DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_PARTIAL_RESULT_FLAG) = - query_info->copy_query_results.flags & VK_QUERY_RESULT_PARTIAL_BIT; - DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_64_BIT_FLAG) = - query_info->copy_query_results.flags & VK_QUERY_RESULT_64_BIT; - DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_WITH_AVAILABILITY_FLAG) = - query_info->copy_query_results.flags & - VK_QUERY_RESULT_WITH_AVAILABILITY_BIT; + + const_buffer[PVR_QUERY_COPY_DATA_FLAGS] = + query_info->copy_query_results.flags; + break; } @@ -737,74 +525,26 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer, PVR_FROM_HANDLE(pvr_query_pool, pool, query_info->reset_query_pool.query_pool); - const uint32_t image_sampler_state_arr_size = - (buffer_count + 2) * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS; - uint32_t image_sampler_idx = 0; - pvr_dev_addr_t addr; - uint64_t offset; - STACK_ARRAY(uint64_t, image_sampler_state, image_sampler_state_arr_size); - if (!image_sampler_state) { - vk_free(&cmd_buffer->vk.pool->alloc, const_buffer); + uint64_t offset = + query_info->reset_query_pool.first_query * sizeof(uint32_t); - return vk_command_buffer_set_error(&cmd_buffer->vk, - VK_ERROR_OUT_OF_HOST_MEMORY); - } + pvr_dev_addr_t dev_addr = + PVR_DEV_ADDR_OFFSET(pool->result_buffer->dev_addr, offset); + uint64_t result_addr = dev_addr.addr; - memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0), - &sampler_state[0], - sizeof(sampler_state)); - image_sampler_idx++; + dev_addr = + PVR_DEV_ADDR_OFFSET(pool->availability_buffer->dev_addr, offset); + uint64_t avail_addr = dev_addr.addr; - offset = query_info->reset_query_pool.first_query * sizeof(uint32_t); + const_buffer[PVR_QUERY_RESET_DATA_INDEX_COUNT] = num_query_indices; + const_buffer[PVR_QUERY_RESET_DATA_RESULT_BO_LO] = result_addr & + 0xffffffff; + const_buffer[PVR_QUERY_RESET_DATA_RESULT_BO_HI] = result_addr >> 32; + const_buffer[PVR_QUERY_RESET_DATA_AVAILABILITY_BO_LO] = avail_addr & + 0xffffffff; + const_buffer[PVR_QUERY_RESET_DATA_AVAILABILITY_BO_HI] = avail_addr >> 32; - for (uint32_t i = 0; i < 
buffer_count; i++) { - addr = PVR_DEV_ADDR_OFFSET(pool->result_buffer->dev_addr, - offset + i * pool->result_stride); - - pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr); - - result = pvr_pack_tex_state(device, &tex_info, &image_descriptor); - memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0), - image_descriptor.words, - sizeof(image_descriptor.words)); - - if (result != VK_SUCCESS) { - vk_free(&cmd_buffer->vk.pool->alloc, const_buffer); - return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result); - } - - image_sampler_idx++; - } - - addr = PVR_DEV_ADDR_OFFSET(pool->availability_buffer->dev_addr, offset); - - pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr); - - result = pvr_pack_tex_state(device, &tex_info, &image_descriptor); - memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0), - image_descriptor.words, - sizeof(image_descriptor.words)); - if (result != VK_SUCCESS) { - vk_free(&cmd_buffer->vk.pool->alloc, const_buffer); - return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result); - } - - image_sampler_idx++; - -#undef SAMPLER_ARR_2D - - memcpy(&const_buffer[0], - &image_sampler_state[0], - image_sampler_state_arr_size * sizeof(image_sampler_state[0])); - - STACK_ARRAY_FINISH(image_sampler_state); - - /* Only PVR_RESET_QUERY_POOL_COUNT driver consts allowed. */ - assert(shader_factory_info->num_driver_consts == - PVR_RESET_QUERY_POOL_COUNT); - - DRIVER_CONST(PVR_RESET_QUERY_POOL_INDEX_COUNT) = num_query_indices; break; } @@ -812,21 +552,10 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer, UNREACHABLE("Invalid query type"); } -#undef DRIVER_CONST - - for (uint32_t i = 0; i < shader_factory_info->num_static_const; i++) { - const struct pvr_static_buffer *load = - &shader_factory_info->static_const_buffer[i]; - - /* Assert if static const is out of range. */ - assert(load->dst_idx < shader_factory_info->const_shared_regs); - const_buffer[load->dst_idx] = load->value; - } - result = pvr_cmd_buffer_upload_general( cmd_buffer, const_buffer, - PVR_DW_TO_BYTES(shader_factory_info->const_shared_regs), + PVR_DW_TO_BYTES(pipeline.const_shared_regs_count), &pvr_bo); if (result != VK_SUCCESS) { vk_free(&cmd_buffer->vk.pool->alloc, const_buffer); diff --git a/src/imagination/vulkan/pvr_queue.c b/src/imagination/vulkan/pvr_queue.c index 52275b9532c..90f57016fdb 100644 --- a/src/imagination/vulkan/pvr_queue.c +++ b/src/imagination/vulkan/pvr_queue.c @@ -375,16 +375,15 @@ static VkResult pvr_process_transfer_cmds(struct pvr_device *device, return result; } -static VkResult -pvr_process_occlusion_query_cmd(struct pvr_device *device, - struct pvr_queue *queue, - struct pvr_sub_cmd_compute *sub_cmd) +static VkResult pvr_process_query_cmd(struct pvr_device *device, + struct pvr_queue *queue, + struct pvr_sub_cmd_compute *sub_cmd) { struct vk_sync *sync; VkResult result; /* TODO: Currently we add barrier event sub commands to handle the sync - * necessary for the different occlusion query types. Would we get any speed + * necessary for the different query types. Would we get any speed * up in processing the queue by doing that sync here without using event sub * commands? 
*/ @@ -397,17 +396,17 @@ pvr_process_occlusion_query_cmd(struct pvr_device *device, if (result != VK_SUCCESS) return result; - result = pvr_compute_job_submit( - queue->query_ctx, - sub_cmd, - queue->next_job_wait_sync[PVR_JOB_TYPE_OCCLUSION_QUERY], - sync); + result = + pvr_compute_job_submit(queue->query_ctx, + sub_cmd, + queue->next_job_wait_sync[PVR_JOB_TYPE_QUERY], + sync); if (result != VK_SUCCESS) { vk_sync_destroy(&device->vk, sync); return result; } - pvr_update_job_syncs(device, queue, sync, PVR_JOB_TYPE_OCCLUSION_QUERY); + pvr_update_job_syncs(device, queue, sync, PVR_JOB_TYPE_QUERY); return result; } @@ -423,10 +422,10 @@ pvr_process_event_cmd_barrier(struct pvr_device *device, uint32_t src_wait_count = 0; VkResult result; - assert(!(src_mask & ~(PVR_PIPELINE_STAGE_ALL_BITS | - PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT))); - assert(!(dst_mask & ~(PVR_PIPELINE_STAGE_ALL_BITS | - PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT))); + assert(!(src_mask & + ~(PVR_PIPELINE_STAGE_ALL_BITS | PVR_PIPELINE_STAGE_QUERY_BIT))); + assert(!(dst_mask & + ~(PVR_PIPELINE_STAGE_ALL_BITS | PVR_PIPELINE_STAGE_QUERY_BIT))); u_foreach_bit (stage, src_mask) { if (queue->last_job_signal_sync[stage]) { @@ -494,7 +493,7 @@ pvr_process_event_cmd_set_or_reset(struct pvr_device *device, const enum pvr_event_state new_event_state) { /* Not PVR_JOB_TYPE_MAX since that also includes - * PVR_JOB_TYPE_OCCLUSION_QUERY so no stage in the src mask. + * PVR_JOB_TYPE_QUERY so no stage in the src mask. */ struct vk_sync_wait waits[PVR_NUM_SYNC_PIPELINE_STAGES]; struct vk_sync_signal signal; @@ -696,12 +695,12 @@ static VkResult pvr_process_cmd_buffer(struct pvr_device *device, link) { switch (sub_cmd->type) { case PVR_SUB_CMD_TYPE_GRAPHICS: { - /* If the fragment job utilizes occlusion queries, for data integrity - * it needs to wait for the occlusion query to be processed. + /* If the fragment job utilizes queries, for data integrity + * it needs to wait for the query to be processed. */ - if (sub_cmd->gfx.has_occlusion_query) { + if (sub_cmd->gfx.has_query) { struct pvr_sub_cmd_event_barrier barrier = { - .wait_for_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT, + .wait_for_stage_mask = PVR_PIPELINE_STAGE_QUERY_BIT, .wait_at_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT, }; @@ -761,9 +760,8 @@ static VkResult pvr_process_cmd_buffer(struct pvr_device *device, break; } - case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY: - result = - pvr_process_occlusion_query_cmd(device, queue, &sub_cmd->compute); + case PVR_SUB_CMD_TYPE_QUERY: + result = pvr_process_query_cmd(device, queue, &sub_cmd->compute); break; case PVR_SUB_CMD_TYPE_EVENT: @@ -847,11 +845,10 @@ static VkResult pvr_process_queue_signals(struct pvr_queue *queue, uint32_t wait_count = 0; for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) { - /* Exception for occlusion query jobs since that's something internal, + /* Exception for query jobs since that's something internal, * so the user provided syncs won't ever have it as a source stage. 
*/ - if (!(signal_stage_src & BITFIELD_BIT(i)) && - i != PVR_JOB_TYPE_OCCLUSION_QUERY) + if (!(signal_stage_src & BITFIELD_BIT(i)) && i != PVR_JOB_TYPE_QUERY) continue; if (!queue->last_job_signal_sync[i]) diff --git a/src/imagination/vulkan/usc/programs/pvr_shader_factory.h b/src/imagination/vulkan/usc/programs/pvr_shader_factory.h index a81254b7be4..de46aca7153 100644 --- a/src/imagination/vulkan/usc/programs/pvr_shader_factory.h +++ b/src/imagination/vulkan/usc/programs/pvr_shader_factory.h @@ -31,30 +31,6 @@ #include "util/bitscan.h" #include "util/u_math.h" -/* Occlusion query availability writes. */ -enum pvr_query_availability_write_pool_const { - PVR_QUERY_AVAILABILITY_WRITE_INDEX_COUNT, - PVR_QUERY_AVAILABILITY_WRITE_COUNT, -}; - -/* Copy query pool results. */ -enum pvr_copy_query_pool_const { - PVR_COPY_QUERY_POOL_RESULTS_INDEX_COUNT, - PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_LOW, - PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_HIGH, - PVR_COPY_QUERY_POOL_RESULTS_DEST_STRIDE, - PVR_COPY_QUERY_POOL_RESULTS_PARTIAL_RESULT_FLAG, - PVR_COPY_QUERY_POOL_RESULTS_64_BIT_FLAG, - PVR_COPY_QUERY_POOL_RESULTS_WITH_AVAILABILITY_FLAG, - PVR_COPY_QUERY_POOL_RESULTS_COUNT, -}; - -/* Reset query pool. */ -enum pvr_reset_query_pool_pool_const { - PVR_RESET_QUERY_POOL_INDEX_COUNT, - PVR_RESET_QUERY_POOL_COUNT, -}; - /* ClearAttachments. */ enum pvr_clear_attachment_const { PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_0 = 0, /* Don't change. Indexes array. diff --git a/src/imagination/vulkan/usc/programs/pvr_static_shaders.h b/src/imagination/vulkan/usc/programs/pvr_static_shaders.h index 0d575b82e00..4242d936c13 100644 --- a/src/imagination/vulkan/usc/programs/pvr_static_shaders.h +++ b/src/imagination/vulkan/usc/programs/pvr_static_shaders.h @@ -55,110 +55,6 @@ struct pvr_shader_factory_info { uint32_t msaa_sample_count; }; -static const uint8_t availability_query_write_shader[144] = { 0 }; - -static const uint32_t availability_query_write_location_map[1] = { - 0, -}; - -static const struct pvr_static_buffer - availability_query_write_static_consts[3] = { - { 0, 0 }, - { 0, 0 }, - { 0, 0 }, - }; - -static const struct pvr_shader_factory_info availability_query_write_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(availability_query_write_shader), - availability_query_write_shader, - 0, - 0, - NULL, - PVR_INVALID_INST, - 0, - availability_query_write_location_map, - 0, - availability_query_write_static_consts, - 0, - ~0, -}; - -static const uint8_t copy_query_results_shader[384] = { 0 }; - -static const uint32_t copy_query_results_location_map[7] = { - 0, 0, 0, 0, 0, 0, 0, -}; - -static const struct pvr_static_buffer copy_query_results_static_consts[2] = { - { 0, 0 }, - { 0, 0 }, -}; - -static const struct pvr_shader_factory_info copy_query_results_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(copy_query_results_shader), - copy_query_results_shader, - 0, - 0, - NULL, - PVR_INVALID_INST, - 0, - copy_query_results_location_map, - 0, - copy_query_results_static_consts, - 0, - ~0, -}; - -static const uint8_t reset_query_shader_code[136] = { 0 }; - -static const uint32_t reset_query_location_map[1] = { - 0, -}; - -static const struct pvr_static_buffer reset_query_static_consts[2] = { - { 0, 0 }, - { 0, 0 }, -}; - -static const struct pvr_shader_factory_info reset_query_info = { - 0, - 0, - 0, - 0, - 0, - sizeof(reset_query_shader_code), - reset_query_shader_code, - 0, - 0, - NULL, - PVR_INVALID_INST, - 0, - reset_query_location_map, - 0, - reset_query_static_consts, - 0, - ~0, -}; - -static const struct 
pvr_shader_factory_info - *const copy_query_results_collection[1] = { - &copy_query_results_info, - }; - -static const struct pvr_shader_factory_info *const reset_query_collection[1] = { - &reset_query_info, -}; - static const uint8_t clear_attachments_1_dw_0_offt_out_reg_shader_code[8] = { 0 };
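Illustration (not part of the patch above): a minimal, self-contained host-side sketch of the semantics the new umad64_32 opcode encodes, mirroring the constant-folding expression added to nir_opcodes.py; the helper name and test values below are hypothetical.

#include <stdint.h>
#include <stdio.h>

/* Reference semantics of umad64_32: a 32x32->64 multiply plus a 64-bit
 * addend passed as (lo, hi) dwords, with the 64-bit sum split back into
 * two 32-bit destination components, as in the nir_opcodes.py expression.
 */
static void umad64_32_ref(uint32_t src0, uint32_t src1, uint32_t add_lo,
                          uint32_t add_hi, uint32_t dst[2])
{
   uint64_t sum = (uint64_t)src0 * (uint64_t)src1 +
                  (((uint64_t)add_hi << 32) | (uint64_t)add_lo);
   dst[0] = (uint32_t)(sum & 0xffffffff);
   dst[1] = (uint32_t)(sum >> 32);
}

int main(void)
{
   uint32_t dst[2];

   /* 0xffffffff * 0xffffffff + 0x00000001_00000000 = 0xffffffff_00000001 */
   umad64_32_ref(0xffffffffu, 0xffffffffu, 0x0u, 0x1u, dst);
   printf("lo=0x%08x hi=0x%08x\n", dst[0], dst[1]);

   return 0;
}

This is the pattern cs_query_copy_common relies on when it calls nir_umad64_32(dest_stride, index, dest_base_addr_lo, dest_base_addr_hi) to form a 64-bit destination address from a 32-bit stride and index.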