pvr, pco: switch to clc query shaders
Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37439>

parent: 3fd3d7ee69
commit: 6dd0a5ee2d

15 changed files with 465 additions and 573 deletions
@@ -2761,3 +2761,12 @@ intrinsic("uvsw_write_pco", src_comp=[1, 0], bit_sizes=[32])
 # load_vtxin_pco(offset)
 intrinsic("load_vtxin_pco", src_comp=[1], dest_comp=0, bit_sizes=[32])
+
+# load_coeff_pco(offset)
+intrinsic("load_coeff_pco", src_comp=[1], dest_comp=0, bit_sizes=[32])
+
+# dma_ld_pco(address)
+intrinsic("dma_ld_pco", src_comp=[2], dest_comp=0, flags=[CAN_ELIMINATE], bit_sizes=[32])
+
+# dma_st_pco(address_data)
+intrinsic("dma_st_pco", src_comp=[0], bit_sizes=[32])
@@ -273,6 +273,7 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data)
    case nir_op_unpack_snorm_2x16:
    case nir_op_mqsad_4x8:
    case nir_op_uadd64_32:
+   case nir_op_umad64_32:
       /* There is no scalar version of these ops, unless we were to break it
        * down to bitshifts and math (which is definitely not intended).
        */
@@ -645,6 +645,12 @@ dst.x = sum & 0xffffffff;
 dst.y = sum >> 32;
 """)
 
+opcode("umad64_32", 2, tuint32, [1, 1, 1, 1], [tuint32, tuint32, tuint32, tuint32], False, "", """
+uint64_t sum = ((uint64_t)src0.x * (uint64_t)src1.x) + ((uint64_t)src3.x << 32 | (uint64_t)src2.x);
+dst.x = sum & 0xffffffff;
+dst.y = sum >> 32;
+""")
+
 binop("fsub", tfloat, "", """
 if (nir_is_rounding_mode_rtz(execution_mode, bit_size)) {
    if (bit_size == 64)
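The new umad64_32 opcode is a full 32x32 -> 64-bit unsigned multiply-add: it multiplies the x components of src0 and src1, adds a 64-bit addend passed as two 32-bit halves (src2 = lo, src3 = hi), and returns the sum as a lo/hi dword pair. A minimal C sketch of the same semantics (the helper name is hypothetical):

   #include <stdint.h>

   /* Reference semantics of nir_op_umad64_32. */
   static void umad64_32_ref(uint32_t a, uint32_t b, uint32_t addend_lo,
                             uint32_t addend_hi, uint32_t *lo, uint32_t *hi)
   {
      uint64_t sum = (uint64_t)a * (uint64_t)b +
                     ((uint64_t)addend_hi << 32 | (uint64_t)addend_lo);
      *lo = (uint32_t)(sum & 0xffffffff); /* dst.x */
      *hi = (uint32_t)(sum >> 32);        /* dst.y */
   }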
src/imagination/common/pvr_iface.h (new file, 61 lines)
@@ -0,0 +1,61 @@
+/*
+ * Copyright © 2025 Imagination Technologies Ltd.
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef PVR_IFACE_H
+#define PVR_IFACE_H
+
+/**
+ * \file pvr_iface.h
+ *
+ * \brief USC program interface.
+ */
+
+/** Query availability shader data; shared registers. */
+enum pvr_query_availability_data {
+   PVR_QUERY_AVAILABILITY_DATA_INDEX_COUNT,
+
+   PVR_QUERY_AVAILABILITY_DATA_INDEX_BO_LO,
+   PVR_QUERY_AVAILABILITY_DATA_INDEX_BO_HI,
+
+   PVR_QUERY_AVAILABILITY_DATA_BO_LO,
+   PVR_QUERY_AVAILABILITY_DATA_BO_HI,
+
+   _PVR_QUERY_AVAILABILITY_DATA_COUNT,
+};
+
+/** Query copy shader data; shared registers. */
+enum pvr_query_copy_data {
+   PVR_QUERY_COPY_DATA_INDEX_COUNT,
+
+   PVR_QUERY_COPY_DATA_DEST_BO_LO,
+   PVR_QUERY_COPY_DATA_DEST_BO_HI,
+
+   PVR_QUERY_COPY_DATA_AVAILABILITY_BO_LO,
+   PVR_QUERY_COPY_DATA_AVAILABILITY_BO_HI,
+
+   PVR_QUERY_COPY_DATA_RESULT_BO_LO,
+   PVR_QUERY_COPY_DATA_RESULT_BO_HI,
+
+   PVR_QUERY_COPY_DATA_DEST_STRIDE,
+
+   PVR_QUERY_COPY_DATA_FLAGS,
+
+   _PVR_QUERY_COPY_DATA_COUNT,
+};
+
+/** Query reset shader data; shared registers. */
+enum pvr_query_reset_data {
+   PVR_QUERY_RESET_DATA_INDEX_COUNT,
+
+   PVR_QUERY_RESET_DATA_RESULT_BO_LO,
+   PVR_QUERY_RESET_DATA_RESULT_BO_HI,
+
+   PVR_QUERY_RESET_DATA_AVAILABILITY_BO_LO,
+   PVR_QUERY_RESET_DATA_AVAILABILITY_BO_HI,
+
+   _PVR_QUERY_RESET_DATA_COUNT,
+};
+
+#endif /* PVR_IFACE_H */
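These enums fix the dword layout of the driver-written shared registers consumed by the CL query kernels; each 64-bit buffer address is split into lo/hi dwords. A minimal host-side sketch of packing the reset-kernel slots (the helper name and its inputs are hypothetical; the assignments mirror the driver code later in this diff):

   #include <stdint.h>
   #include "common/pvr_iface.h"

   /* One dword per enum slot; 64-bit device addresses split lo/hi. */
   static void pack_reset_data(uint32_t *const_buffer, uint32_t num_queries,
                               uint64_t result_addr, uint64_t avail_addr)
   {
      const_buffer[PVR_QUERY_RESET_DATA_INDEX_COUNT] = num_queries;
      const_buffer[PVR_QUERY_RESET_DATA_RESULT_BO_LO] = result_addr & 0xffffffff;
      const_buffer[PVR_QUERY_RESET_DATA_RESULT_BO_HI] = result_addr >> 32;
      const_buffer[PVR_QUERY_RESET_DATA_AVAILABILITY_BO_LO] = avail_addr & 0xffffffff;
      const_buffer[PVR_QUERY_RESET_DATA_AVAILABILITY_BO_HI] = avail_addr >> 32;
   }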
@@ -263,10 +263,11 @@ static pco_instr *trans_uvsw_write(trans_ctx *tctx,
    return pco_uvsw_write(&tctx->b, data_src, vtxout_addr, .rpt = chans);
 }
 
-static pco_instr *trans_load_vtxin(trans_ctx *tctx,
-                                   nir_intrinsic_instr *intr,
-                                   pco_ref dest,
-                                   UNUSED pco_ref offset_src)
+static pco_instr *trans_load_reg(trans_ctx *tctx,
+                                 nir_intrinsic_instr *intr,
+                                 pco_ref dest,
+                                 UNUSED pco_ref offset_src,
+                                 enum pco_reg_class class)
 {
    unsigned chans = pco_ref_get_chans(dest);
@@ -274,7 +275,7 @@ static pco_instr *trans_load_vtxin(trans_ctx *tctx,
    /* TODO: support indexed source offset. */
    assert(nir_src_is_const(*noffset_src));
    unsigned offset = nir_src_as_uint(*noffset_src);
-   pco_ref src = pco_ref_hwreg_vec(offset, PCO_REG_CLASS_VTXIN, chans);
+   pco_ref src = pco_ref_hwreg_vec(offset, class, chans);
 
    return pco_mov(&tctx->b, dest, src, .rpt = chans);
 }
@@ -1691,7 +1692,11 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
       break;
 
    case nir_intrinsic_load_vtxin_pco:
-      instr = trans_load_vtxin(tctx, intr, dest, src[0]);
+      instr = trans_load_reg(tctx, intr, dest, src[0], PCO_REG_CLASS_VTXIN);
+      break;
+
+   case nir_intrinsic_load_coeff_pco:
+      instr = trans_load_reg(tctx, intr, dest, src[0], PCO_REG_CLASS_COEFF);
       break;
 
    case nir_intrinsic_load_output:
@@ -1796,6 +1801,35 @@ static pco_instr *trans_intr(trans_ctx *tctx, nir_intrinsic_instr *intr)
       instr = trans_scratch(tctx, dest, src[1], src[0]);
       break;
 
+   case nir_intrinsic_dma_ld_pco: {
+      unsigned chans = pco_ref_get_chans(dest);
+
+      instr = pco_ld(&tctx->b,
+                     dest,
+                     pco_ref_drc(PCO_DRC_0),
+                     pco_ref_imm8(chans),
+                     src[0]);
+
+      break;
+   }
+
+   case nir_intrinsic_dma_st_pco: {
+      unsigned chans = pco_ref_get_chans(src[0]) - 2;
+
+      pco_ref data_comp =
+         pco_ref_new_ssa(tctx->func, pco_ref_get_bits(src[0]), chans);
+      pco_comp(&tctx->b, data_comp, src[0], pco_ref_val16(2));
+
+      instr = pco_st32(&tctx->b,
+                       data_comp,
+                       pco_ref_drc(PCO_DRC_0),
+                       pco_ref_imm8(chans),
+                       src[0],
+                       pco_ref_null());
+
+      break;
+   }
+
    /* Vertex sysvals. */
    case nir_intrinsic_load_vertex_id:
    case nir_intrinsic_load_instance_id:
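As the dma_st_pco lowering shows, the store intrinsic takes one packed source vector: components 0-1 hold the 64-bit destination address (lo, hi) and the remaining components are the data burst, which is why the channel count subtracts 2 and pco_comp extracts the data at component offset 2. A hedged C illustration of that layout (the struct is purely descriptive, not a driver type):

   #include <stdint.h>

   /* Packed dma_st_pco source: address in components 0-1, data after. */
   struct dma_st_src {
      uint32_t addr_lo; /* component 0 */
      uint32_t addr_hi; /* component 1 */
      uint32_t data[2]; /* components 2+, e.g. a 64-bit query result */
   };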
@@ -2675,6 +2709,28 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
       break;
    }
 
+   case nir_op_umad64_32: {
+      pco_ref dest_comps[2] = {
+         [0] = pco_ref_new_ssa32(tctx->func),
+         [1] = pco_ref_new_ssa32(tctx->func),
+      };
+
+      pco_imadd64(&tctx->b,
+                  dest_comps[0],
+                  dest_comps[1],
+                  src[0],
+                  src[1],
+                  src[2],
+                  src[3],
+                  pco_ref_null());
+
+      /* TODO: mark this vec as being non-contiguous,
+       * add pass for expanding.
+       */
+      instr = pco_trans_nir_vec(tctx, dest, 2, dest_comps);
+      break;
+   }
+
    case nir_op_imul:
       instr = pco_imul32(&tctx->b, dest, src[0], src[1], pco_ref_null());
       break;
@@ -2713,6 +2769,16 @@ static pco_instr *trans_alu(trans_ctx *tctx, nir_alu_instr *alu)
                          pco_ref_null());
       break;
 
+   case nir_op_imad:
+      instr = pco_imadd32(&tctx->b,
+                          dest,
+                          src[0],
+                          src[1],
+                          src[2],
+                          pco_ref_null(),
+                          .s = true);
+      break;
+
    /* Set-on (float) comparisons. */
    case nir_op_slt:
    case nir_op_sge:
@@ -4,6 +4,7 @@
  */
 
 #include "libcl.h"
+#include "compiler/libcl/libcl_vk.h"
 
 KERNEL(1)
 vs_nop_common(void)
@@ -38,3 +39,115 @@ vs_passthrough_rta_common(void)
    vs_passthrough_common();
    nir_uvsw_write_pco(4, nir_load_vtxin_pco(1, 3));
 }
+
+/* TODO: uint index = cl_global_id.x;
+ * instead of this function once things
+ * are properly hooked up.
+ */
+static inline uint
+query_calc_global_id(void)
+{
+   uint local_invoc_index = nir_load_vtxin_pco(1, 0);
+   local_invoc_index &= get_local_size(0) - 1;
+
+   uint wg_id = nir_load_coeff_pco(1, 0);
+
+   return nir_imad(wg_id, get_local_size(0), local_invoc_index);
+}
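query_calc_global_id reconstructs the flat global invocation index from raw vertex-input and coefficient registers: the local index is masked down to the workgroup size (which this assumes is a power of two) and combined as wg_id * local_size + local_id. The equivalent plain C, as a sketch:

   #include <stdint.h>

   /* Equivalent of query_calc_global_id(); assumes local_size is a
    * power of two, so masking with (local_size - 1) wraps the raw
    * local index into range.
    */
   static uint32_t calc_global_id(uint32_t raw_local_index, uint32_t wg_id,
                                  uint32_t local_size)
   {
      uint32_t local_id = raw_local_index & (local_size - 1);
      return wg_id * local_size + local_id;
   }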
+
+/* TODO: support parameter passing. */
+/* TODO: switch to common implementation. */
+KERNEL(32)
+cs_query_availability_common(void)
+{
+   uint index_count = nir_load_preamble(1, PVR_QUERY_AVAILABILITY_DATA_INDEX_COUNT, 0);
+
+   uint index_base_addr_lo = nir_load_preamble(1, PVR_QUERY_AVAILABILITY_DATA_INDEX_BO_LO, 0);
+   uint index_base_addr_hi = nir_load_preamble(1, PVR_QUERY_AVAILABILITY_DATA_INDEX_BO_HI, 0);
+
+   uint avail_base_addr_lo = nir_load_preamble(1, PVR_QUERY_AVAILABILITY_DATA_BO_LO, 0);
+   uint avail_base_addr_hi = nir_load_preamble(1, PVR_QUERY_AVAILABILITY_DATA_BO_HI, 0);
+
+   uint index = query_calc_global_id();
+
+   if (index < index_count) {
+      uint2 index_addr = nir_uadd64_32(index_base_addr_lo, index_base_addr_hi, index * sizeof(uint32_t));
+      uint offset = nir_dma_ld_pco(1, index_addr);
+
+      uint2 avail_addr = nir_uadd64_32(avail_base_addr_lo, avail_base_addr_hi, offset * sizeof(uint32_t));
+
+      nir_dma_st_pco(avail_addr, ~0U);
+   }
+}
+
+KERNEL(32)
+cs_query_copy_common(void)
+{
+   uint index_count = nir_load_preamble(1, PVR_QUERY_COPY_DATA_INDEX_COUNT, 0);
+
+   uint dest_base_addr_lo = nir_load_preamble(1, PVR_QUERY_COPY_DATA_DEST_BO_LO, 0);
+   uint dest_base_addr_hi = nir_load_preamble(1, PVR_QUERY_COPY_DATA_DEST_BO_HI, 0);
+
+   uint avail_base_addr_lo = nir_load_preamble(1, PVR_QUERY_COPY_DATA_AVAILABILITY_BO_LO, 0);
+   uint avail_base_addr_hi = nir_load_preamble(1, PVR_QUERY_COPY_DATA_AVAILABILITY_BO_HI, 0);
+
+   uint result_base_addr_lo = nir_load_preamble(1, PVR_QUERY_COPY_DATA_RESULT_BO_LO, 0);
+   uint result_base_addr_hi = nir_load_preamble(1, PVR_QUERY_COPY_DATA_RESULT_BO_HI, 0);
+
+   uint dest_stride = nir_load_preamble(1, PVR_QUERY_COPY_DATA_DEST_STRIDE, 0);
+
+   uint flags = nir_load_preamble(1, PVR_QUERY_COPY_DATA_FLAGS, 0);
+
+   uint index = query_calc_global_id();
+
+   if (index < index_count) {
+      uint2 avail_addr = nir_uadd64_32(avail_base_addr_lo, avail_base_addr_hi, index * sizeof(uint32_t));
+      uint available = nir_dma_ld_pco(1, avail_addr);
+
+      uint2 dest_addr = nir_umad64_32(dest_stride, index, dest_base_addr_lo, dest_base_addr_hi);
+
+      if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT)) {
+         uint2 result_addr = nir_uadd64_32(result_base_addr_lo, result_base_addr_hi, index * sizeof(uint32_t));
+         uint result = nir_dma_ld_pco(1, result_addr);
+
+         /* TODO: for 64/32-bit writes, just prep the 64-bit one and set the burst-length variably. */
+         if (flags & VK_QUERY_RESULT_64_BIT) {
+            /* TODO: check if data should be (result, 0) or (0, result) */
+            nir_dma_st_pco(dest_addr, result, 0);
+         } else {
+            nir_dma_st_pco(dest_addr, result);
+         }
+      }
+
+      if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+         if (flags & VK_QUERY_RESULT_64_BIT) {
+            dest_addr = nir_uadd64_32(dest_addr.x, dest_addr.y, sizeof(uint64_t));
+            /* TODO: check if data should be (available, 0) or (0, available) */
+            nir_dma_st_pco(dest_addr, available, 0);
+         } else {
+            dest_addr = nir_uadd64_32(dest_addr.x, dest_addr.y, sizeof(uint32_t));
+            nir_dma_st_pco(dest_addr, available);
+         }
+      }
+   }
+}
+
+KERNEL(32)
+cs_query_reset_common(void)
+{
+   uint index_count = nir_load_preamble(1, PVR_QUERY_RESET_DATA_INDEX_COUNT, 0);
+
+   uint result_base_addr_lo = nir_load_preamble(1, PVR_QUERY_RESET_DATA_RESULT_BO_LO, 0);
+   uint result_base_addr_hi = nir_load_preamble(1, PVR_QUERY_RESET_DATA_RESULT_BO_HI, 0);
+
+   uint avail_base_addr_lo = nir_load_preamble(1, PVR_QUERY_RESET_DATA_AVAILABILITY_BO_LO, 0);
+   uint avail_base_addr_hi = nir_load_preamble(1, PVR_QUERY_RESET_DATA_AVAILABILITY_BO_HI, 0);
+
+   uint index = query_calc_global_id();
+
+   if (index < index_count) {
+      uint2 result_addr = nir_uadd64_32(result_base_addr_lo, result_base_addr_hi, index * sizeof(uint32_t));
+      nir_dma_st_pco(result_addr, 0);
+
+      uint2 avail_addr = nir_uadd64_32(avail_base_addr_lo, avail_base_addr_hi, index * sizeof(uint32_t));
+      nir_dma_st_pco(avail_addr, 0);
+   }
+}
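cs_query_copy_common implements vkCmdCopyQueryPoolResults() semantics on-device: a result is written only when the query is available or VK_QUERY_RESULT_PARTIAL_BIT is set, and the availability word is appended after the result slot when VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set. A host-side C reference of the same per-query logic, as a hedged sketch (assumes little-endian 32-bit stores and a suitably aligned destination; not the driver's code):

   #include <stdbool.h>
   #include <stdint.h>
   #include <vulkan/vulkan.h>

   /* 'dst' points at this query's slot (dest_base + index * stride). */
   static void copy_one_query(void *dst, uint32_t result, uint32_t available,
                              VkQueryResultFlags flags)
   {
      bool write_result = available || (flags & VK_QUERY_RESULT_PARTIAL_BIT);

      if (flags & VK_QUERY_RESULT_64_BIT) {
         uint64_t *dst64 = dst;
         if (write_result)
            dst64[0] = result;
         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
            dst64[1] = available; /* appended at dest + sizeof(uint64_t) */
      } else {
         uint32_t *dst32 = dst;
         if (write_result)
            dst32[0] = result;
         if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
            dst32[1] = available; /* appended at dest + sizeof(uint32_t) */
      }
   }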
@@ -6,6 +6,7 @@
 #ifndef PCO_LIBCL_H
 #define PCO_LIBCL_H
 
+#include "common/pvr_iface.h"
 #include "compiler/libcl/libcl.h"
 #include "compiler/shader_enums.h"
 #include "pco/pco_common.h"
@@ -58,4 +59,39 @@ uint3 nir_load_vtxin_pco__3(uint offset);
 uint4 nir_load_vtxin_pco__4(uint offset);
 
 #define nir_load_vtxin_pco(n, ...) CAT2(nir_load_vtxin_pco__, n)(__VA_ARGS__)
+
+uint nir_load_coeff_pco__1(uint offset);
+uint2 nir_load_coeff_pco__2(uint offset);
+uint3 nir_load_coeff_pco__3(uint offset);
+uint4 nir_load_coeff_pco__4(uint offset);
+
+#define nir_load_coeff_pco(n, ...) CAT2(nir_load_coeff_pco__, n)(__VA_ARGS__)
+
+uint nir_load_preamble__1(uint base, uint preamble_class);
+uint4 nir_load_preamble__4(uint base, uint preamble_class);
+
+#define nir_load_preamble(n, ...) CAT2(nir_load_preamble__, n)(__VA_ARGS__)
+
+void nir_store_preamble(uint data, uint base, uint preamble_class);
+
+uint nir_dma_ld_pco__1(uint2 addr);
+uint2 nir_dma_ld_pco__2(uint2 addr);
+uint3 nir_dma_ld_pco__3(uint2 addr);
+uint4 nir_dma_ld_pco__4(uint2 addr);
+
+#define nir_dma_ld_pco(n, ...) CAT2(nir_dma_ld_pco__, n)(__VA_ARGS__)
+
+void nir_dma_st_pco__1(uint3 addr_data);
+void nir_dma_st_pco__2(uint4 addr_data);
+
+#define SELECT_ARGS_ST(addr, ...) \
+   ((CAT2(uint, NUM_ARGS_PLUS_2(__VA_ARGS__)))(addr, __VA_ARGS__))
+
+/* clang-format off */
+#define nir_dma_st_pco(addr, ...) SELECT_NAME(nir_dma_st_pco, __, __VA_ARGS__)SELECT_ARGS_ST(addr, __VA_ARGS__)
+/* clang-format on */
+
+uint2 nir_uadd64_32(uint lo, uint hi, uint offset);
+uint nir_imad(uint a, uint b, uint c);
+uint2 nir_umad64_32(uint a, uint b, uint lo, uint hi);
+
 #endif /* PCO_LIBCL_H */
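The __N suffix convention lets one macro dispatch to per-width declarations by pasting the requested component count onto the name; for nir_dma_st_pco the data argument count itself selects the overload, with the address and data packed into a single vector. Illustrative expansions (a sketch, assuming the CAT2/SELECT_NAME/NUM_ARGS_PLUS_2 helpers behave as their names suggest; variable names are hypothetical):

   /* Width-suffix dispatch: the count argument is pasted onto the name. */
   uint2 v = nir_dma_ld_pco(2, addr);       /* -> nir_dma_ld_pco__2(addr)     */
   uint  r = nir_load_preamble(1, slot, 0); /* -> nir_load_preamble__1(slot, 0) */

   /* Store dispatch: one data dword packs (addr, data) into a uint3 and
    * calls nir_dma_st_pco__1; two dwords pack a uint4 and call __2.
    */
   nir_dma_st_pco(avail_addr, ~0U);      /* -> nir_dma_st_pco__1((uint3)(avail_addr, ~0U))     */
   nir_dma_st_pco(dest_addr, result, 0); /* -> nir_dma_st_pco__2((uint4)(dest_addr, result, 0)) */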
@@ -104,7 +104,7 @@ static void pvr_cmd_buffer_free_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
       break;
 
    case PVR_SUB_CMD_TYPE_COMPUTE:
-   case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY:
+   case PVR_SUB_CMD_TYPE_QUERY:
       pvr_csb_finish(&sub_cmd->compute.control_stream);
       break;
@@ -286,9 +286,9 @@ static void pvr_cmd_buffer_update_barriers(struct pvr_cmd_buffer *cmd_buffer,
       barriers = PVR_PIPELINE_STAGE_COMPUTE_BIT;
       break;
 
-   case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY:
+   case PVR_SUB_CMD_TYPE_QUERY:
    case PVR_SUB_CMD_TYPE_TRANSFER:
-      /* Compute jobs are used for occlusion queries but to copy the results we
+      /* Compute jobs are used for queries but to copy the results we
        * have to sync with transfer jobs because vkCmdCopyQueryPoolResults() is
        * deemed as a transfer operation by the spec.
        */
@@ -674,7 +674,8 @@ static VkResult pvr_setup_texture_state_words(
 
    pvr_csb_pack (&descriptor->sampler.words[1],
                  TEXSTATE_SAMPLER_WORD1,
-                 sampler) {}
+                 sampler) {
+   }
 
    return VK_SUCCESS;
 }
@@ -1086,7 +1087,7 @@ static void pvr_setup_pbe_state(
       break;
    }
 
-#define PVR_DEC_IF_NOT_ZERO(_v) (((_v) > 0) ? (_v) - 1 : 0)
+#define PVR_DEC_IF_NOT_ZERO(_v) (((_v) > 0) ? (_v)-1 : 0)
 
    render_params.min_x_clip = MAX2(0, render_area->offset.x);
    render_params.min_y_clip = MAX2(0, render_area->offset.y);
@@ -2205,7 +2206,7 @@ VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
          query_pool = gfx_sub_cmd->query_pool;
       }
 
-      gfx_sub_cmd->has_occlusion_query = true;
+      gfx_sub_cmd->has_query = true;
 
       util_dynarray_clear(&state->query_indices);
    }
@@ -2256,7 +2257,7 @@ VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
       break;
    }
 
-   case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY:
+   case PVR_SUB_CMD_TYPE_QUERY:
    case PVR_SUB_CMD_TYPE_COMPUTE: {
       struct pvr_sub_cmd_compute *const compute_sub_cmd = &sub_cmd->compute;
@@ -2331,7 +2332,7 @@ VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
       .type = PVR_EVENT_TYPE_BARRIER,
       .barrier = {
          .wait_for_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
-         .wait_at_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
+         .wait_at_stage_mask = PVR_PIPELINE_STAGE_QUERY_BIT,
       },
    };
@@ -2487,7 +2488,7 @@ VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
       util_dynarray_init(&sub_cmd->gfx.sec_query_indices, NULL);
       break;
 
-   case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY:
+   case PVR_SUB_CMD_TYPE_QUERY:
    case PVR_SUB_CMD_TYPE_COMPUTE:
       pvr_csb_init(device,
                    PVR_CMD_STREAM_TYPE_COMPUTE,
@@ -3895,7 +3896,8 @@ static VkResult pvr_setup_descriptor_mappings(
 
    pvr_csb_pack (&point_sampler_words[1],
                  TEXSTATE_SAMPLER_WORD1,
-                 sampler) {}
+                 sampler) {
+   }
 
    struct pvr_suballoc_bo *point_sampler_bo;
    result = pvr_cmd_buffer_upload_general(cmd_buffer,
@@ -3930,7 +3932,8 @@ static VkResult pvr_setup_descriptor_mappings(
 
    pvr_csb_pack (&ia_sampler_words[1],
                  TEXSTATE_SAMPLER_WORD1,
-                 sampler) {}
+                 sampler) {
+   }
 
    struct pvr_suballoc_bo *ia_sampler_bo;
    result = pvr_cmd_buffer_upload_general(cmd_buffer,
@@ -7121,7 +7124,7 @@ static VkResult pvr_execute_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
       primary_sub_cmd->gfx = sec_sub_cmd->gfx;
       break;
 
-   case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY:
+   case PVR_SUB_CMD_TYPE_QUERY:
    case PVR_SUB_CMD_TYPE_COMPUTE:
       primary_sub_cmd->compute = sec_sub_cmd->compute;
       break;
@@ -88,7 +88,7 @@ enum pvr_sub_cmd_type {
    PVR_SUB_CMD_TYPE_GRAPHICS,
    PVR_SUB_CMD_TYPE_COMPUTE,
    PVR_SUB_CMD_TYPE_TRANSFER,
-   PVR_SUB_CMD_TYPE_OCCLUSION_QUERY,
+   PVR_SUB_CMD_TYPE_QUERY,
    PVR_SUB_CMD_TYPE_EVENT,
 };
@@ -110,7 +110,7 @@ enum pvr_job_type {
    PVR_JOB_TYPE_FRAG,
    PVR_JOB_TYPE_COMPUTE,
    PVR_JOB_TYPE_TRANSFER,
-   PVR_JOB_TYPE_OCCLUSION_QUERY,
+   PVR_JOB_TYPE_QUERY,
    PVR_JOB_TYPE_MAX
 };
@@ -128,8 +128,7 @@ enum pvr_pipeline_stage_bits {
    /* Note that this doesn't map to VkPipelineStageFlagBits so be careful with
     * this.
    */
-   PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT =
-      BITFIELD_BIT(PVR_JOB_TYPE_OCCLUSION_QUERY),
+   PVR_PIPELINE_STAGE_QUERY_BIT = BITFIELD_BIT(PVR_JOB_TYPE_QUERY),
 };
 
 #define PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS \
@@ -196,8 +196,8 @@ struct pvr_device {
 
    /* Compute shaders for queries. */
    struct pvr_compute_query_shader availability_shader;
-   struct pvr_compute_query_shader *copy_results_shaders;
-   struct pvr_compute_query_shader *reset_queries_shaders;
+   struct pvr_compute_query_shader reset_queries_shader;
+   struct pvr_compute_query_shader copy_results_shader;
 
    struct pvr_suballocator suballoc_general;
    struct pvr_suballocator suballoc_pds;
@@ -510,7 +510,7 @@ struct pvr_sub_cmd_gfx {
     */
    bool frag_uses_texture_rw;
 
-   bool has_occlusion_query;
+   bool has_query;
 
    bool wait_on_previous_transfer;
@@ -288,7 +288,7 @@ void pvr_CmdResetQueryPool(VkCommandBuffer commandBuffer,
       .type = PVR_EVENT_TYPE_BARRIER,
       .barrier = {
         .wait_for_stage_mask = PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS,
-        .wait_at_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
+        .wait_at_stage_mask = PVR_PIPELINE_STAGE_QUERY_BIT,
      },
    };
@@ -307,7 +307,7 @@ void pvr_CmdResetQueryPool(VkCommandBuffer commandBuffer,
    cmd_buffer->state.current_sub_cmd->event = (struct pvr_sub_cmd_event){
      .type = PVR_EVENT_TYPE_BARRIER,
      .barrier = {
-        .wait_for_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
+        .wait_for_stage_mask = PVR_PIPELINE_STAGE_QUERY_BIT,
        .wait_at_stage_mask = PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS,
      },
    };
@@ -371,7 +371,7 @@ void pvr_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
      .type = PVR_EVENT_TYPE_BARRIER,
      .barrier = {
        .wait_for_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
-       .wait_at_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
+       .wait_at_stage_mask = PVR_PIPELINE_STAGE_QUERY_BIT,
      },
    };
@@ -388,7 +388,7 @@ void pvr_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
    cmd_buffer->state.current_sub_cmd->event = (struct pvr_sub_cmd_event){
      .type = PVR_EVENT_TYPE_BARRIER,
      .barrier = {
-       .wait_for_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
+       .wait_for_stage_mask = PVR_PIPELINE_STAGE_QUERY_BIT,
        .wait_at_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
      },
    };
@@ -28,19 +28,21 @@
 #include <string.h>
 #include <vulkan/vulkan.h>
 
+#include "common/pvr_iface.h"
 #include "hwdef/rogue_hw_utils.h"
+#include "pco_uscgen_programs.h"
 #include "pvr_bo.h"
 #include "pvr_formats.h"
 #include "pvr_pds.h"
 #include "pvr_private.h"
 #include "usc/programs/pvr_shader_factory.h"
 #include "usc/programs/pvr_static_shaders.h"
 #include "pvr_tex_state.h"
 #include "pvr_types.h"
 #include "vk_alloc.h"
 #include "vk_command_pool.h"
 #include "vk_util.h"
 
+/* TODO: multicore support/awareness. */
+
 static inline void pvr_init_primary_compute_pds_program(
    struct pvr_pds_compute_shader_program *program)
 {
@@ -52,10 +54,10 @@ static inline void pvr_init_primary_compute_pds_program(
    program->kick_usc = true;
 }
 
-static VkResult pvr_create_compute_secondary_prog(
-   struct pvr_device *device,
-   const struct pvr_shader_factory_info *shader_factory_info,
-   struct pvr_compute_query_shader *query_prog)
+static VkResult
+pvr_create_compute_secondary_prog(struct pvr_device *device,
+                                  unsigned const_shared_regs,
+                                  struct pvr_compute_query_shader *query_prog)
 {
    const size_t size =
       pvr_pds_get_max_descriptor_upload_const_map_size_in_bytes();
@@ -79,8 +81,8 @@ static VkResult pvr_create_compute_secondary_prog(
          .buffer_id = 0,
          .source_offset = 0,
          .type = PVR_BUFFER_TYPE_COMPILE_TIME,
-         .size_in_dwords = shader_factory_info->const_shared_regs,
-         .destination = shader_factory_info->explicit_const_start_offset,
+         .size_in_dwords = const_shared_regs,
+         .destination = 0,
       }
    },
 };
@@ -133,26 +135,24 @@ pvr_destroy_compute_secondary_prog(struct pvr_device *device,
    vk_free(&device->vk.alloc, program->info.entries);
 }
 
-static VkResult pvr_create_compute_query_program(
+static VkResult pvr_create_compute_query_precomp_program(
    struct pvr_device *device,
-   const struct pvr_shader_factory_info *shader_factory_info,
+   enum pco_usclib_program common_program_index,
+   unsigned const_shared_regs,
    struct pvr_compute_query_shader *query_prog)
 {
    const uint32_t cache_line_size =
       rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
    struct pvr_pds_compute_shader_program pds_primary_prog = { 0 };
+   const pco_precomp_data *precomp_data;
    VkResult result;
 
    memset(query_prog, 0, sizeof(*query_prog));
 
-   /* No support for query constant calc program. */
-   assert(shader_factory_info->const_calc_prog_inst_bytes == 0);
-   /* No support for query coefficient update program. */
-   assert(shader_factory_info->coeff_update_prog_start == PVR_INVALID_INST);
-
+   precomp_data = (pco_precomp_data *)pco_usclib_common[common_program_index];
    result = pvr_gpu_upload_usc(device,
-                               shader_factory_info->shader_code,
-                               shader_factory_info->code_size,
+                               precomp_data->binary,
+                               precomp_data->size_dwords * sizeof(uint32_t),
                                cache_line_size,
                                &query_prog->usc_bo);
    if (result != VK_SUCCESS)
@@ -162,7 +162,7 @@ static VkResult pvr_create_compute_query_precomp_program(
 
    pvr_pds_setup_doutu(&pds_primary_prog.usc_task_control,
                        query_prog->usc_bo->dev_addr.addr,
-                       shader_factory_info->temps_required,
+                       precomp_data->temps,
                        ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
                        false);
@@ -176,9 +176,8 @@ static VkResult pvr_create_compute_query_precomp_program(
    query_prog->primary_data_size_dw = pds_primary_prog.data_size;
    query_prog->primary_num_temps = pds_primary_prog.temps_used;
 
-   result = pvr_create_compute_secondary_prog(device,
-                                              shader_factory_info,
-                                              query_prog);
+   result =
+      pvr_create_compute_secondary_prog(device, const_shared_regs, query_prog);
    if (result != VK_SUCCESS)
       goto err_free_pds_prim_code_bo;
@@ -224,7 +223,9 @@ static VkResult pvr_write_compute_query_pds_data_section(
    * not needed. If it's needed we should probably be using LITERAL entries for
    * this instead.
    */
+#if !defined(NDEBUG)
    memset(dword_buffer, 0xFE, PVR_DW_TO_BYTES(info->data_size_in_dwords));
+#endif /* !defined(NDEBUG) */
 
    pipeline->pds_shared_update_data_size_dw = info->data_size_in_dwords;
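Guarding the memset with !NDEBUG keeps the poisoning of the PDS data section debug-only: any dword the packing code fails to overwrite then reads back as the recognizable value 0xFEFEFEFE. A tiny illustration of the pattern (a sketch under that assumption, not the driver's code):

   #include <stdint.h>
   #include <string.h>

   /* Debug-only poison: 0xFE-filled bytes make unwritten dwords show
    * up as 0xFEFEFEFE when inspected.
    */
   static void poison_data_section(uint32_t *buf, size_t size_in_bytes)
   {
   #if !defined(NDEBUG)
      memset(buf, 0xFE, size_in_bytes);
   #endif
   }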
@@ -321,7 +322,7 @@ static void pvr_write_private_compute_dispatch(
       1,
    };
 
-   assert(sub_cmd->type == PVR_SUB_CMD_TYPE_OCCLUSION_QUERY);
+   assert(sub_cmd->type == PVR_SUB_CMD_TYPE_QUERY);
 
    pvr_compute_update_shared_private(cmd_buffer, &sub_cmd->compute, pipeline);
    pvr_compute_update_kernel_private(cmd_buffer,
@@ -340,90 +341,41 @@ pvr_destroy_compute_query_program(struct pvr_device *device,
    pvr_bo_suballoc_free(program->usc_bo);
 }
 
-static VkResult pvr_create_multibuffer_compute_query_program(
-   struct pvr_device *device,
-   const struct pvr_shader_factory_info *const *shader_factory_info,
-   struct pvr_compute_query_shader *query_programs)
-{
-   const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
-   VkResult result;
-   uint32_t i;
-
-   for (i = 0; i < core_count; i++) {
-      result = pvr_create_compute_query_program(device,
-                                                shader_factory_info[i],
-                                                &query_programs[i]);
-      if (result != VK_SUCCESS)
-         goto err_destroy_compute_query_program;
-   }
-
-   return VK_SUCCESS;
-
-err_destroy_compute_query_program:
-   for (uint32_t j = 0; j < i; j++)
-      pvr_destroy_compute_query_program(device, &query_programs[j]);
-
-   return result;
-}
-
 VkResult pvr_device_create_compute_query_programs(struct pvr_device *device)
 {
-   const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
    VkResult result;
 
-   result = pvr_create_compute_query_program(device,
-                                             &availability_query_write_info,
-                                             &device->availability_shader);
+   result = pvr_create_compute_query_precomp_program(
+      device,
+      CS_QUERY_AVAILABILITY_COMMON,
+      _PVR_QUERY_AVAILABILITY_DATA_COUNT,
+      &device->availability_shader);
    if (result != VK_SUCCESS)
       return result;
 
-   device->copy_results_shaders =
-      vk_alloc(&device->vk.alloc,
-               sizeof(*device->copy_results_shaders) * core_count,
-               8,
-               VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
-   if (!device->copy_results_shaders) {
-      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
-      goto err_destroy_availability_query_program;
-   }
-
-   result = pvr_create_multibuffer_compute_query_program(
-      device,
-      copy_query_results_collection,
-      device->copy_results_shaders);
+   result =
+      pvr_create_compute_query_precomp_program(device,
+                                               CS_QUERY_COPY_COMMON,
+                                               _PVR_QUERY_COPY_DATA_COUNT,
+                                               &device->copy_results_shader);
    if (result != VK_SUCCESS)
-      goto err_vk_free_copy_results_shaders;
+      goto err_destroy_availability_query_program;
 
-   device->reset_queries_shaders =
-      vk_alloc(&device->vk.alloc,
-               sizeof(*device->reset_queries_shaders) * core_count,
-               8,
-               VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
-   if (!device->reset_queries_shaders) {
-      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
-      goto err_destroy_copy_results_query_programs;
-   }
-
-   result = pvr_create_multibuffer_compute_query_program(
-      device,
-      reset_query_collection,
-      device->reset_queries_shaders);
+   result =
+      pvr_create_compute_query_precomp_program(device,
+                                               CS_QUERY_RESET_COMMON,
+                                               _PVR_QUERY_RESET_DATA_COUNT,
+                                               &device->reset_queries_shader);
    if (result != VK_SUCCESS)
-      goto err_vk_free_reset_queries_shaders;
+      goto err_destroy_copy_results_query_program;
 
    return VK_SUCCESS;
 
-err_vk_free_reset_queries_shaders:
-   vk_free(&device->vk.alloc, device->reset_queries_shaders);
-
-err_destroy_copy_results_query_programs:
-   for (uint32_t i = 0; i < core_count; i++) {
-      pvr_destroy_compute_query_program(device,
-                                        &device->copy_results_shaders[i]);
-   }
-
-err_vk_free_copy_results_shaders:
-   vk_free(&device->vk.alloc, device->copy_results_shaders);
+err_destroy_copy_results_query_program:
+   pvr_destroy_compute_query_program(device, &device->copy_results_shader);
 
 err_destroy_availability_query_program:
    pvr_destroy_compute_query_program(device, &device->availability_shader);
@@ -433,53 +385,9 @@ err_destroy_availability_query_program:
 
 void pvr_device_destroy_compute_query_programs(struct pvr_device *device)
 {
-   const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
-
    pvr_destroy_compute_query_program(device, &device->availability_shader);
-
-   for (uint32_t i = 0; i < core_count; i++) {
-      pvr_destroy_compute_query_program(device,
-                                        &device->copy_results_shaders[i]);
-      pvr_destroy_compute_query_program(device,
-                                        &device->reset_queries_shaders[i]);
-   }
-
-   vk_free(&device->vk.alloc, device->copy_results_shaders);
-   vk_free(&device->vk.alloc, device->reset_queries_shaders);
-}
-
-static void pvr_init_tex_info(const struct pvr_device_info *dev_info,
-                              struct pvr_texture_state_info *tex_info,
-                              uint32_t width,
-                              pvr_dev_addr_t addr)
-{
-   const VkFormat vk_format = VK_FORMAT_R32_UINT;
-   const uint8_t *swizzle_arr = pvr_get_format_swizzle(vk_format);
-   bool is_view_1d = !PVR_HAS_FEATURE(dev_info, tpu_extended_integer_lookup) &&
-                     !PVR_HAS_FEATURE(dev_info, tpu_image_state_v2);
-
-   *tex_info = (struct pvr_texture_state_info){
-      .format = vk_format,
-      .mem_layout = PVR_MEMLAYOUT_LINEAR,
-      .flags = PVR_TEXFLAGS_INDEX_LOOKUP,
-      .type = is_view_1d ? VK_IMAGE_VIEW_TYPE_1D : VK_IMAGE_VIEW_TYPE_2D,
-      .is_cube = false,
-      .tex_state_type = PVR_TEXTURE_STATE_SAMPLE,
-      .extent = { .width = width, .height = 1, .depth = 0 },
-      .array_size = 1,
-      .base_level = 0,
-      .mip_levels = 1,
-      .mipmaps_present = false,
-      .sample_count = 1,
-      .stride = width,
-      .offset = 0,
-      .swizzle = { [0] = swizzle_arr[0],
-                   [1] = swizzle_arr[1],
-                   [2] = swizzle_arr[2],
-                   [3] = swizzle_arr[3] },
-      .addr = addr,
-   };
-}
+   pvr_destroy_compute_query_program(device, &device->copy_results_shader);
+   pvr_destroy_compute_query_program(device, &device->reset_queries_shader);
+}
 
 /* TODO: Split this function into per program type functions. */
@@ -487,33 +395,16 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
                                const struct pvr_query_info *query_info)
 {
    struct pvr_device *device = cmd_buffer->device;
-   const uint32_t core_count = device->pdevice->dev_runtime_info.core_count;
-   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
-   const struct pvr_shader_factory_info *shader_factory_info;
-   uint64_t sampler_state[ROGUE_NUM_TEXSTATE_SAMPLER_WORDS];
-   struct pvr_image_descriptor image_descriptor;
    const struct pvr_compute_query_shader *query_prog;
    struct pvr_private_compute_pipeline pipeline;
-   const uint32_t buffer_count = core_count;
-   struct pvr_texture_state_info tex_info;
    uint32_t num_query_indices;
    uint32_t *const_buffer;
    struct pvr_suballoc_bo *pvr_bo;
    VkResult result;
 
-   pvr_csb_pack (&sampler_state[0U], TEXSTATE_SAMPLER_WORD0, reg) {
-      reg.addrmode_u = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
-      reg.addrmode_v = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
-      reg.addrmode_w = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
-      reg.minfilter = ROGUE_TEXSTATE_FILTER_POINT;
-      reg.magfilter = ROGUE_TEXSTATE_FILTER_POINT;
-      reg.non_normalized_coords = true;
-      reg.dadjust = ROGUE_TEXSTATE_DADJUST_ZERO_UINT;
-   }
-
-   /* clang-format off */
-   pvr_csb_pack (&sampler_state[1], TEXSTATE_SAMPLER_WORD1, sampler_word1) {}
-   /* clang-format on */
+   result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_QUERY);
+   if (result != VK_SUCCESS)
+      return result;
 
    switch (query_info->type) {
    case PVR_QUERY_TYPE_AVAILABILITY_WRITE:
@@ -521,33 +412,28 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
       * value in availability_bo at every index in index_bo.
       */
      query_prog = &device->availability_shader;
-     shader_factory_info = &availability_query_write_info;
      num_query_indices = query_info->availability_write.num_query_indices;
+     pipeline.const_shared_regs_count = _PVR_QUERY_AVAILABILITY_DATA_COUNT;
      break;
 
   case PVR_QUERY_TYPE_COPY_QUERY_RESULTS:
      /* Adds a compute shader to copy availability and query value data. */
-     query_prog = &device->copy_results_shaders[buffer_count - 1];
-     shader_factory_info = copy_query_results_collection[buffer_count - 1];
+     query_prog = &device->copy_results_shader;
      num_query_indices = query_info->copy_query_results.query_count;
+     pipeline.const_shared_regs_count = _PVR_QUERY_COPY_DATA_COUNT;
      break;
 
   case PVR_QUERY_TYPE_RESET_QUERY_POOL:
      /* Adds a compute shader to reset availability and query value data. */
-     query_prog = &device->reset_queries_shaders[buffer_count - 1];
-     shader_factory_info = reset_query_collection[buffer_count - 1];
+     query_prog = &device->reset_queries_shader;
      num_query_indices = query_info->reset_query_pool.query_count;
+     pipeline.const_shared_regs_count = _PVR_QUERY_RESET_DATA_COUNT;
      break;
 
   default:
      UNREACHABLE("Invalid query type");
   }
 
-  result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer,
-                                        PVR_SUB_CMD_TYPE_OCCLUSION_QUERY);
-  if (result != VK_SUCCESS)
-     return result;
-
   pipeline.pds_code_offset = query_prog->pds_prim_code.code_offset;
   pipeline.pds_data_offset = query_prog->pds_prim_code.data_offset;
@@ -556,82 +442,34 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
    pipeline.pds_data_size_dw = query_prog->primary_data_size_dw;
    pipeline.pds_temps_used = query_prog->primary_num_temps;
 
-   pipeline.coeff_regs_count = shader_factory_info->coeff_regs;
-   pipeline.unified_store_regs_count = shader_factory_info->input_regs;
-   pipeline.const_shared_regs_count = shader_factory_info->const_shared_regs;
+   /* TODO: set properly. */
+   pipeline.coeff_regs_count = 3;
+   pipeline.unified_store_regs_count = 8;
 
-   const_buffer =
-      vk_alloc(&cmd_buffer->vk.pool->alloc,
-               PVR_DW_TO_BYTES(shader_factory_info->const_shared_regs),
-               8,
-               VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   const_buffer = vk_alloc(&cmd_buffer->vk.pool->alloc,
+                           PVR_DW_TO_BYTES(pipeline.const_shared_regs_count),
+                           8,
+                           VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
    if (!const_buffer) {
       return vk_command_buffer_set_error(&cmd_buffer->vk,
                                          VK_ERROR_OUT_OF_HOST_MEMORY);
    }
 
-   /* clang-format off */
-#define DRIVER_CONST(index) \
-   assert(shader_factory_info->driver_const_location_map[index] < \
-          shader_factory_info->const_shared_regs); \
-   const_buffer[shader_factory_info->driver_const_location_map[index]]
-   /* clang-format on */
-
    switch (query_info->type) {
    case PVR_QUERY_TYPE_AVAILABILITY_WRITE: {
-      uint64_t image_sampler_state[3][ROGUE_NUM_TEXSTATE_SAMPLER_WORDS];
-      uint32_t image_sampler_idx = 0;
+      uint64_t index_addr =
+         query_info->availability_write.index_bo->dev_addr.addr;
+      uint64_t avail_addr =
+         query_info->availability_write.availability_bo->dev_addr.addr;
 
-      memcpy(&image_sampler_state[image_sampler_idx][0],
-             &sampler_state[0],
-             sizeof(sampler_state));
-      image_sampler_idx++;
-
-      pvr_init_tex_info(dev_info,
-                        &tex_info,
-                        num_query_indices,
-                        query_info->availability_write.index_bo->dev_addr);
-
-      result = pvr_pack_tex_state(device, &tex_info, &image_descriptor);
-      memcpy(&image_sampler_state[image_sampler_idx][0],
-             image_descriptor.words,
-             sizeof(image_descriptor.words));
-
-      if (result != VK_SUCCESS) {
-         vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
-         return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
-      }
-
-      image_sampler_idx++;
-
-      pvr_init_tex_info(
-         dev_info,
-         &tex_info,
-         query_info->availability_write.num_queries,
-         query_info->availability_write.availability_bo->dev_addr);
-
-      result = pvr_pack_tex_state(device, &tex_info, &image_descriptor);
-      memcpy(&image_sampler_state[image_sampler_idx][0],
-             image_descriptor.words,
-             sizeof(image_descriptor.words));
-
-      if (result != VK_SUCCESS) {
-         vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
-         return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
-      }
-
-      image_sampler_idx++;
-
-      memcpy(&const_buffer[0],
-             &image_sampler_state[0][0],
-             sizeof(image_sampler_state));
-
-      /* Only PVR_QUERY_AVAILABILITY_WRITE_COUNT driver consts allowed. */
-      assert(shader_factory_info->num_driver_consts ==
-             PVR_QUERY_AVAILABILITY_WRITE_COUNT);
-
-      DRIVER_CONST(PVR_QUERY_AVAILABILITY_WRITE_INDEX_COUNT) =
-         num_query_indices;
+      const_buffer[PVR_QUERY_AVAILABILITY_DATA_INDEX_COUNT] = num_query_indices;
+      const_buffer[PVR_QUERY_AVAILABILITY_DATA_INDEX_BO_LO] = index_addr &
+                                                              0xffffffff;
+      const_buffer[PVR_QUERY_AVAILABILITY_DATA_INDEX_BO_HI] = index_addr >> 32;
+      const_buffer[PVR_QUERY_AVAILABILITY_DATA_BO_LO] = avail_addr & 0xffffffff;
+      const_buffer[PVR_QUERY_AVAILABILITY_DATA_BO_HI] = avail_addr >> 32;
       break;
    }
@@ -642,94 +480,44 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
       PVR_FROM_HANDLE(pvr_buffer,
                       buffer,
                       query_info->copy_query_results.dst_buffer);
-      const uint32_t image_sampler_state_arr_size =
-         (buffer_count + 2) * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS;
-      uint32_t image_sampler_idx = 0;
-      pvr_dev_addr_t addr;
-      uint64_t offset;
-
-      STACK_ARRAY(uint64_t, image_sampler_state, image_sampler_state_arr_size);
-      if (!image_sampler_state) {
-         vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
-
-         return vk_command_buffer_set_error(&cmd_buffer->vk,
-                                            VK_ERROR_OUT_OF_HOST_MEMORY);
-      }
-
-#define SAMPLER_ARR_2D(_arr, _i, _j) \
-   _arr[_i * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS + _j]
-
-      memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0),
-             &sampler_state[0],
-             sizeof(sampler_state));
-      image_sampler_idx++;
-
-      offset = query_info->copy_query_results.first_query * sizeof(uint32_t);
-
-      addr = PVR_DEV_ADDR_OFFSET(pool->availability_buffer->dev_addr, offset);
-
-      pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr);
-
-      result = pvr_pack_tex_state(device, &tex_info, &image_descriptor);
-      memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0),
-             image_descriptor.words,
-             sizeof(image_descriptor.words));
-
-      if (result != VK_SUCCESS) {
-         vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
-         return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
-      }
-
-      image_sampler_idx++;
-
-      for (uint32_t i = 0; i < buffer_count; i++) {
-         addr = PVR_DEV_ADDR_OFFSET(pool->result_buffer->dev_addr,
-                                    offset + i * pool->result_stride);
-
-         pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr);
-
-         result = pvr_pack_tex_state(device, &tex_info, &image_descriptor);
-         memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0),
-                image_descriptor.words,
-                sizeof(image_descriptor.words));
-         if (result != VK_SUCCESS) {
-            vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
-            return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
-         }
-
-         image_sampler_idx++;
-      }
-
-      memcpy(&const_buffer[0],
-             &SAMPLER_ARR_2D(image_sampler_state, 0, 0),
-             image_sampler_state_arr_size * sizeof(image_sampler_state[0]));
-
-      STACK_ARRAY_FINISH(image_sampler_state);
-
-      /* Only PVR_COPY_QUERY_POOL_RESULTS_COUNT driver consts allowed. */
-      assert(shader_factory_info->num_driver_consts ==
-             PVR_COPY_QUERY_POOL_RESULTS_COUNT);
+      pvr_dev_addr_t dev_addr;
 
       /* Assert if no memory is bound to destination buffer. */
       assert(buffer->dev_addr.addr);
 
-      addr = buffer->dev_addr;
-      addr.addr += query_info->copy_query_results.dst_offset;
+      uint64_t offset =
+         query_info->copy_query_results.first_query * sizeof(uint32_t);
 
-      DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_INDEX_COUNT) = num_query_indices;
-      DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_LOW) = addr.addr &
-                                                                   0xFFFFFFFF;
-      DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_HIGH) = addr.addr >>
-                                                                    32;
-      DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_DEST_STRIDE) =
-         query_info->copy_query_results.stride;
-      DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_PARTIAL_RESULT_FLAG) =
-         query_info->copy_query_results.flags & VK_QUERY_RESULT_PARTIAL_BIT;
-      DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_64_BIT_FLAG) =
-         query_info->copy_query_results.flags & VK_QUERY_RESULT_64_BIT;
-      DRIVER_CONST(PVR_COPY_QUERY_POOL_RESULTS_WITH_AVAILABILITY_FLAG) =
-         query_info->copy_query_results.flags &
-         VK_QUERY_RESULT_WITH_AVAILABILITY_BIT;
+      dev_addr = PVR_DEV_ADDR_OFFSET(buffer->dev_addr,
+                                     query_info->copy_query_results.dst_offset);
+      uint64_t dest_addr = dev_addr.addr;
+
+      dev_addr =
+         PVR_DEV_ADDR_OFFSET(pool->availability_buffer->dev_addr, offset);
+      uint64_t avail_addr = dev_addr.addr;
+
+      dev_addr = PVR_DEV_ADDR_OFFSET(pool->result_buffer->dev_addr, offset);
+      uint64_t result_addr = dev_addr.addr;
+
+      const_buffer[PVR_QUERY_COPY_DATA_INDEX_COUNT] = num_query_indices;
+
+      const_buffer[PVR_QUERY_COPY_DATA_DEST_BO_LO] = dest_addr & 0xffffffff;
+      const_buffer[PVR_QUERY_COPY_DATA_DEST_BO_HI] = dest_addr >> 32;
+
+      const_buffer[PVR_QUERY_COPY_DATA_AVAILABILITY_BO_LO] = avail_addr &
+                                                             0xffffffff;
+      const_buffer[PVR_QUERY_COPY_DATA_AVAILABILITY_BO_HI] = avail_addr >> 32;
+
+      const_buffer[PVR_QUERY_COPY_DATA_RESULT_BO_LO] = result_addr & 0xffffffff;
+      const_buffer[PVR_QUERY_COPY_DATA_RESULT_BO_HI] = result_addr >> 32;
+
+      const_buffer[PVR_QUERY_COPY_DATA_DEST_STRIDE] =
+         query_info->copy_query_results.stride;
+
+      const_buffer[PVR_QUERY_COPY_DATA_FLAGS] =
+         query_info->copy_query_results.flags;
 
       break;
    }
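With the raw VkQueryResultFlags passed through in PVR_QUERY_COPY_DATA_FLAGS, the kernel computes each query's destination with the new umad64_32: dest = dest_base + (uint64_t)stride * index, done as a 32x32 multiply added to the 64-bit base. A C equivalent, as a sketch (the helper name is hypothetical):

   #include <stdint.h>

   /* dest = base + (uint64_t)stride * index, matching the kernel's
    * nir_umad64_32(dest_stride, index, dest_base_lo, dest_base_hi).
    */
   static uint64_t query_dest_addr(uint64_t dest_base, uint32_t stride,
                                   uint32_t index)
   {
      return dest_base + (uint64_t)stride * index;
   }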
@@ -737,74 +525,26 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
       PVR_FROM_HANDLE(pvr_query_pool,
                       pool,
                       query_info->reset_query_pool.query_pool);
-      const uint32_t image_sampler_state_arr_size =
-         (buffer_count + 2) * ROGUE_NUM_TEXSTATE_SAMPLER_WORDS;
-      uint32_t image_sampler_idx = 0;
-      pvr_dev_addr_t addr;
-      uint64_t offset;
-
-      STACK_ARRAY(uint64_t, image_sampler_state, image_sampler_state_arr_size);
-      if (!image_sampler_state) {
-         vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
-
-         return vk_command_buffer_set_error(&cmd_buffer->vk,
-                                            VK_ERROR_OUT_OF_HOST_MEMORY);
-      }
-
-      memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0),
-             &sampler_state[0],
-             sizeof(sampler_state));
-      image_sampler_idx++;
-
-      offset = query_info->reset_query_pool.first_query * sizeof(uint32_t);
-
-      for (uint32_t i = 0; i < buffer_count; i++) {
-         addr = PVR_DEV_ADDR_OFFSET(pool->result_buffer->dev_addr,
-                                    offset + i * pool->result_stride);
-
-         pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr);
-
-         result = pvr_pack_tex_state(device, &tex_info, &image_descriptor);
-         memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0),
-                image_descriptor.words,
-                sizeof(image_descriptor.words));
-
-         if (result != VK_SUCCESS) {
-            vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
-            return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
-         }
-
-         image_sampler_idx++;
-      }
-
-      addr = PVR_DEV_ADDR_OFFSET(pool->availability_buffer->dev_addr, offset);
-
-      pvr_init_tex_info(dev_info, &tex_info, num_query_indices, addr);
-
-      result = pvr_pack_tex_state(device, &tex_info, &image_descriptor);
-      memcpy(&SAMPLER_ARR_2D(image_sampler_state, image_sampler_idx, 0),
-             image_descriptor.words,
-             sizeof(image_descriptor.words));
-      if (result != VK_SUCCESS) {
-         vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
-         return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
-      }
-
-      image_sampler_idx++;
-
-#undef SAMPLER_ARR_2D
-
-      memcpy(&const_buffer[0],
-             &image_sampler_state[0],
-             image_sampler_state_arr_size * sizeof(image_sampler_state[0]));
-
-      STACK_ARRAY_FINISH(image_sampler_state);
-
-      /* Only PVR_RESET_QUERY_POOL_COUNT driver consts allowed. */
-      assert(shader_factory_info->num_driver_consts ==
-             PVR_RESET_QUERY_POOL_COUNT);
-
-      DRIVER_CONST(PVR_RESET_QUERY_POOL_INDEX_COUNT) = num_query_indices;
+      uint64_t offset =
+         query_info->reset_query_pool.first_query * sizeof(uint32_t);
+
+      pvr_dev_addr_t dev_addr =
+         PVR_DEV_ADDR_OFFSET(pool->result_buffer->dev_addr, offset);
+      uint64_t result_addr = dev_addr.addr;
+
+      dev_addr =
+         PVR_DEV_ADDR_OFFSET(pool->availability_buffer->dev_addr, offset);
+      uint64_t avail_addr = dev_addr.addr;
+
+      const_buffer[PVR_QUERY_RESET_DATA_INDEX_COUNT] = num_query_indices;
+      const_buffer[PVR_QUERY_RESET_DATA_RESULT_BO_LO] = result_addr &
+                                                        0xffffffff;
+      const_buffer[PVR_QUERY_RESET_DATA_RESULT_BO_HI] = result_addr >> 32;
+      const_buffer[PVR_QUERY_RESET_DATA_AVAILABILITY_BO_LO] = avail_addr &
+                                                              0xffffffff;
+      const_buffer[PVR_QUERY_RESET_DATA_AVAILABILITY_BO_HI] = avail_addr >> 32;
       break;
    }
@@ -812,21 +552,10 @@ VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
       UNREACHABLE("Invalid query type");
    }
 
-#undef DRIVER_CONST
-
-   for (uint32_t i = 0; i < shader_factory_info->num_static_const; i++) {
-      const struct pvr_static_buffer *load =
-         &shader_factory_info->static_const_buffer[i];
-
-      /* Assert if static const is out of range. */
-      assert(load->dst_idx < shader_factory_info->const_shared_regs);
-      const_buffer[load->dst_idx] = load->value;
-   }
-
    result = pvr_cmd_buffer_upload_general(
       cmd_buffer,
       const_buffer,
-      PVR_DW_TO_BYTES(shader_factory_info->const_shared_regs),
+      PVR_DW_TO_BYTES(pipeline.const_shared_regs_count),
       &pvr_bo);
    if (result != VK_SUCCESS) {
       vk_free(&cmd_buffer->vk.pool->alloc, const_buffer);
@@ -375,16 +375,15 @@ static VkResult pvr_process_transfer_cmds(struct pvr_device *device,
    return result;
 }
 
-static VkResult
-pvr_process_occlusion_query_cmd(struct pvr_device *device,
-                                struct pvr_queue *queue,
-                                struct pvr_sub_cmd_compute *sub_cmd)
+static VkResult pvr_process_query_cmd(struct pvr_device *device,
+                                      struct pvr_queue *queue,
+                                      struct pvr_sub_cmd_compute *sub_cmd)
 {
    struct vk_sync *sync;
    VkResult result;
 
    /* TODO: Currently we add barrier event sub commands to handle the sync
-    * necessary for the different occlusion query types. Would we get any speed
+    * necessary for the different query types. Would we get any speed
     * up in processing the queue by doing that sync here without using event sub
     * commands?
     */
@@ -397,17 +396,17 @@ pvr_process_query_cmd(struct pvr_device *device,
    if (result != VK_SUCCESS)
       return result;
 
-   result = pvr_compute_job_submit(
-      queue->query_ctx,
-      sub_cmd,
-      queue->next_job_wait_sync[PVR_JOB_TYPE_OCCLUSION_QUERY],
-      sync);
+   result =
+      pvr_compute_job_submit(queue->query_ctx,
+                             sub_cmd,
+                             queue->next_job_wait_sync[PVR_JOB_TYPE_QUERY],
+                             sync);
    if (result != VK_SUCCESS) {
       vk_sync_destroy(&device->vk, sync);
       return result;
    }
 
-   pvr_update_job_syncs(device, queue, sync, PVR_JOB_TYPE_OCCLUSION_QUERY);
+   pvr_update_job_syncs(device, queue, sync, PVR_JOB_TYPE_QUERY);
 
    return result;
 }
@@ -423,10 +422,10 @@ pvr_process_event_cmd_barrier(struct pvr_device *device,
    uint32_t src_wait_count = 0;
    VkResult result;
 
-   assert(!(src_mask & ~(PVR_PIPELINE_STAGE_ALL_BITS |
-                         PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT)));
-   assert(!(dst_mask & ~(PVR_PIPELINE_STAGE_ALL_BITS |
-                         PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT)));
+   assert(!(src_mask &
+            ~(PVR_PIPELINE_STAGE_ALL_BITS | PVR_PIPELINE_STAGE_QUERY_BIT)));
+   assert(!(dst_mask &
+            ~(PVR_PIPELINE_STAGE_ALL_BITS | PVR_PIPELINE_STAGE_QUERY_BIT)));
 
    u_foreach_bit (stage, src_mask) {
       if (queue->last_job_signal_sync[stage]) {
@@ -494,7 +493,7 @@ pvr_process_event_cmd_set_or_reset(struct pvr_device *device,
                                    const enum pvr_event_state new_event_state)
 {
    /* Not PVR_JOB_TYPE_MAX since that also includes
-    * PVR_JOB_TYPE_OCCLUSION_QUERY so no stage in the src mask.
+    * PVR_JOB_TYPE_QUERY so no stage in the src mask.
     */
    struct vk_sync_wait waits[PVR_NUM_SYNC_PIPELINE_STAGES];
    struct vk_sync_signal signal;
@@ -696,12 +695,12 @@ static VkResult pvr_process_cmd_buffer(struct pvr_device *device,
                        link) {
       switch (sub_cmd->type) {
       case PVR_SUB_CMD_TYPE_GRAPHICS: {
-         /* If the fragment job utilizes occlusion queries, for data integrity
-          * it needs to wait for the occlusion query to be processed.
+         /* If the fragment job utilizes queries, for data integrity
+          * it needs to wait for the query to be processed.
           */
-         if (sub_cmd->gfx.has_occlusion_query) {
+         if (sub_cmd->gfx.has_query) {
             struct pvr_sub_cmd_event_barrier barrier = {
-               .wait_for_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
+               .wait_for_stage_mask = PVR_PIPELINE_STAGE_QUERY_BIT,
               .wait_at_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
            };
@@ -761,9 +760,8 @@ static VkResult pvr_process_cmd_buffer(struct pvr_device *device,
          break;
       }
 
-      case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY:
-         result =
-            pvr_process_occlusion_query_cmd(device, queue, &sub_cmd->compute);
+      case PVR_SUB_CMD_TYPE_QUERY:
+         result = pvr_process_query_cmd(device, queue, &sub_cmd->compute);
          break;
 
       case PVR_SUB_CMD_TYPE_EVENT:
@@ -847,11 +845,10 @@ static VkResult pvr_process_queue_signals(struct pvr_queue *queue,
    uint32_t wait_count = 0;
 
    for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
-      /* Exception for occlusion query jobs since that's something internal,
+      /* Exception for query jobs since that's something internal,
        * so the user provided syncs won't ever have it as a source stage.
        */
-      if (!(signal_stage_src & BITFIELD_BIT(i)) &&
-          i != PVR_JOB_TYPE_OCCLUSION_QUERY)
+      if (!(signal_stage_src & BITFIELD_BIT(i)) && i != PVR_JOB_TYPE_QUERY)
         continue;
 
      if (!queue->last_job_signal_sync[i])
@@ -31,30 +31,6 @@
 #include "util/bitscan.h"
 #include "util/u_math.h"
 
-/* Occlusion query availability writes. */
-enum pvr_query_availability_write_pool_const {
-   PVR_QUERY_AVAILABILITY_WRITE_INDEX_COUNT,
-   PVR_QUERY_AVAILABILITY_WRITE_COUNT,
-};
-
-/* Copy query pool results. */
-enum pvr_copy_query_pool_const {
-   PVR_COPY_QUERY_POOL_RESULTS_INDEX_COUNT,
-   PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_LOW,
-   PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_HIGH,
-   PVR_COPY_QUERY_POOL_RESULTS_DEST_STRIDE,
-   PVR_COPY_QUERY_POOL_RESULTS_PARTIAL_RESULT_FLAG,
-   PVR_COPY_QUERY_POOL_RESULTS_64_BIT_FLAG,
-   PVR_COPY_QUERY_POOL_RESULTS_WITH_AVAILABILITY_FLAG,
-   PVR_COPY_QUERY_POOL_RESULTS_COUNT,
-};
-
-/* Reset query pool. */
-enum pvr_reset_query_pool_pool_const {
-   PVR_RESET_QUERY_POOL_INDEX_COUNT,
-   PVR_RESET_QUERY_POOL_COUNT,
-};
-
 /* ClearAttachments. */
 enum pvr_clear_attachment_const {
    PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_0 = 0, /* Don't change. Indexes array.
@@ -55,110 +55,6 @@ struct pvr_shader_factory_info {
    uint32_t msaa_sample_count;
 };
 
-static const uint8_t availability_query_write_shader[144] = { 0 };
-
-static const uint32_t availability_query_write_location_map[1] = {
-   0,
-};
-
-static const struct pvr_static_buffer
-   availability_query_write_static_consts[3] = {
-      { 0, 0 },
-      { 0, 0 },
-      { 0, 0 },
-   };
-
-static const struct pvr_shader_factory_info availability_query_write_info = {
-   0,
-   0,
-   0,
-   0,
-   0,
-   sizeof(availability_query_write_shader),
-   availability_query_write_shader,
-   0,
-   0,
-   NULL,
-   PVR_INVALID_INST,
-   0,
-   availability_query_write_location_map,
-   0,
-   availability_query_write_static_consts,
-   0,
-   ~0,
-};
-
-static const uint8_t copy_query_results_shader[384] = { 0 };
-
-static const uint32_t copy_query_results_location_map[7] = {
-   0, 0, 0, 0, 0, 0, 0,
-};
-
-static const struct pvr_static_buffer copy_query_results_static_consts[2] = {
-   { 0, 0 },
-   { 0, 0 },
-};
-
-static const struct pvr_shader_factory_info copy_query_results_info = {
-   0,
-   0,
-   0,
-   0,
-   0,
-   sizeof(copy_query_results_shader),
-   copy_query_results_shader,
-   0,
-   0,
-   NULL,
-   PVR_INVALID_INST,
-   0,
-   copy_query_results_location_map,
-   0,
-   copy_query_results_static_consts,
-   0,
-   ~0,
-};
-
-static const uint8_t reset_query_shader_code[136] = { 0 };
-
-static const uint32_t reset_query_location_map[1] = {
-   0,
-};
-
-static const struct pvr_static_buffer reset_query_static_consts[2] = {
-   { 0, 0 },
-   { 0, 0 },
-};
-
-static const struct pvr_shader_factory_info reset_query_info = {
-   0,
-   0,
-   0,
-   0,
-   0,
-   sizeof(reset_query_shader_code),
-   reset_query_shader_code,
-   0,
-   0,
-   NULL,
-   PVR_INVALID_INST,
-   0,
-   reset_query_location_map,
-   0,
-   reset_query_static_consts,
-   0,
-   ~0,
-};
-
-static const struct pvr_shader_factory_info
-   *const copy_query_results_collection[1] = {
-      &copy_query_results_info,
-   };
-
-static const struct pvr_shader_factory_info *const reset_query_collection[1] = {
-   &reset_query_info,
-};
-
 static const uint8_t clear_attachments_1_dw_0_offt_out_reg_shader_code[8] = {
    0
 };