/* Copyright © 2023 Intel Corporation
 * SPDX-License-Identifier: MIT
 */
|
|
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
#include <errno.h>
|
|
|
|
|
|
|
|
|
|
/* VG(x) expands to its argument when built with HAVE_VALGRIND defined and
 * to nothing otherwise, so valgrind client requests can be written inline
 * without further #ifdefs.
 */
#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif
|
|
|
|
|
|
|
|
|
|
#include "pipe/p_defines.h"
|
|
|
|
|
#include "pipe/p_state.h"
|
|
|
|
|
#include "pipe/p_context.h"
|
|
|
|
|
#include "pipe/p_screen.h"
|
|
|
|
|
#include "util/u_upload_mgr.h"
|
|
|
|
|
#include "compiler/nir/nir_builder.h"
|
|
|
|
|
#include "compiler/nir/nir_serialize.h"
|
|
|
|
|
#include "intel/compiler/brw_compiler.h"
|
|
|
|
|
#include "intel/common/intel_aux_map.h"
|
|
|
|
|
#include "intel/common/intel_l3_config.h"
|
|
|
|
|
#include "intel/common/intel_sample_positions.h"
|
|
|
|
|
#include "intel/ds/intel_tracepoints.h"
|
|
|
|
|
#include "iris_batch.h"
|
|
|
|
|
#include "iris_context.h"
|
|
|
|
|
#include "iris_defines.h"
|
|
|
|
|
#include "iris_pipe.h"
|
|
|
|
|
#include "iris_resource.h"
|
|
|
|
|
#include "iris_utrace.h"
|
|
|
|
|
|
|
|
|
|
#include "iris_genx_macros.h"
|
|
|
|
|
#include "intel/common/intel_genX_state.h"
|
|
|
|
|
|
|
|
|
|
#include "drm-uapi/i915_drm.h"
|
|
|
|
|
|
|
|
|
|
#include "libintel_shaders.h"
|
|
|
|
|
|
|
|
|
|
#if GFX_VERx10 == 80
|
|
|
|
|
# include "intel_gfx8_shaders_code.h"
|
|
|
|
|
#elif GFX_VERx10 == 90
|
|
|
|
|
# include "intel_gfx9_shaders_code.h"
|
|
|
|
|
#elif GFX_VERx10 == 110
|
|
|
|
|
# include "intel_gfx11_shaders_code.h"
|
|
|
|
|
#elif GFX_VERx10 == 120
|
|
|
|
|
# include "intel_gfx12_shaders_code.h"
|
|
|
|
|
#elif GFX_VERx10 == 125
|
|
|
|
|
# include "intel_gfx125_shaders_code.h"
|
|
|
|
|
#elif GFX_VERx10 == 200
|
|
|
|
|
# include "intel_gfx20_shaders_code.h"
|
|
|
|
|
#else
|
|
|
|
|
# error "Unsupported generation"
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
/* Build a nir_load_uniform of one field of a parameter struct.
 *
 * The load has 1 component of bit_size bits, a constant 0 offset source,
 * .base set to the byte offset of field_name inside struct_name and .range
 * set to the field width in bytes (bit_size / 8).
 */
#define load_param(b, bit_size, struct_name, field_name)          \
   nir_load_uniform(b, 1, bit_size, nir_imm_int(b, 0),            \
                    .base = offsetof(struct_name, field_name),    \
                    .range = (bit_size) / 8)
|
|
|
|
|
|
|
|
|
|
static nir_def *
|
|
|
|
|
load_fragment_index(nir_builder *b)
|
|
|
|
|
{
|
|
|
|
|
nir_def *pos_in = nir_f2i32(b, nir_trim_vector(b, nir_load_frag_coord(b), 2));
|
|
|
|
|
return nir_iadd(b,
|
|
|
|
|
nir_imul_imm(b, nir_channel(b, pos_in, 1), 8192),
|
|
|
|
|
nir_channel(b, pos_in, 0));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static nir_shader *
|
|
|
|
|
load_shader_lib(struct iris_screen *screen, void *mem_ctx)
|
|
|
|
|
{
|
|
|
|
|
const nir_shader_compiler_options *nir_options =
|
|
|
|
|
screen->compiler->nir_options[MESA_SHADER_KERNEL];
|
|
|
|
|
|
|
|
|
|
struct blob_reader blob;
|
|
|
|
|
blob_reader_init(&blob, (void *)genX(intel_shaders_nir),
|
|
|
|
|
sizeof(genX(intel_shaders_nir)));
|
|
|
|
|
return nir_deserialize(mem_ctx, nir_options, &blob);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static unsigned
|
|
|
|
|
iris_call_generation_shader(struct iris_screen *screen, nir_builder *b)
|
|
|
|
|
{
|
|
|
|
|
genX(libiris_write_draw)(
|
|
|
|
|
b,
|
|
|
|
|
load_param(b, 64, struct iris_gen_indirect_params, generated_cmds_addr),
|
|
|
|
|
load_param(b, 64, struct iris_gen_indirect_params, indirect_data_addr),
|
|
|
|
|
load_param(b, 64, struct iris_gen_indirect_params, draw_id_addr),
|
|
|
|
|
load_param(b, 32, struct iris_gen_indirect_params, indirect_data_stride),
|
|
|
|
|
load_param(b, 64, struct iris_gen_indirect_params, draw_count_addr),
|
|
|
|
|
load_param(b, 32, struct iris_gen_indirect_params, draw_base),
|
|
|
|
|
load_param(b, 32, struct iris_gen_indirect_params, max_draw_count),
|
|
|
|
|
load_param(b, 32, struct iris_gen_indirect_params, flags),
|
|
|
|
|
load_param(b, 32, struct iris_gen_indirect_params, ring_count),
|
|
|
|
|
load_param(b, 64, struct iris_gen_indirect_params, gen_addr),
|
|
|
|
|
load_param(b, 64, struct iris_gen_indirect_params, end_addr),
|
|
|
|
|
load_fragment_index(b));
|
|
|
|
|
return sizeof(struct iris_gen_indirect_params);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
genX(init_screen_gen_state)(struct iris_screen *screen)
|
|
|
|
|
{
|
|
|
|
|
screen->vtbl.load_shader_lib = load_shader_lib;
|
|
|
|
|
screen->vtbl.call_generation_shader = iris_call_generation_shader;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Stream out temporary/short-lived state.
|
|
|
|
|
*
|
|
|
|
|
* This allocates space, pins the BO, and includes the BO address in the
|
|
|
|
|
* returned offset (which works because all state lives in 32-bit memory
|
|
|
|
|
* zones).
|
|
|
|
|
*/
|
|
|
|
|
static void *
|
|
|
|
|
upload_state(struct iris_batch *batch,
|
|
|
|
|
struct u_upload_mgr *uploader,
|
|
|
|
|
struct iris_state_ref *ref,
|
|
|
|
|
unsigned size,
|
|
|
|
|
unsigned alignment)
|
|
|
|
|
{
|
|
|
|
|
void *p = NULL;
|
|
|
|
|
u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
|
|
|
|
|
iris_use_pinned_bo(batch, iris_resource_bo(ref->res), false, IRIS_DOMAIN_NONE);
|
|
|
|
|
return p;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static uint32_t *
|
|
|
|
|
stream_state(struct iris_batch *batch,
|
|
|
|
|
struct u_upload_mgr *uploader,
|
|
|
|
|
struct pipe_resource **out_res,
|
|
|
|
|
unsigned size,
|
|
|
|
|
unsigned alignment,
|
|
|
|
|
uint32_t *out_offset)
|
|
|
|
|
{
|
|
|
|
|
void *ptr = NULL;
|
|
|
|
|
|
|
|
|
|
u_upload_alloc(uploader, 0, size, alignment, out_offset, out_res, &ptr);
|
|
|
|
|
|
|
|
|
|
struct iris_bo *bo = iris_resource_bo(*out_res);
|
|
|
|
|
iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);
|
|
|
|
|
|
|
|
|
|
iris_record_state_size(batch->state_sizes,
|
|
|
|
|
bo->address + *out_offset, size);
|
|
|
|
|
|
|
|
|
|
*out_offset += iris_bo_offset_from_base_address(bo);
|
|
|
|
|
|
|
|
|
|
return ptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
emit_indirect_generate_draw(struct iris_batch *batch,
|
|
|
|
|
struct iris_address params_addr,
|
|
|
|
|
unsigned params_size,
|
|
|
|
|
unsigned ring_count)
|
|
|
|
|
{
|
|
|
|
|
struct iris_screen *screen = batch->screen;
|
|
|
|
|
struct iris_context *ice = batch->ice;
|
|
|
|
|
struct isl_device *isl_dev = &screen->isl_dev;
|
|
|
|
|
const struct intel_device_info *devinfo = screen->devinfo;
|
|
|
|
|
|
|
|
|
|
/* State emission */
|
|
|
|
|
uint32_t ves_dws[1 + 2 * GENX(VERTEX_ELEMENT_STATE_length)];
|
|
|
|
|
iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), ves_dws, ve) {
|
|
|
|
|
ve.DWordLength = 1 + GENX(VERTEX_ELEMENT_STATE_length) * 2 -
|
|
|
|
|
GENX(3DSTATE_VERTEX_ELEMENTS_length_bias);
|
|
|
|
|
}
|
|
|
|
|
iris_pack_state(GENX(VERTEX_ELEMENT_STATE), &ves_dws[1], ve) {
|
|
|
|
|
ve.VertexBufferIndex = 1;
|
|
|
|
|
ve.Valid = true;
|
|
|
|
|
ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
|
|
|
|
|
ve.SourceElementOffset = 0;
|
|
|
|
|
ve.Component0Control = VFCOMP_STORE_SRC;
|
|
|
|
|
ve.Component1Control = VFCOMP_STORE_0;
|
|
|
|
|
ve.Component2Control = VFCOMP_STORE_0;
|
|
|
|
|
ve.Component3Control = VFCOMP_STORE_0;
|
|
|
|
|
}
|
|
|
|
|
iris_pack_state(GENX(VERTEX_ELEMENT_STATE), &ves_dws[3], ve) {
|
|
|
|
|
ve.VertexBufferIndex = 0;
|
|
|
|
|
ve.Valid = true;
|
|
|
|
|
ve.SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT;
|
|
|
|
|
ve.SourceElementOffset = 0;
|
|
|
|
|
ve.Component0Control = VFCOMP_STORE_SRC;
|
|
|
|
|
ve.Component1Control = VFCOMP_STORE_SRC;
|
|
|
|
|
ve.Component2Control = VFCOMP_STORE_SRC;
|
|
|
|
|
ve.Component3Control = VFCOMP_STORE_1_FP;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
iris_batch_emit(batch, ves_dws, sizeof(ves_dws));
|
|
|
|
|
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_VF_STATISTICS), vf);
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgvs) {
|
|
|
|
|
sgvs.InstanceIDEnable = true;
|
|
|
|
|
sgvs.InstanceIDComponentNumber = COMP_1;
|
|
|
|
|
sgvs.InstanceIDElementOffset = 0;
|
|
|
|
|
}
|
|
|
|
|
#if GFX_VER >= 11
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS_2), sgvs);
|
|
|
|
|
#endif
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
|
|
|
|
|
vfi.InstancingEnable = false;
|
|
|
|
|
vfi.VertexElementIndex = 0;
|
|
|
|
|
}
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
|
|
|
|
|
vfi.InstancingEnable = false;
|
|
|
|
|
vfi.VertexElementIndex = 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
|
|
|
|
|
topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ice->shaders.urb.cfg.size[MESA_SHADER_VERTEX] = 1;
|
|
|
|
|
ice->shaders.urb.cfg.size[MESA_SHADER_TESS_CTRL] = 1;
|
|
|
|
|
ice->shaders.urb.cfg.size[MESA_SHADER_TESS_EVAL] = 1;
|
|
|
|
|
ice->shaders.urb.cfg.size[MESA_SHADER_GEOMETRY] = 1;
|
|
|
|
|
genX(emit_urb_config)(batch,
|
|
|
|
|
false /* has_tess_eval */,
|
|
|
|
|
false /* has_geometry */);
|
|
|
|
|
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
|
|
|
|
|
ps_blend.HasWriteableRT = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), wm);
|
|
|
|
|
|
|
|
|
|
#if GFX_VER >= 12
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
|
|
|
|
|
db.DepthBoundsTestEnable = false;
|
|
|
|
|
db.DepthBoundsTestMinValue = 0.0;
|
|
|
|
|
db.DepthBoundsTestMaxValue = 1.0;
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms);
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
|
|
|
|
|
sm.SampleMask = 0x1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_VS), vs);
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_HS), hs);
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_TE), te);
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_DS), DS);
|
|
|
|
|
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_STREAMOUT), so);
|
|
|
|
|
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_GS), gs);
|
|
|
|
|
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_CLIP), clip) {
|
|
|
|
|
clip.PerspectiveDivideDisable = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_SF), sf) {
|
|
|
|
|
#if GFX_VER >= 12
|
|
|
|
|
sf.DerefBlockSize = ice->state.urb_deref_block_size;
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_RASTER), raster) {
|
|
|
|
|
raster.CullMode = CULLMODE_NONE;
|
|
|
|
|
}
|
|
|
|
|
|
2024-02-06 11:05:33 -08:00
|
|
|
const struct iris_compiled_shader *shader = ice->draw.generation.shader;
|
|
|
|
|
const struct iris_fs_data *fs_data = iris_fs_data_const(shader);
|
2023-12-20 12:15:43 +02:00
|
|
|
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) {
|
|
|
|
|
sbe.VertexURBEntryReadOffset = 1;
|
2024-02-06 11:05:33 -08:00
|
|
|
sbe.NumberofSFOutputAttributes = fs_data->num_varying_inputs;
|
|
|
|
|
sbe.VertexURBEntryReadLength = MAX2((fs_data->num_varying_inputs + 1) / 2, 1);
|
|
|
|
|
sbe.ConstantInterpolationEnable = fs_data->flat_inputs;
|
2023-12-20 12:15:43 +02:00
|
|
|
sbe.ForceVertexURBEntryReadLength = true;
|
|
|
|
|
sbe.ForceVertexURBEntryReadOffset = true;
|
|
|
|
|
#if GFX_VER >= 9
|
|
|
|
|
for (unsigned i = 0; i < 32; i++)
|
|
|
|
|
sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_WM), wm) {
|
2024-02-06 11:05:33 -08:00
|
|
|
if (fs_data->has_side_effects || fs_data->uses_kill)
|
2023-12-20 12:15:43 +02:00
|
|
|
wm.ForceThreadDispatchEnable = ForceON;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_PS), ps) {
|
2024-02-06 11:05:33 -08:00
|
|
|
struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(shader->brw_prog_data);
|
2023-12-20 12:15:43 +02:00
|
|
|
intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
|
|
|
|
|
1 /* rasterization_samples */,
|
|
|
|
|
0 /* msaa_flags */);
|
|
|
|
|
|
2024-02-06 11:05:33 -08:00
|
|
|
ps.VectorMaskEnable = fs_data->uses_vmask;
|
2023-12-20 12:15:43 +02:00
|
|
|
|
|
|
|
|
ps.BindingTableEntryCount = GFX_VER == 9 ? 1 : 0;
|
|
|
|
|
#if GFX_VER < 20
|
2024-02-06 11:05:33 -08:00
|
|
|
ps.PushConstantEnable = shader->nr_params > 0 ||
|
|
|
|
|
shader->ubo_ranges[0].length;
|
2023-12-20 12:15:43 +02:00
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
ps.DispatchGRFStartRegisterForConstantSetupData0 =
|
|
|
|
|
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
|
|
|
|
|
ps.DispatchGRFStartRegisterForConstantSetupData1 =
|
|
|
|
|
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
|
|
|
|
|
#if GFX_VER < 20
|
|
|
|
|
ps.DispatchGRFStartRegisterForConstantSetupData2 =
|
|
|
|
|
brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
ps.KernelStartPointer0 = KSP(ice->draw.generation.shader) +
|
|
|
|
|
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
|
|
|
|
|
ps.KernelStartPointer1 = KSP(ice->draw.generation.shader) +
|
|
|
|
|
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
|
|
|
|
|
#if GFX_VER < 20
|
|
|
|
|
ps.KernelStartPointer2 = KSP(ice->draw.generation.shader) +
|
|
|
|
|
brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_PS_EXTRA), psx) {
|
|
|
|
|
psx.PixelShaderValid = true;
|
|
|
|
|
#if GFX_VER < 20
|
2024-02-06 11:05:33 -08:00
|
|
|
psx.AttributeEnable = fs_data->num_varying_inputs > 0;
|
2023-12-20 12:15:43 +02:00
|
|
|
#endif
|
2024-02-06 11:05:33 -08:00
|
|
|
psx.PixelShaderIsPerSample = fs_data->is_per_sample;
|
|
|
|
|
psx.PixelShaderComputedDepthMode = fs_data->computed_depth_mode;
|
2023-12-20 12:15:43 +02:00
|
|
|
#if GFX_VER >= 9
|
|
|
|
|
#if GFX_VER >= 20
|
2024-02-06 11:05:33 -08:00
|
|
|
assert(!fs_data->pulls_bary);
|
2023-12-20 12:15:43 +02:00
|
|
|
#else
|
2024-02-06 11:05:33 -08:00
|
|
|
psx.PixelShaderPullsBary = fs_data->pulls_bary;
|
2023-12-20 12:15:43 +02:00
|
|
|
#endif
|
2024-02-06 11:05:33 -08:00
|
|
|
psx.PixelShaderComputesStencil = fs_data->computed_stencil;
|
2023-12-20 12:15:43 +02:00
|
|
|
#endif
|
|
|
|
|
psx.PixelShaderHasUAV = GFX_VER == 8;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
|
|
|
|
|
uint32_t cc_vp_address;
|
|
|
|
|
uint32_t *cc_vp_map =
|
|
|
|
|
stream_state(batch, ice->state.dynamic_uploader,
|
|
|
|
|
&ice->state.last_res.cc_vp,
|
|
|
|
|
4 * GENX(CC_VIEWPORT_length), 32, &cc_vp_address);
|
|
|
|
|
|
|
|
|
|
iris_pack_state(GENX(CC_VIEWPORT), cc_vp_map, ccv) {
|
|
|
|
|
ccv.MinimumDepth = 0.0f;
|
|
|
|
|
ccv.MaximumDepth = 1.0f;
|
|
|
|
|
}
|
|
|
|
|
cc.CCViewportPointer = cc_vp_address;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#if GFX_VER >= 12
|
|
|
|
|
/* Disable Primitive Replication. */
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if GFX_VERx10 == 125
|
|
|
|
|
/* DG2: Wa_22011440098
|
|
|
|
|
* MTL: Wa_18022330953
|
|
|
|
|
*
|
|
|
|
|
* In 3D mode, after programming push constant alloc command immediately
|
|
|
|
|
* program push constant command(ZERO length) without any commit between
|
|
|
|
|
* them.
|
|
|
|
|
*
|
|
|
|
|
* Note that Wa_16011448509 isn't needed here as all address bits are zero.
|
|
|
|
|
*/
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_ALL), c) {
|
|
|
|
|
/* Update empty push constants for all stages (bitmask = 11111b) */
|
|
|
|
|
c.ShaderUpdateEnable = 0x1f;
|
|
|
|
|
c.MOCS = iris_mocs(NULL, isl_dev, 0);
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
float x0 = 0.0f, x1 = MIN2(ring_count, 8192);
|
|
|
|
|
float y0 = 0.0f, y1 = DIV_ROUND_UP(ring_count, 8192);
|
|
|
|
|
float z = 0.0f;
|
|
|
|
|
|
|
|
|
|
float *vertices =
|
|
|
|
|
upload_state(batch, ice->state.dynamic_uploader,
|
|
|
|
|
&ice->draw.generation.vertices,
|
|
|
|
|
ALIGN(9 * sizeof(float), 8), 8);
|
|
|
|
|
|
|
|
|
|
vertices[0] = x1; vertices[1] = y1; vertices[2] = z; /* v0 */
|
|
|
|
|
vertices[3] = x0; vertices[4] = y1; vertices[5] = z; /* v1 */
|
|
|
|
|
vertices[6] = x0; vertices[7] = y0; vertices[8] = z; /* v2 */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uint32_t vbs_dws[1 + GENX(VERTEX_BUFFER_STATE_length)];
|
|
|
|
|
iris_pack_command(GENX(3DSTATE_VERTEX_BUFFERS), vbs_dws, vbs) {
|
|
|
|
|
vbs.DWordLength = ARRAY_SIZE(vbs_dws) -
|
|
|
|
|
GENX(3DSTATE_VERTEX_BUFFERS_length_bias);
|
|
|
|
|
}
|
|
|
|
|
_iris_pack_state(batch, GENX(VERTEX_BUFFER_STATE), &vbs_dws[1], vb) {
|
|
|
|
|
vb.VertexBufferIndex = 0;
|
|
|
|
|
vb.AddressModifyEnable = true;
|
|
|
|
|
vb.BufferStartingAddress = ro_bo(iris_resource_bo(ice->draw.generation.vertices.res),
|
|
|
|
|
ice->draw.generation.vertices.offset);
|
|
|
|
|
vb.BufferPitch = 3 * sizeof(float);
|
|
|
|
|
vb.BufferSize = 9 * sizeof(float);
|
|
|
|
|
vb.MOCS = iris_mocs(NULL, isl_dev, ISL_SURF_USAGE_VERTEX_BUFFER_BIT);
|
|
|
|
|
#if GFX_VER >= 12
|
|
|
|
|
vb.L3BypassDisable = true;
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
iris_batch_emit(batch, vbs_dws, sizeof(vbs_dws));
|
|
|
|
|
|
|
|
|
|
#if GFX_VERx10 > 120
|
|
|
|
|
uint32_t const_dws[GENX(3DSTATE_CONSTANT_ALL_length) +
|
|
|
|
|
GENX(3DSTATE_CONSTANT_ALL_DATA_length)];
|
|
|
|
|
|
|
|
|
|
iris_pack_command(GENX(3DSTATE_CONSTANT_ALL), const_dws, all) {
|
|
|
|
|
all.DWordLength = ARRAY_SIZE(const_dws) -
|
|
|
|
|
GENX(3DSTATE_CONSTANT_ALL_length_bias);
|
|
|
|
|
all.ShaderUpdateEnable = 1 << MESA_SHADER_FRAGMENT;
|
|
|
|
|
all.MOCS = isl_mocs(isl_dev, 0, false);
|
|
|
|
|
all.PointerBufferMask = 0x1;
|
|
|
|
|
}
|
|
|
|
|
_iris_pack_state(batch, GENX(3DSTATE_CONSTANT_ALL_DATA),
|
|
|
|
|
&const_dws[GENX(3DSTATE_CONSTANT_ALL_length)], data) {
|
|
|
|
|
data.PointerToConstantBuffer = params_addr;
|
|
|
|
|
data.ConstantBufferReadLength = DIV_ROUND_UP(params_size, 32);
|
|
|
|
|
}
|
|
|
|
|
iris_batch_emit(batch, const_dws, sizeof(const_dws));
|
|
|
|
|
#else
|
|
|
|
|
/* The Skylake PRM contains the following restriction:
|
|
|
|
|
*
|
|
|
|
|
* "The driver must ensure The following case does not occur without a
|
|
|
|
|
* flush to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read length
|
|
|
|
|
* equal to zero committed followed by a 3DSTATE_CONSTANT_* with buffer
|
|
|
|
|
* 0 read length not equal to zero committed."
|
|
|
|
|
*
|
|
|
|
|
* To avoid this, we program the highest slot.
|
|
|
|
|
*/
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_PS), c) {
|
|
|
|
|
#if GFX_VER > 8
|
|
|
|
|
c.MOCS = iris_mocs(NULL, isl_dev, ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
|
|
|
|
|
#endif
|
|
|
|
|
c.ConstantBody.ReadLength[3] = DIV_ROUND_UP(params_size, 32);
|
|
|
|
|
c.ConstantBody.Buffer[3] = params_addr;
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if GFX_VER <= 9
|
|
|
|
|
/* Gfx9 requires 3DSTATE_BINDING_TABLE_POINTERS_XS to be re-emitted in
|
|
|
|
|
* order to commit constants. TODO: Investigate "Disable Gather at Set
|
|
|
|
|
* Shader" to go back to legacy mode...
|
|
|
|
|
*
|
|
|
|
|
* The null writes of the generation shader also appear to disturb the next
|
|
|
|
|
* RT writes, so we choose to reemit the binding table to a null RT on Gfx8
|
|
|
|
|
* too.
|
|
|
|
|
*/
|
|
|
|
|
struct iris_binder *binder = &ice->state.binder;
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), ptr) {
|
|
|
|
|
ptr.PointertoPSBindingTable =
|
|
|
|
|
binder->bt_offset[MESA_SHADER_FRAGMENT] >> IRIS_BT_OFFSET_SHIFT;
|
|
|
|
|
}
|
|
|
|
|
uint32_t *bt_map = binder->map + binder->bt_offset[MESA_SHADER_FRAGMENT];
|
|
|
|
|
uint32_t surf_base_offset = binder->bo->address;
|
|
|
|
|
bt_map[0] = ice->state.null_fb.offset - surf_base_offset;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
genX(maybe_emit_breakpoint)(batch, true);
|
|
|
|
|
|
|
|
|
|
iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
|
|
|
|
|
prim.VertexAccessType = SEQUENTIAL;
|
|
|
|
|
prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
|
|
|
|
|
prim.VertexCountPerInstance = 3;
|
|
|
|
|
prim.InstanceCount = 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* We've smashed all state compared to what the normal 3D pipeline
|
|
|
|
|
* rendering tracks for GL.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
uint64_t skip_bits = (IRIS_DIRTY_POLYGON_STIPPLE |
|
|
|
|
|
IRIS_DIRTY_SO_BUFFERS |
|
|
|
|
|
IRIS_DIRTY_SO_DECL_LIST |
|
|
|
|
|
IRIS_DIRTY_LINE_STIPPLE |
|
|
|
|
|
IRIS_ALL_DIRTY_FOR_COMPUTE |
|
|
|
|
|
IRIS_DIRTY_SCISSOR_RECT |
|
|
|
|
|
IRIS_DIRTY_VF);
|
|
|
|
|
/* Wa_14016820455
|
|
|
|
|
* On Gfx 12.5 platforms, the SF_CL_VIEWPORT pointer can be invalidated
|
|
|
|
|
* likely by a read cache invalidation when clipping is disabled, so we
|
|
|
|
|
* don't skip its dirty bit here, in order to reprogram it.
|
|
|
|
|
*/
|
|
|
|
|
if (GFX_VERx10 != 125)
|
|
|
|
|
skip_bits |= IRIS_DIRTY_SF_CL_VIEWPORT;
|
|
|
|
|
|
|
|
|
|
uint64_t skip_stage_bits = (IRIS_ALL_STAGE_DIRTY_FOR_COMPUTE |
|
|
|
|
|
IRIS_STAGE_DIRTY_UNCOMPILED_VS |
|
|
|
|
|
IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
|
|
|
|
|
IRIS_STAGE_DIRTY_UNCOMPILED_TES |
|
|
|
|
|
IRIS_STAGE_DIRTY_UNCOMPILED_GS |
|
|
|
|
|
IRIS_STAGE_DIRTY_UNCOMPILED_FS |
|
|
|
|
|
IRIS_STAGE_DIRTY_SAMPLER_STATES_VS |
|
|
|
|
|
IRIS_STAGE_DIRTY_SAMPLER_STATES_TCS |
|
|
|
|
|
IRIS_STAGE_DIRTY_SAMPLER_STATES_TES |
|
|
|
|
|
IRIS_STAGE_DIRTY_SAMPLER_STATES_GS);
|
|
|
|
|
|
|
|
|
|
if (!ice->shaders.prog[MESA_SHADER_TESS_EVAL]) {
|
|
|
|
|
/* Generation disabled tessellation, but it was already off anyway */
|
|
|
|
|
skip_stage_bits |= IRIS_STAGE_DIRTY_TCS |
|
|
|
|
|
IRIS_STAGE_DIRTY_TES |
|
|
|
|
|
IRIS_STAGE_DIRTY_CONSTANTS_TCS |
|
|
|
|
|
IRIS_STAGE_DIRTY_CONSTANTS_TES |
|
|
|
|
|
IRIS_STAGE_DIRTY_BINDINGS_TCS |
|
|
|
|
|
IRIS_STAGE_DIRTY_BINDINGS_TES;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!ice->shaders.prog[MESA_SHADER_GEOMETRY]) {
|
|
|
|
|
/* Generation disabled geometry shaders, but it was already off
|
|
|
|
|
* anyway
|
|
|
|
|
*/
|
|
|
|
|
skip_stage_bits |= IRIS_STAGE_DIRTY_GS |
|
|
|
|
|
IRIS_STAGE_DIRTY_CONSTANTS_GS |
|
|
|
|
|
IRIS_STAGE_DIRTY_BINDINGS_GS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ice->state.dirty |= ~skip_bits;
|
|
|
|
|
ice->state.stage_dirty |= ~skip_stage_bits;
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < ARRAY_SIZE(ice->shaders.urb.cfg.size); i++)
|
|
|
|
|
ice->shaders.urb.cfg.size[i] = 0;
|
|
|
|
|
|
|
|
|
|
#if GFX_VER <= 9
|
|
|
|
|
/* Now reupdate the binding tables with the new offsets for the actual
|
|
|
|
|
* application shaders.
|
|
|
|
|
*/
|
|
|
|
|
iris_binder_reserve_3d(ice);
|
|
|
|
|
screen->vtbl.update_binder_address(batch, binder);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#define RING_SIZE (128 * 1024)
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
ensure_ring_bo(struct iris_context *ice, struct iris_screen *screen)
|
|
|
|
|
{
|
|
|
|
|
struct iris_bufmgr *bufmgr = screen->bufmgr;
|
|
|
|
|
|
|
|
|
|
if (ice->draw.generation.ring_bo != NULL)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
ice->draw.generation.ring_bo =
|
|
|
|
|
iris_bo_alloc(bufmgr, "gen ring",
|
|
|
|
|
RING_SIZE, 8, IRIS_MEMZONE_OTHER,
|
|
|
|
|
BO_ALLOC_NO_SUBALLOC);
|
2024-01-12 10:23:39 -08:00
|
|
|
iris_get_backing_bo(ice->draw.generation.ring_bo)->real.capture = true;
|
2023-12-20 12:15:43 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct iris_gen_indirect_params *
|
|
|
|
|
genX(emit_indirect_generate)(struct iris_batch *batch,
|
|
|
|
|
const struct pipe_draw_info *draw,
|
|
|
|
|
const struct pipe_draw_indirect_info *indirect,
|
|
|
|
|
const struct pipe_draw_start_count_bias *sc,
|
|
|
|
|
struct iris_address *out_params_addr)
|
|
|
|
|
{
|
|
|
|
|
struct iris_screen *screen = batch->screen;
|
|
|
|
|
struct iris_context *ice = batch->ice;
|
|
|
|
|
|
|
|
|
|
iris_ensure_indirect_generation_shader(batch);
|
|
|
|
|
ensure_ring_bo(ice, screen);
|
|
|
|
|
|
|
|
|
|
const size_t struct_stride = draw->index_size > 0 ?
|
|
|
|
|
sizeof(uint32_t) * 5 :
|
|
|
|
|
sizeof(uint32_t) * 4;
|
|
|
|
|
unsigned cmd_stride = 0;
|
|
|
|
|
if (ice->state.vs_uses_draw_params ||
|
|
|
|
|
ice->state.vs_uses_derived_draw_params) {
|
|
|
|
|
cmd_stride += 4; /* 3DSTATE_VERTEX_BUFFERS */
|
|
|
|
|
|
|
|
|
|
if (ice->state.vs_uses_draw_params)
|
|
|
|
|
cmd_stride += 4 * GENX(VERTEX_BUFFER_STATE_length);
|
|
|
|
|
|
|
|
|
|
if (ice->state.vs_uses_derived_draw_params)
|
|
|
|
|
cmd_stride += 4 * GENX(VERTEX_BUFFER_STATE_length);
|
|
|
|
|
}
|
|
|
|
|
cmd_stride += 4 * GENX(3DPRIMITIVE_length);
|
|
|
|
|
|
|
|
|
|
const unsigned setup_dws =
|
|
|
|
|
#if GFX_VER >= 12
|
|
|
|
|
GENX(MI_ARB_CHECK_length) +
|
|
|
|
|
#endif
|
|
|
|
|
GENX(MI_BATCH_BUFFER_START_length);
|
|
|
|
|
const unsigned ring_count =
|
|
|
|
|
(RING_SIZE - 4 * setup_dws) /
|
|
|
|
|
(cmd_stride + 4 * 2 /* draw_id, is_indexed_draw */);
|
|
|
|
|
|
|
|
|
|
uint32_t params_size = align(sizeof(struct iris_gen_indirect_params), 32);
|
|
|
|
|
struct iris_gen_indirect_params *params =
|
|
|
|
|
upload_state(batch, ice->ctx.const_uploader,
|
|
|
|
|
&ice->draw.generation.params,
|
|
|
|
|
params_size, 64);
|
|
|
|
|
*out_params_addr =
|
|
|
|
|
ro_bo(iris_resource_bo(ice->draw.generation.params.res),
|
|
|
|
|
ice->draw.generation.params.offset);
|
|
|
|
|
|
|
|
|
|
iris_use_pinned_bo(batch,
|
|
|
|
|
iris_resource_bo(indirect->buffer),
|
|
|
|
|
false, IRIS_DOMAIN_NONE);
|
|
|
|
|
if (indirect->indirect_draw_count) {
|
|
|
|
|
iris_use_pinned_bo(batch,
|
|
|
|
|
iris_resource_bo(indirect->indirect_draw_count),
|
|
|
|
|
false, IRIS_DOMAIN_NONE);
|
|
|
|
|
}
|
|
|
|
|
iris_use_pinned_bo(batch, ice->draw.generation.ring_bo,
|
|
|
|
|
false, IRIS_DOMAIN_NONE);
|
|
|
|
|
|
|
|
|
|
*params = (struct iris_gen_indirect_params) {
|
|
|
|
|
.generated_cmds_addr = ice->draw.generation.ring_bo->address,
|
|
|
|
|
.ring_count = ring_count,
|
|
|
|
|
.draw_id_addr = ice->draw.generation.ring_bo->address +
|
|
|
|
|
ring_count * cmd_stride +
|
|
|
|
|
4 * GENX(MI_BATCH_BUFFER_START_length),
|
|
|
|
|
.draw_count_addr = indirect->indirect_draw_count ?
|
|
|
|
|
(iris_resource_bo(indirect->indirect_draw_count)->address +
|
|
|
|
|
indirect->indirect_draw_count_offset) : 0,
|
|
|
|
|
.indirect_data_addr = iris_resource_bo(indirect->buffer)->address +
|
|
|
|
|
indirect->offset,
|
|
|
|
|
.indirect_data_stride = indirect->stride == 0 ?
|
|
|
|
|
struct_stride : indirect->stride,
|
|
|
|
|
.max_draw_count = indirect->draw_count,
|
|
|
|
|
.flags = (draw->index_size > 0 ? ANV_GENERATED_FLAG_INDEXED : 0) |
|
|
|
|
|
(ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT ?
|
|
|
|
|
ANV_GENERATED_FLAG_PREDICATED : 0) |
|
|
|
|
|
(ice->state.vs_uses_draw_params ?
|
|
|
|
|
ANV_GENERATED_FLAG_BASE : 0) |
|
|
|
|
|
(ice->state.vs_uses_derived_draw_params ?
|
|
|
|
|
ANV_GENERATED_FLAG_DRAWID : 0) |
|
|
|
|
|
(iris_mocs(NULL, &screen->isl_dev,
|
|
|
|
|
ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
|
|
|
|
|
((cmd_stride / 4) << 16) |
|
|
|
|
|
util_bitcount64(ice->state.bound_vertex_buffers) << 24,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
genX(maybe_emit_breakpoint)(batch, true);
|
|
|
|
|
|
|
|
|
|
emit_indirect_generate_draw(batch, *out_params_addr, params_size,
|
|
|
|
|
MIN2(ring_count, indirect->draw_count));
|
|
|
|
|
|
|
|
|
|
genX(emit_3dprimitive_was)(batch, indirect, ice->state.prim_mode, sc->count);
|
|
|
|
|
genX(maybe_emit_breakpoint)(batch, false);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return params;
|
|
|
|
|
}
|