/* Copyright © 2023 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

/* NOTE(review): the four bare #include directives below (two here, two in
 * the HAVE_VALGRIND branch) carry no header name in this view of the file;
 * confirm the intended headers before building.
 */
#include
#include

#ifdef HAVE_VALGRIND
#include
#include
#define VG(x) x
#else
#define VG(x)
#endif

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_upload_mgr.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#include "intel/compiler/brw_compiler.h"
#include "intel/common/intel_aux_map.h"
#include "intel/common/intel_l3_config.h"
#include "intel/common/intel_sample_positions.h"
#include "intel/ds/intel_tracepoints.h"
#include "iris_batch.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_pipe.h"
#include "iris_resource.h"
#include "iris_utrace.h"

#include "iris_genx_macros.h"
#include "intel/common/intel_genX_state.h"

#include "drm-uapi/i915_drm.h"

#include "libintel_shaders.h"

/* Pull in the per-generation shader code header selected by GFX_VERx10. */
#if GFX_VERx10 == 80
# include "intel_gfx8_shaders_code.h"
#elif GFX_VERx10 == 90
# include "intel_gfx9_shaders_code.h"
#elif GFX_VERx10 == 110
# include "intel_gfx11_shaders_code.h"
#elif GFX_VERx10 == 120
# include "intel_gfx12_shaders_code.h"
#elif GFX_VERx10 == 125
# include "intel_gfx125_shaders_code.h"
#elif GFX_VERx10 == 200
# include "intel_gfx20_shaders_code.h"
#else
# error "Unsupported generation"
#endif

/* Load one scalar of `bit_size` bits from the uniform storage, at the byte
 * offset of `field_name` within `struct_name`.
 */
#define load_param(b, bit_size, struct_name, field_name)          \
   nir_load_uniform(b, 1, bit_size, nir_imm_int(b, 0),            \
                    .base = offsetof(struct_name, field_name),   \
                    .range = bit_size / 8)

/**
 * Build NIR computing a linear index for the current fragment.
 *
 * The first two components of the fragment coordinate are converted to
 * 32-bit integers and combined as y * 8192 + x.
 */
static nir_def *
load_fragment_index(nir_builder *b)
{
   nir_def *pos_in = nir_f2i32(b, nir_trim_vector(b, nir_load_frag_coord(b), 2));
   return nir_iadd(b,
                   nir_imul_imm(b, nir_channel(b, pos_in, 1), 8192),
                   nir_channel(b, pos_in, 0));
}

/**
 * Deserialize genX(intel_shaders_nir) into a nir_shader.
 *
 * The screen's MESA_SHADER_KERNEL NIR options are used, and `mem_ctx` is
 * handed to nir_deserialize() as the memory context of the result.
 */
static nir_shader *
load_shader_lib(struct iris_screen *screen, void *mem_ctx)
{
   const nir_shader_compiler_options *nir_options =
      screen->brw->nir_options[MESA_SHADER_KERNEL];

   struct blob_reader blob;
   blob_reader_init(&blob, (void *)genX(intel_shaders_nir),
                    sizeof(genX(intel_shaders_nir)));
   return nir_deserialize(mem_ctx, nir_options, &blob);
}

/**
 * Emit the call to genX(libiris_write_draw) into the builder.
 *
 * Every argument except the last is loaded from the uniform storage at the
 * offset of the matching field of struct iris_gen_indirect_params; the last
 * argument is the fragment index from load_fragment_index().
 *
 * `screen` is not used by this function.
 *
 * Returns sizeof(struct iris_gen_indirect_params).
 */
static unsigned
iris_call_generation_shader(struct iris_screen *screen, nir_builder *b)
{
   genX(libiris_write_draw)(
      b,
      load_param(b, 64, struct iris_gen_indirect_params, generated_cmds_addr),
      load_param(b, 64, struct iris_gen_indirect_params, indirect_data_addr),
      load_param(b, 64, struct iris_gen_indirect_params, draw_id_addr),
      load_param(b, 32, struct iris_gen_indirect_params, indirect_data_stride),
      load_param(b, 64, struct iris_gen_indirect_params, draw_count_addr),
      load_param(b, 32, struct iris_gen_indirect_params, draw_base),
      load_param(b, 32, struct iris_gen_indirect_params, max_draw_count),
      load_param(b, 32, struct iris_gen_indirect_params, flags),
      load_param(b, 32, struct iris_gen_indirect_params, ring_count),
      load_param(b, 64, struct iris_gen_indirect_params, gen_addr),
      load_param(b, 64, struct iris_gen_indirect_params, end_addr),
      load_fragment_index(b));
   return sizeof(struct iris_gen_indirect_params);
}

/**
 * Install this generation's load_shader_lib and call_generation_shader
 * hooks in the screen vtable.
 */
void
genX(init_screen_gen_state)(struct iris_screen *screen)
{
   screen->vtbl.load_shader_lib = load_shader_lib;
   screen->vtbl.call_generation_shader = iris_call_generation_shader;
}

/**
 * Allocate `size` bytes (aligned to `alignment`) from `uploader`.
 *
 * The resource and offset of the allocation are stored in `ref`, and the
 * resource's BO is pinned in `batch`.  Unlike stream_state(), ref->offset is
 * left exactly as written by u_upload_alloc().
 *
 * Returns the CPU pointer written by u_upload_alloc().
 *
 * NOTE(review): the result of u_upload_alloc() is not checked before
 * ref->res is used — confirm whether allocation failure can reach here.
 */
static void *
upload_state(struct iris_batch *batch,
             struct u_upload_mgr *uploader,
             struct iris_state_ref *ref,
             unsigned size,
             unsigned alignment)
{
   void *p = NULL;
   u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
   iris_use_pinned_bo(batch, iris_resource_bo(ref->res), false, IRIS_DOMAIN_NONE);
   return p;
}

/**
 * Stream out temporary/short-lived state.
 *
 * This allocates space, pins the BO, and includes the BO address in the
 * returned offset (which works because all state lives in 32-bit memory
 * zones).
 *
 * The size of the allocation is also recorded in batch->state_sizes, keyed
 * by bo->address plus the allocation offset.
 */
static uint32_t *
stream_state(struct iris_batch *batch,
             struct u_upload_mgr *uploader,
             struct pipe_resource **out_res,
             unsigned size,
             unsigned alignment,
             uint32_t *out_offset)
{
   void *ptr = NULL;

   u_upload_alloc(uploader, 0, size, alignment, out_offset, out_res, &ptr);

   struct iris_bo *bo = iris_resource_bo(*out_res);
   iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);

   iris_record_state_size(batch->state_sizes,
                          bo->address + *out_offset, size);

   /* Bias the offset by the BO's offset from its base address. */
   *out_offset += iris_bo_offset_from_base_address(bo);

   return ptr;
}

/**
 * Emit the 3D state and the RECTLIST draw that runs the generation shader.
 *
 * \param batch        batch receiving the commands
 * \param params_addr  address programmed as the fragment shader's constant
 *                     buffer
 * \param params_size  size of the parameters; programmed as a read length
 *                     of DIV_ROUND_UP(params_size, 32)
 * \param ring_count   number of fragments to cover; determines the extent
 *                     of the rectangle that is drawn
 *
 * The function overwrites most 3D pipeline state, so on exit it flags all
 * but a few dirty bits in ice->state.dirty / ice->state.stage_dirty and
 * resets the URB sizes it temporarily set in ice->shaders.urb.cfg.
 */
static void
emit_indirect_generate_draw(struct iris_batch *batch,
                            struct iris_address params_addr,
                            unsigned params_size,
                            unsigned ring_count)
{
   struct iris_screen *screen = batch->screen;
   struct iris_context *ice = batch->ice;
   struct isl_device *isl_dev = &screen->isl_dev;
   const struct intel_device_info *devinfo = screen->devinfo;

   /* State emission */

   /* One command header dword followed by two VERTEX_ELEMENT_STATE entries. */
   uint32_t ves_dws[1 + 2 * GENX(VERTEX_ELEMENT_STATE_length)];
   iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), ves_dws, ve) {
      ve.DWordLength = 1 + GENX(VERTEX_ELEMENT_STATE_length) * 2 -
                       GENX(3DSTATE_VERTEX_ELEMENTS_length_bias);
   }
   /* Element 0: sourced from vertex buffer index 1; only component 0 is
    * stored from the source, the remaining components are stored as 0.
    */
   iris_pack_state(GENX(VERTEX_ELEMENT_STATE), &ves_dws[1], ve) {
      ve.VertexBufferIndex = 1;
      ve.Valid = true;
      ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
      ve.SourceElementOffset = 0;
      ve.Component0Control = VFCOMP_STORE_SRC;
      ve.Component1Control = VFCOMP_STORE_0;
      ve.Component2Control = VFCOMP_STORE_0;
      ve.Component3Control = VFCOMP_STORE_0;
   }
   /* Element 1: xyz from vertex buffer index 0 (the rectangle vertices
    * uploaded further down), with the fourth component stored as 1.0.
    */
   iris_pack_state(GENX(VERTEX_ELEMENT_STATE), &ves_dws[3], ve) {
      ve.VertexBufferIndex = 0;
      ve.Valid = true;
      ve.SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT;
      ve.SourceElementOffset = 0;
      ve.Component0Control = VFCOMP_STORE_SRC;
      ve.Component1Control = VFCOMP_STORE_SRC;
      ve.Component2Control = VFCOMP_STORE_SRC;
      ve.Component3Control = VFCOMP_STORE_1_FP;
   }

   iris_batch_emit(batch, ves_dws, sizeof(ves_dws));

   iris_emit_cmd(batch, GENX(3DSTATE_VF_STATISTICS), vf);
   iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgvs) {
      sgvs.InstanceIDEnable = true;
      sgvs.InstanceIDComponentNumber = COMP_1;
      sgvs.InstanceIDElementOffset = 0;
   }
#if GFX_VER >= 11
   iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS_2), sgvs);
#endif
   /* Instancing is disabled for both vertex elements. */
   iris_emit_cmd(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
      vfi.InstancingEnable = false;
      vfi.VertexElementIndex = 0;
   }
   iris_emit_cmd(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
      vfi.InstancingEnable = false;
      vfi.VertexElementIndex = 1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
      topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
   }

   /* Temporary URB sizes for this draw; they are reset to 0 at the end of
    * this function.
    */
   ice->shaders.urb.cfg.size[MESA_SHADER_VERTEX] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_TESS_CTRL] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_TESS_EVAL] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_GEOMETRY] = 1;
   genX(emit_urb_config)(batch,
                         false /* has_tess_eval */,
                         false /* has_geometry */);

   iris_emit_cmd(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
      ps_blend.HasWriteableRT = true;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), wm);

#if GFX_VER >= 12
   iris_emit_cmd(batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
      db.DepthBoundsTestEnable = false;
      db.DepthBoundsTestMinValue = 0.0;
      db.DepthBoundsTestMaxValue = 1.0;
   }
#endif

   iris_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms);
   iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
      sm.SampleMask = 0x1;
   }

   /* The geometry stages and streamout are emitted with no fields set. */
   iris_emit_cmd(batch, GENX(3DSTATE_VS), vs);
   iris_emit_cmd(batch, GENX(3DSTATE_HS), hs);
   iris_emit_cmd(batch, GENX(3DSTATE_TE), te);
   iris_emit_cmd(batch, GENX(3DSTATE_DS), DS);

   iris_emit_cmd(batch, GENX(3DSTATE_STREAMOUT), so);

   iris_emit_cmd(batch, GENX(3DSTATE_GS), gs);

   iris_emit_cmd(batch, GENX(3DSTATE_CLIP), clip) {
      clip.PerspectiveDivideDisable = true;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_SF), sf) {
#if GFX_VER >= 12
      sf.DerefBlockSize = ice->state.urb_deref_block_size;
#endif
   }

   iris_emit_cmd(batch, GENX(3DSTATE_RASTER), raster) {
      raster.CullMode = CULLMODE_NONE;
   }

   /* Everything below that describes the fragment shader comes from the
    * generation shader stored in the context.
    */
   const struct iris_compiled_shader *shader = ice->draw.generation.shader;
   const struct iris_fs_data *fs_data = iris_fs_data_const(shader);

   iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) {
      sbe.VertexURBEntryReadOffset = 1;
      sbe.NumberofSFOutputAttributes = fs_data->num_varying_inputs;
      sbe.VertexURBEntryReadLength = MAX2((fs_data->num_varying_inputs + 1) / 2, 1);
      sbe.ConstantInterpolationEnable = fs_data->flat_inputs;
      sbe.ForceVertexURBEntryReadLength = true;
      sbe.ForceVertexURBEntryReadOffset = true;
#if GFX_VER >= 9
      for (unsigned i = 0; i < 32; i++)
         sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
#endif
   }

   iris_emit_cmd(batch, GENX(3DSTATE_WM), wm) {
      if (fs_data->has_side_effects || fs_data->uses_kill)
         wm.ForceThreadDispatchEnable = ForceON;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_PS), ps) {
      struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(shader->brw_prog_data);
      intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
                                  1 /* rasterization_samples */,
                                  0 /* msaa_flags */);

      ps.VectorMaskEnable = fs_data->uses_vmask;

      /* One binding table entry on Gfx9 only, none on other generations. */
      ps.BindingTableEntryCount = GFX_VER == 9 ? 1 : 0;
#if GFX_VER < 20
      ps.PushConstantEnable = shader->nr_params > 0 ||
                              shader->ubo_ranges[0].length;
#endif

      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
#if GFX_VER < 20
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
#endif

      ps.KernelStartPointer0 = KSP(ice->draw.generation.shader) +
         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = KSP(ice->draw.generation.shader) +
         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
#if GFX_VER < 20
      ps.KernelStartPointer2 = KSP(ice->draw.generation.shader) +
         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
#endif

      ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_PS_EXTRA), psx) {
      psx.PixelShaderValid = true;
#if GFX_VER < 20
      psx.AttributeEnable = fs_data->num_varying_inputs > 0;
#endif
      psx.PixelShaderIsPerSample = fs_data->is_per_sample;
      psx.PixelShaderComputedDepthMode = fs_data->computed_depth_mode;
#if GFX_VER >= 9
#if GFX_VER >= 20
      assert(!fs_data->pulls_bary);
#else
      psx.PixelShaderPullsBary = fs_data->pulls_bary;
#endif
      psx.PixelShaderComputesStencil = fs_data->computed_stencil;
#endif
      psx.PixelShaderHasUAV = GFX_VER == 8;
   }

   /* Stream out a CC_VIEWPORT with a [0, 1] depth range and point at it. */
   iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
      uint32_t cc_vp_address;
      uint32_t *cc_vp_map =
         stream_state(batch, ice->state.dynamic_uploader,
                      &ice->state.last_res.cc_vp,
                      4 * GENX(CC_VIEWPORT_length), 32, &cc_vp_address);

      iris_pack_state(GENX(CC_VIEWPORT), cc_vp_map, ccv) {
         ccv.MinimumDepth = 0.0f;
         ccv.MaximumDepth = 1.0f;
      }
      cc.CCViewportPointer = cc_vp_address;
   }

#if GFX_VER >= 12
   /* Disable Primitive Replication. */
   iris_emit_cmd(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
#endif

#if GFX_VERx10 == 125
   /* DG2: Wa_22011440098
    * MTL: Wa_18022330953
    *
    * In 3D mode, after programming push constant alloc command immediately
    * program push constant command(ZERO length) without any commit between
    * them.
    *
    * Note that Wa_16011448509 isn't needed here as all address bits are zero.
    */
   iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_ALL), c) {
      /* Update empty push constants for all stages (bitmask = 11111b) */
      c.ShaderUpdateEnable = 0x1f;
      c.MOCS = iris_mocs(NULL, isl_dev, 0);
   }
#endif

   /* Rectangle extent: at most 8192 wide and DIV_ROUND_UP(ring_count, 8192)
    * tall.  The 8192 matches the row stride used by load_fragment_index().
    */
   float x0 = 0.0f, x1 = MIN2(ring_count, 8192);
   float y0 = 0.0f, y1 = DIV_ROUND_UP(ring_count, 8192);
   float z = 0.0f;

   float *vertices =
      upload_state(batch, ice->state.dynamic_uploader,
                   &ice->draw.generation.vertices,
                   ALIGN(9 * sizeof(float), 8), 8);

   vertices[0] = x1; vertices[1] = y1; vertices[2] = z; /* v0 */
   vertices[3] = x0; vertices[4] = y1; vertices[5] = z; /* v1 */
   vertices[6] = x0; vertices[7] = y0; vertices[8] = z; /* v2 */

   /* One command header dword followed by a single VERTEX_BUFFER_STATE
    * describing the three xyz vertices uploaded above.
    */
   uint32_t vbs_dws[1 + GENX(VERTEX_BUFFER_STATE_length)];
   iris_pack_command(GENX(3DSTATE_VERTEX_BUFFERS), vbs_dws, vbs) {
      vbs.DWordLength = ARRAY_SIZE(vbs_dws) -
                        GENX(3DSTATE_VERTEX_BUFFERS_length_bias);
   }
   _iris_pack_state(batch, GENX(VERTEX_BUFFER_STATE), &vbs_dws[1], vb) {
      vb.VertexBufferIndex = 0;
      vb.AddressModifyEnable = true;
      vb.BufferStartingAddress = ro_bo(iris_resource_bo(ice->draw.generation.vertices.res),
                                       ice->draw.generation.vertices.offset);
      vb.BufferPitch = 3 * sizeof(float);
      vb.BufferSize = 9 * sizeof(float);
      vb.MOCS = iris_mocs(NULL, isl_dev, ISL_SURF_USAGE_VERTEX_BUFFER_BIT);
#if GFX_VER >= 12
      vb.L3BypassDisable = true;
#endif
   }
   iris_batch_emit(batch, vbs_dws, sizeof(vbs_dws));

   /* Bind the generation parameters as fragment shader constants. */
#if GFX_VERx10 > 120
   uint32_t const_dws[GENX(3DSTATE_CONSTANT_ALL_length) +
                      GENX(3DSTATE_CONSTANT_ALL_DATA_length)];

   iris_pack_command(GENX(3DSTATE_CONSTANT_ALL), const_dws, all) {
      all.DWordLength = ARRAY_SIZE(const_dws) -
         GENX(3DSTATE_CONSTANT_ALL_length_bias);
      all.ShaderUpdateEnable = 1 << MESA_SHADER_FRAGMENT;
      all.MOCS = isl_mocs(isl_dev, 0, false);
      all.PointerBufferMask = 0x1;
   }
   _iris_pack_state(batch, GENX(3DSTATE_CONSTANT_ALL_DATA),
                    &const_dws[GENX(3DSTATE_CONSTANT_ALL_length)], data) {
      data.PointerToConstantBuffer = params_addr;
      data.ConstantBufferReadLength = DIV_ROUND_UP(params_size, 32);
   }
   iris_batch_emit(batch, const_dws, sizeof(const_dws));
#else
   /* The Skylake PRM contains the following restriction:
    *
    *    "The driver must ensure The following case does not occur without a
    *     flush to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read length
    *     equal to zero committed followed by a 3DSTATE_CONSTANT_* with buffer
    *     0 read length not equal to zero committed."
    *
    * To avoid this, we program the highest slot.
    */
   iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_PS), c) {
#if GFX_VER > 8
      c.MOCS = iris_mocs(NULL, isl_dev, ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
#endif
      c.ConstantBody.ReadLength[3] = DIV_ROUND_UP(params_size, 32);
      c.ConstantBody.Buffer[3] = params_addr;
   }
#endif

#if GFX_VER <= 9
   /* Gfx9 requires 3DSTATE_BINDING_TABLE_POINTERS_XS to be re-emitted in
    * order to commit constants. TODO: Investigate "Disable Gather at Set
    * Shader" to go back to legacy mode...
    *
    * The null writes of the generation shader also appear to disturb the next
    * RT writes, so we choose to reemit the binding table to a null RT on Gfx8
    * too.
    */
   struct iris_binder *binder = &ice->state.binder;
   iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), ptr) {
      ptr.PointertoPSBindingTable =
         binder->bt_offset[MESA_SHADER_FRAGMENT] >> IRIS_BT_OFFSET_SHIFT;
   }
   /* Entry 0 of the fragment binding table: the null framebuffer surface,
    * expressed relative to the binder BO address.
    */
   uint32_t *bt_map = binder->map + binder->bt_offset[MESA_SHADER_FRAGMENT];
   uint32_t surf_base_offset = binder->bo->address;
   bt_map[0] = ice->state.null_fb.offset - surf_base_offset;
#endif

   genX(maybe_emit_breakpoint)(batch, true);

   /* Draw one rectangle: 3 sequential vertices, a single instance. */
   iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
      prim.VertexAccessType         = SEQUENTIAL;
      prim.PrimitiveTopologyType    = _3DPRIM_RECTLIST;
      prim.VertexCountPerInstance   = 3;
      prim.InstanceCount            = 1;
   }

   /* We've smashed all state compared to what the normal 3D pipeline
    * rendering tracks for GL.
    */

   /* Bits listed in skip_bits / skip_stage_bits are the ones that are NOT
    * flagged dirty below; everything else is.
    */
   uint64_t skip_bits = (IRIS_DIRTY_POLYGON_STIPPLE |
                         IRIS_DIRTY_SO_BUFFERS |
                         IRIS_DIRTY_SO_DECL_LIST |
                         IRIS_DIRTY_LINE_STIPPLE |
                         IRIS_ALL_DIRTY_FOR_COMPUTE |
                         IRIS_DIRTY_SCISSOR_RECT |
                         IRIS_DIRTY_VF);
   /* Wa_14016820455
    * On Gfx 12.5 platforms, the SF_CL_VIEWPORT pointer can be invalidated
    * likely by a read cache invalidation when clipping is disabled, so we
    * don't skip its dirty bit here, in order to reprogram it.
    */
   if (GFX_VERx10 != 125)
      skip_bits |= IRIS_DIRTY_SF_CL_VIEWPORT;

   uint64_t skip_stage_bits = (IRIS_ALL_STAGE_DIRTY_FOR_COMPUTE |
                               IRIS_STAGE_DIRTY_UNCOMPILED_VS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_TES |
                               IRIS_STAGE_DIRTY_UNCOMPILED_GS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_FS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_VS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_TCS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_TES |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_GS);

   if (!ice->shaders.prog[MESA_SHADER_TESS_EVAL]) {
      /* Generation disabled tessellation, but it was already off anyway */
      skip_stage_bits |= IRIS_STAGE_DIRTY_TCS |
                         IRIS_STAGE_DIRTY_TES |
                         IRIS_STAGE_DIRTY_CONSTANTS_TCS |
                         IRIS_STAGE_DIRTY_CONSTANTS_TES |
                         IRIS_STAGE_DIRTY_BINDINGS_TCS |
                         IRIS_STAGE_DIRTY_BINDINGS_TES;
   }

   if (!ice->shaders.prog[MESA_SHADER_GEOMETRY]) {
      /* Generation disabled geometry shaders, but it was already off
       * anyway
       */
      skip_stage_bits |= IRIS_STAGE_DIRTY_GS |
                         IRIS_STAGE_DIRTY_CONSTANTS_GS |
                         IRIS_STAGE_DIRTY_BINDINGS_GS;
   }

   ice->state.dirty |= ~skip_bits;
   ice->state.stage_dirty |= ~skip_stage_bits;

   /* Drop the temporary URB sizes set earlier in this function. */
   for (int i = 0; i < ARRAY_SIZE(ice->shaders.urb.cfg.size); i++)
      ice->shaders.urb.cfg.size[i] = 0;

#if GFX_VER <= 9
   /* Now reupdate the binding tables with the new offsets for the actual
    * application shaders.
    */
   iris_binder_reserve_3d(ice);
   screen->vtbl.update_binder_address(batch, binder);
#endif
}

/* Size passed to iris_bo_alloc() for the generation ring BO. */
#define RING_SIZE (128 * 1024)

/**
 * Allocate ice->draw.generation.ring_bo on first use.
 *
 * Does nothing if the BO already exists.  Otherwise a RING_SIZE BO is
 * allocated without suballocation and its backing BO has real.capture set.
 *
 * NOTE(review): the result of iris_bo_alloc() is passed to
 * iris_get_backing_bo() without a NULL check — confirm how allocation
 * failure is expected to be handled.
 */
static void
ensure_ring_bo(struct iris_context *ice, struct iris_screen *screen)
{
   struct iris_bufmgr *bufmgr = screen->bufmgr;

   if (ice->draw.generation.ring_bo != NULL)
      return;

   ice->draw.generation.ring_bo =
      iris_bo_alloc(bufmgr, "gen ring",
                    RING_SIZE, 8, IRIS_MEMZONE_OTHER,
                    BO_ALLOC_NO_SUBALLOC);
   iris_get_backing_bo(ice->draw.generation.ring_bo)->real.capture = true;
}

/**
 * Prepare the generation parameters for an indirect draw and emit the
 * generation draw.
 *
 * \param batch            batch receiving the commands
 * \param draw             draw info; only index_size is read here
 * \param indirect         indirect draw description (buffer, offset, stride,
 *                         draw count and optional draw count buffer)
 * \param sc               start/count; sc->count is forwarded to
 *                         genX(emit_3dprimitive_was)
 * \param out_params_addr  receives the address of the uploaded parameters
 *
 * Returns the CPU pointer to the uploaded struct iris_gen_indirect_params.
 */
struct iris_gen_indirect_params *
genX(emit_indirect_generate)(struct iris_batch *batch,
                             const struct pipe_draw_info *draw,
                             const struct pipe_draw_indirect_info *indirect,
                             const struct pipe_draw_start_count_bias *sc,
                             struct iris_address *out_params_addr)
{
   struct iris_screen *screen = batch->screen;
   struct iris_context *ice = batch->ice;

   iris_ensure_indirect_generation_shader(batch);
   ensure_ring_bo(ice, screen);

   /* Default stride of one indirect record (5 dwords when indexed, 4
    * otherwise); used when indirect->stride is 0.
    */
   const size_t struct_stride = draw->index_size > 0 ?
      sizeof(uint32_t) * 5 :
      sizeof(uint32_t) * 4;

   /* Space taken in the ring by the commands of a single draw, accumulated
    * as 4 * <length> for each command.
    */
   unsigned cmd_stride = 0;
   if (ice->state.vs_uses_draw_params ||
       ice->state.vs_uses_derived_draw_params) {
      cmd_stride += 4; /* 3DSTATE_VERTEX_BUFFERS */

      if (ice->state.vs_uses_draw_params)
         cmd_stride += 4 * GENX(VERTEX_BUFFER_STATE_length);

      if (ice->state.vs_uses_derived_draw_params)
         cmd_stride += 4 * GENX(VERTEX_BUFFER_STATE_length);
   }
   cmd_stride += 4 * GENX(3DPRIMITIVE_length);

   const unsigned setup_dws =
#if GFX_VER >= 12
      GENX(MI_ARB_CHECK_length) +
#endif
      GENX(MI_BATCH_BUFFER_START_length);

   /* Number of draws that fit in the ring once the setup commands are
    * accounted for; each draw also needs 2 extra dwords.
    */
   const unsigned ring_count =
      (RING_SIZE - 4 * setup_dws) /
      (cmd_stride + 4 * 2 /* draw_id, is_indexed_draw */);

   uint32_t params_size = align(sizeof(struct iris_gen_indirect_params), 32);
   struct iris_gen_indirect_params *params =
      upload_state(batch, ice->ctx.const_uploader,
                   &ice->draw.generation.params,
                   params_size, 64);
   *out_params_addr =
      ro_bo(iris_resource_bo(ice->draw.generation.params.res),
            ice->draw.generation.params.offset);

   /* Pin every BO whose address is written into the parameters below. */
   iris_use_pinned_bo(batch,
                      iris_resource_bo(indirect->buffer),
                      false, IRIS_DOMAIN_NONE);
   if (indirect->indirect_draw_count) {
      iris_use_pinned_bo(batch,
                         iris_resource_bo(indirect->indirect_draw_count),
                         false, IRIS_DOMAIN_NONE);
   }
   iris_use_pinned_bo(batch, ice->draw.generation.ring_bo,
                      false, IRIS_DOMAIN_NONE);

   /* Fields not named in this initializer (draw_base, gen_addr and end_addr
    * among those the shader loads) are zero-initialized here; presumably the
    * caller fills them in through the returned pointer — TODO confirm.
    *
    * Layout of .flags as built below: the ANV_GENERATED_FLAG_* bits, the
    * vertex buffer MOCS shifted to bit 8, cmd_stride / 4 shifted to bit 16
    * and the number of bound vertex buffers shifted to bit 24.
    */
   *params = (struct iris_gen_indirect_params) {
      .generated_cmds_addr  = ice->draw.generation.ring_bo->address,
      .ring_count           = ring_count,
      .draw_id_addr         = ice->draw.generation.ring_bo->address +
                              ring_count * cmd_stride +
                              4 * GENX(MI_BATCH_BUFFER_START_length),
      .draw_count_addr      = indirect->indirect_draw_count ?
                              (iris_resource_bo(indirect->indirect_draw_count)->address +
                               indirect->indirect_draw_count_offset) : 0,
      .indirect_data_addr   = iris_resource_bo(indirect->buffer)->address +
                              indirect->offset,
      .indirect_data_stride = indirect->stride == 0 ?
                              struct_stride : indirect->stride,
      .max_draw_count       = indirect->draw_count,
      .flags                = (draw->index_size > 0 ? ANV_GENERATED_FLAG_INDEXED : 0) |
                              (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT ?
                               ANV_GENERATED_FLAG_PREDICATED : 0) |
                              (ice->state.vs_uses_draw_params ?
                               ANV_GENERATED_FLAG_BASE : 0) |
                              (ice->state.vs_uses_derived_draw_params ?
                               ANV_GENERATED_FLAG_DRAWID : 0) |
                              (iris_mocs(NULL, &screen->isl_dev,
                                         ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
                              ((cmd_stride / 4) << 16) |
                              util_bitcount64(ice->state.bound_vertex_buffers) << 24,
   };

   genX(maybe_emit_breakpoint)(batch, true);

   /* Never cover more fragments than there are draws requested. */
   emit_indirect_generate_draw(batch, *out_params_addr, params_size,
                               MIN2(ring_count, indirect->draw_count));

   genX(emit_3dprimitive_was)(batch, indirect, ice->state.prim_mode, sc->count);
   genX(maybe_emit_breakpoint)(batch, false);

   return params;
}