mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-10 23:20:14 +01:00
intel/elk: Remove multi-polygon support
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27629>
This commit is contained in:
parent
fd3a815a5b
commit
be73fa1434
9 changed files with 30 additions and 144 deletions
|
|
@ -297,8 +297,8 @@ iris_apply_elk_wm_prog_data(struct iris_compiled_shader *shader,
|
|||
iris->flat_inputs = elk->flat_inputs;
|
||||
iris->inputs = elk->inputs;
|
||||
iris->computed_depth_mode = elk->computed_depth_mode;
|
||||
iris->max_polygons = elk->max_polygons;
|
||||
iris->dispatch_multi = elk->dispatch_multi;
|
||||
iris->max_polygons = 1;
|
||||
iris->dispatch_multi = 0;
|
||||
iris->computed_stencil = elk->computed_stencil;
|
||||
iris->early_fragment_tests = elk->early_fragment_tests;
|
||||
iris->post_depth_coverage = elk->post_depth_coverage;
|
||||
|
|
|
|||
|
|
@ -89,10 +89,8 @@ intel_set_ps_dispatch_state(struct GENX(3DSTATE_PS) *ps,
|
|||
}
|
||||
|
||||
assert(enable_8 || enable_16 || enable_32);
|
||||
assert(!prog_data->dispatch_multi);
|
||||
|
||||
ps->_8PixelDispatchEnable = enable_8 ||
|
||||
(GFX_VER == 12 && prog_data->dispatch_multi);
|
||||
ps->_8PixelDispatchEnable = enable_8;
|
||||
ps->_16PixelDispatchEnable = enable_16;
|
||||
ps->_32PixelDispatchEnable = enable_32;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -833,18 +833,6 @@ struct elk_wm_prog_data {
|
|||
uint8_t color_outputs_written;
|
||||
uint8_t computed_depth_mode;
|
||||
|
||||
/**
|
||||
* Number of polygons handled in parallel by the multi-polygon PS
|
||||
* kernel.
|
||||
*/
|
||||
uint8_t max_polygons;
|
||||
|
||||
/**
|
||||
* Dispatch width of the multi-polygon PS kernel, or 0 if no
|
||||
* multi-polygon kernel was built.
|
||||
*/
|
||||
uint8_t dispatch_multi;
|
||||
|
||||
bool computed_stencil;
|
||||
bool early_fragment_tests;
|
||||
bool post_depth_coverage;
|
||||
|
|
@ -1791,7 +1779,7 @@ elk_cs_get_dispatch_info(const struct intel_device_info *devinfo,
|
|||
*/
|
||||
static inline bool
|
||||
elk_stage_has_packed_dispatch(ASSERTED const struct intel_device_info *devinfo,
|
||||
gl_shader_stage stage, unsigned max_polygons,
|
||||
gl_shader_stage stage,
|
||||
const struct elk_stage_prog_data *prog_data)
|
||||
{
|
||||
/* The code below makes assumptions about the hardware's thread dispatch
|
||||
|
|
@ -1814,8 +1802,7 @@ elk_stage_has_packed_dispatch(ASSERTED const struct intel_device_info *devinfo,
|
|||
const struct elk_wm_prog_data *wm_prog_data =
|
||||
(const struct elk_wm_prog_data *)prog_data;
|
||||
return !wm_prog_data->persample_dispatch &&
|
||||
wm_prog_data->uses_vmask &&
|
||||
max_polygons < 2;
|
||||
wm_prog_data->uses_vmask;
|
||||
}
|
||||
case MESA_SHADER_COMPUTE:
|
||||
/* Compute shaders will be spawned with either a fully enabled dispatch
|
||||
|
|
|
|||
|
|
@ -1505,48 +1505,14 @@ elk_fs_visitor::assign_urb_setup()
|
|||
* 3 Attr0.w a1-a0 a2-a0 N/A a0
|
||||
* 4 Attr1.x a1-a0 a2-a0 N/A a0
|
||||
* ...
|
||||
*
|
||||
* In multipolygon mode that no longer works since
|
||||
* different channels may be processing polygons with
|
||||
* different plane parameters, so each parameter above is
|
||||
* represented as a dispatch_width-wide vector:
|
||||
*
|
||||
* elk_fs_reg::nr elk_fs_reg::offset Input Comp0 ... CompN
|
||||
* 0 0 Attr0.x a1[0]-a0[0] ... a1[N]-a0[N]
|
||||
* 0 4 * dispatch_width Attr0.x a2[0]-a0[0] ... a2[N]-a0[N]
|
||||
* 0 8 * dispatch_width Attr0.x N/A ... N/A
|
||||
* 0 12 * dispatch_width Attr0.x a0[0] ... a0[N]
|
||||
* 1 0 Attr0.y a1[0]-a0[0] ... a1[N]-a0[N]
|
||||
* ...
|
||||
*
|
||||
* Note that many of the components on a single row above
|
||||
* are likely to be replicated multiple times (if, say, a
|
||||
* single SIMD thread is only processing 2 different
|
||||
* polygons), so plane parameters aren't actually stored
|
||||
* in GRF memory with that layout to avoid wasting space.
|
||||
* Instead we compose ATTR register regions with a 2D
|
||||
* region that walks through the parameters of each
|
||||
* polygon with the correct stride, reading the parameter
|
||||
* corresponding to each channel directly from the PS
|
||||
* thread payload.
|
||||
*
|
||||
* The latter layout corresponds to a param_width equal to
|
||||
* dispatch_width, while the former (scalar parameter)
|
||||
* layout has a param_width of 1.
|
||||
*
|
||||
* Gfx20+ represent plane parameters in a format similar
|
||||
* to the above, except the parameters are packed in 12B
|
||||
* and ordered like "a0, a1-a0, a2-a0" instead of the
|
||||
* above vec4 representation with a missing component.
|
||||
*/
|
||||
const unsigned param_width = (max_polygons > 1 ? dispatch_width : 1);
|
||||
const unsigned param_width = 1;
|
||||
|
||||
/* Size of a single scalar component of a plane parameter
|
||||
* in bytes.
|
||||
*/
|
||||
const unsigned chan_sz = 4;
|
||||
struct elk_reg reg;
|
||||
assert(max_polygons > 0);
|
||||
|
||||
/* Calculate the base register on the thread payload of
|
||||
* either the block of vertex setup data or the block of
|
||||
|
|
@ -1558,7 +1524,7 @@ elk_fs_visitor::assign_urb_setup()
|
|||
const unsigned base = urb_start +
|
||||
(per_prim ? 0 :
|
||||
ALIGN(prog_data->num_per_primitive_inputs / 2,
|
||||
reg_unit(devinfo)) * max_polygons);
|
||||
reg_unit(devinfo)));
|
||||
const unsigned idx = per_prim ? inst->src[i].nr :
|
||||
inst->src[i].nr - prog_data->num_per_primitive_inputs;
|
||||
|
||||
|
|
@ -1570,7 +1536,7 @@ elk_fs_visitor::assign_urb_setup()
|
|||
* Earlier platforms and per-primitive block pack 2 logical
|
||||
* input components per 32B register.
|
||||
*/
|
||||
const unsigned grf = base + idx / 2 * max_polygons;
|
||||
const unsigned grf = base + idx / 2;
|
||||
assert(inst->src[i].offset / param_width < REG_SIZE / 2);
|
||||
const unsigned delta = (idx % 2) * (REG_SIZE / 2) +
|
||||
inst->src[i].offset / (param_width * chan_sz) * chan_sz +
|
||||
|
|
@ -1594,13 +1560,13 @@ elk_fs_visitor::assign_urb_setup()
|
|||
* but they may be replicated multiple times for multipolygon
|
||||
* dispatch.
|
||||
*/
|
||||
this->first_non_payload_grf += prog_data->num_varying_inputs * 2 * max_polygons;
|
||||
this->first_non_payload_grf += prog_data->num_varying_inputs * 2;
|
||||
|
||||
/* Unlike regular attributes, per-primitive attributes have all 4 channels
|
||||
* in the same slot, so each GRF can store two slots.
|
||||
*/
|
||||
assert(prog_data->num_per_primitive_inputs % 2 == 0);
|
||||
this->first_non_payload_grf += prog_data->num_per_primitive_inputs / 2 * max_polygons;
|
||||
this->first_non_payload_grf += prog_data->num_per_primitive_inputs / 2;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -2931,8 +2897,7 @@ elk_fs_visitor::eliminate_find_live_channel()
|
|||
bool progress = false;
|
||||
unsigned depth = 0;
|
||||
|
||||
if (!elk_stage_has_packed_dispatch(devinfo, stage, max_polygons,
|
||||
stage_prog_data)) {
|
||||
if (!elk_stage_has_packed_dispatch(devinfo, stage, stage_prog_data)) {
|
||||
/* The optimization below assumes that channel zero is live on thread
|
||||
* dispatch, which may not be the case if the fixed function dispatches
|
||||
* threads sparsely.
|
||||
|
|
@ -4226,19 +4191,6 @@ get_fpu_lowered_simd_width(const elk_fs_visitor *shader,
|
|||
/* Maximum execution size representable in the instruction controls. */
|
||||
unsigned max_width = MIN2(32, inst->exec_size);
|
||||
|
||||
/* Number of channels per polygon handled by a multipolygon PS shader. */
|
||||
const unsigned poly_width = shader->dispatch_width /
|
||||
MAX2(1, shader->max_polygons);
|
||||
|
||||
/* Number of registers that will be read by an ATTR source if
|
||||
* present for multipolygon PS shaders, since the PS vertex setup
|
||||
* data for each polygon is stored in different contiguous GRFs.
|
||||
*/
|
||||
const unsigned attr_reg_count = (shader->stage != MESA_SHADER_FRAGMENT ||
|
||||
shader->max_polygons < 2 ? 0 :
|
||||
DIV_ROUND_UP(inst->exec_size,
|
||||
poly_width) * reg_unit(devinfo));
|
||||
|
||||
/* According to the PRMs:
|
||||
* "A. In Direct Addressing mode, a source cannot span more than 2
|
||||
* adjacent GRF registers.
|
||||
|
|
@ -4251,8 +4203,7 @@ get_fpu_lowered_simd_width(const elk_fs_visitor *shader,
|
|||
unsigned reg_count = DIV_ROUND_UP(inst->size_written, REG_SIZE);
|
||||
|
||||
for (unsigned i = 0; i < inst->sources; i++)
|
||||
reg_count = MAX3(reg_count, DIV_ROUND_UP(inst->size_read(i), REG_SIZE),
|
||||
(inst->src[i].file == ATTR ? attr_reg_count : 0));
|
||||
reg_count = MAX2(reg_count, DIV_ROUND_UP(inst->size_read(i), REG_SIZE));
|
||||
|
||||
/* Calculate the maximum execution size of the instruction based on the
|
||||
* factor by which it goes over the hardware limit of 2 GRFs.
|
||||
|
|
@ -5200,8 +5151,7 @@ elk_fs_visitor::lower_find_live_channel()
|
|||
return false;
|
||||
|
||||
bool packed_dispatch =
|
||||
elk_stage_has_packed_dispatch(devinfo, stage, max_polygons,
|
||||
stage_prog_data);
|
||||
elk_stage_has_packed_dispatch(devinfo, stage, stage_prog_data);
|
||||
bool vmask =
|
||||
stage == MESA_SHADER_FRAGMENT &&
|
||||
elk_wm_prog_data(stage_prog_data)->uses_vmask;
|
||||
|
|
@ -6505,7 +6455,6 @@ elk_nir_populate_wm_prog_data(nir_shader *shader,
|
|||
prog_data->uses_omask = !key->ignore_sample_mask_out &&
|
||||
(shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));
|
||||
prog_data->color_outputs_written = key->color_outputs_valid;
|
||||
prog_data->max_polygons = 1;
|
||||
prog_data->computed_depth_mode = computed_depth_mode(shader);
|
||||
prog_data->computed_stencil =
|
||||
shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL);
|
||||
|
|
@ -6681,7 +6630,7 @@ elk_compile_fs(const struct elk_compiler *compiler,
|
|||
bool has_spilled = false;
|
||||
|
||||
v8 = std::make_unique<elk_fs_visitor>(compiler, ¶ms->base, key,
|
||||
prog_data, nir, 8, 1,
|
||||
prog_data, nir, 8,
|
||||
params->base.stats != NULL,
|
||||
debug_enabled);
|
||||
if (!v8->run_fs(allow_spilling, false /* do_rep_send */)) {
|
||||
|
|
@ -6716,7 +6665,7 @@ elk_compile_fs(const struct elk_compiler *compiler,
|
|||
(INTEL_SIMD(FS, 16) || params->use_rep_send)) {
|
||||
/* Try a SIMD16 compile */
|
||||
v16 = std::make_unique<elk_fs_visitor>(compiler, ¶ms->base, key,
|
||||
prog_data, nir, 16, 1,
|
||||
prog_data, nir, 16,
|
||||
params->base.stats != NULL,
|
||||
debug_enabled);
|
||||
if (v8)
|
||||
|
|
@ -6749,7 +6698,7 @@ elk_compile_fs(const struct elk_compiler *compiler,
|
|||
INTEL_SIMD(FS, 32)) {
|
||||
/* Try a SIMD32 compile */
|
||||
v32 = std::make_unique<elk_fs_visitor>(compiler, ¶ms->base, key,
|
||||
prog_data, nir, 32, 1,
|
||||
prog_data, nir, 32,
|
||||
params->base.stats != NULL,
|
||||
debug_enabled);
|
||||
if (v8)
|
||||
|
|
@ -6830,7 +6779,7 @@ elk_compile_fs(const struct elk_compiler *compiler,
|
|||
if (simd8_cfg) {
|
||||
prog_data->dispatch_8 = true;
|
||||
g.generate_code(simd8_cfg, 8, v8->shader_stats,
|
||||
v8->performance_analysis.require(), stats, 1);
|
||||
v8->performance_analysis.require(), stats);
|
||||
stats = stats ? stats + 1 : NULL;
|
||||
max_dispatch_width = 8;
|
||||
}
|
||||
|
|
@ -6839,7 +6788,7 @@ elk_compile_fs(const struct elk_compiler *compiler,
|
|||
prog_data->dispatch_16 = true;
|
||||
prog_data->prog_offset_16 = g.generate_code(
|
||||
simd16_cfg, 16, v16->shader_stats,
|
||||
v16->performance_analysis.require(), stats, 1);
|
||||
v16->performance_analysis.require(), stats);
|
||||
stats = stats ? stats + 1 : NULL;
|
||||
max_dispatch_width = 16;
|
||||
}
|
||||
|
|
@ -6848,7 +6797,7 @@ elk_compile_fs(const struct elk_compiler *compiler,
|
|||
prog_data->dispatch_32 = true;
|
||||
prog_data->prog_offset_32 = g.generate_code(
|
||||
simd32_cfg, 32, v32->shader_stats,
|
||||
v32->performance_analysis.require(), stats, 1);
|
||||
v32->performance_analysis.require(), stats);
|
||||
stats = stats ? stats + 1 : NULL;
|
||||
max_dispatch_width = 32;
|
||||
}
|
||||
|
|
@ -7146,7 +7095,6 @@ elk_fs_test_dispatch_packing(const fs_builder &bld)
|
|||
elk_wm_prog_data(shader->stage_prog_data)->uses_vmask;
|
||||
|
||||
if (elk_stage_has_packed_dispatch(shader->devinfo, stage,
|
||||
shader->max_polygons,
|
||||
shader->stage_prog_data)) {
|
||||
const fs_builder ubld = bld.exec_all().group(1, 0);
|
||||
const elk_fs_reg tmp = component(bld.vgrf(ELK_REGISTER_TYPE_UD), 0);
|
||||
|
|
|
|||
|
|
@ -177,7 +177,6 @@ public:
|
|||
struct elk_wm_prog_data *prog_data,
|
||||
const nir_shader *shader,
|
||||
unsigned dispatch_width,
|
||||
unsigned num_polygons,
|
||||
bool needs_register_pressure,
|
||||
bool debug_enabled);
|
||||
elk_fs_visitor(const struct elk_compiler *compiler,
|
||||
|
|
@ -400,7 +399,6 @@ public:
|
|||
bool needs_register_pressure;
|
||||
|
||||
const unsigned dispatch_width; /**< 8, 16 or 32 */
|
||||
const unsigned max_polygons;
|
||||
unsigned max_dispatch_width;
|
||||
|
||||
/* The API selected subgroup size */
|
||||
|
|
@ -451,8 +449,7 @@ public:
|
|||
int generate_code(const elk_cfg_t *cfg, int dispatch_width,
|
||||
struct shader_stats shader_stats,
|
||||
const elk::performance &perf,
|
||||
struct elk_compile_stats *stats,
|
||||
unsigned max_polygons = 0);
|
||||
struct elk_compile_stats *stats);
|
||||
void add_const_data(void *data, unsigned size);
|
||||
const unsigned *get_assembly();
|
||||
|
||||
|
|
|
|||
|
|
@ -660,8 +660,7 @@ instruction_requires_packed_data(elk_fs_inst *inst)
|
|||
static bool
|
||||
try_copy_propagate(const elk_compiler *compiler, elk_fs_inst *inst,
|
||||
acp_entry *entry, int arg,
|
||||
const elk::simple_allocator &alloc,
|
||||
uint8_t max_polygons)
|
||||
const elk::simple_allocator &alloc)
|
||||
{
|
||||
if (inst->src[arg].file != VGRF)
|
||||
return false;
|
||||
|
|
@ -799,17 +798,6 @@ try_copy_propagate(const elk_compiler *compiler, elk_fs_inst *inst,
|
|||
(reg_offset(inst->dst) % REG_SIZE) != (reg_offset(entry->src) % REG_SIZE))
|
||||
return false;
|
||||
|
||||
/* The <8;8,0> regions used for FS attributes in multipolygon
|
||||
* dispatch mode could violate regioning restrictions, don't copy
|
||||
* propagate them in such cases.
|
||||
*/
|
||||
if (entry->src.file == ATTR && max_polygons > 1 &&
|
||||
(has_dst_aligned_region_restriction(devinfo, inst, dst_type) ||
|
||||
instruction_requires_packed_data(inst) ||
|
||||
(inst->elk_is_3src(compiler) && arg == 2) ||
|
||||
entry->dst.type != inst->src[arg].type))
|
||||
return false;
|
||||
|
||||
/* Bail if the source FIXED_GRF region of the copy cannot be trivially
|
||||
* composed with the source region of the instruction -- E.g. because the
|
||||
* copy uses some extended stride greater than 4 not supported natively by
|
||||
|
|
@ -1245,8 +1233,7 @@ can_propagate_from(elk_fs_inst *inst)
|
|||
static bool
|
||||
opt_copy_propagation_local(const elk_compiler *compiler, linear_ctx *lin_ctx,
|
||||
elk_bblock_t *block, struct acp &acp,
|
||||
const elk::simple_allocator &alloc,
|
||||
uint8_t max_polygons)
|
||||
const elk::simple_allocator &alloc)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
|
|
@ -1266,8 +1253,7 @@ opt_copy_propagation_local(const elk_compiler *compiler, linear_ctx *lin_ctx,
|
|||
break;
|
||||
}
|
||||
} else {
|
||||
if (try_copy_propagate(compiler, inst, *iter, i, alloc,
|
||||
max_polygons)) {
|
||||
if (try_copy_propagate(compiler, inst, *iter, i, alloc)) {
|
||||
instruction_progress = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -1373,8 +1359,7 @@ elk_fs_visitor::opt_copy_propagation()
|
|||
*/
|
||||
foreach_block (block, cfg) {
|
||||
progress = opt_copy_propagation_local(compiler, lin_ctx, block,
|
||||
out_acp[block->num], alloc,
|
||||
max_polygons) || progress;
|
||||
out_acp[block->num], alloc) || progress;
|
||||
|
||||
/* If the destination of an ACP entry exists only within this block,
|
||||
* then there's no need to keep it for dataflow analysis. We can delete
|
||||
|
|
@ -1414,7 +1399,7 @@ elk_fs_visitor::opt_copy_propagation()
|
|||
}
|
||||
|
||||
progress = opt_copy_propagation_local(compiler, lin_ctx, block,
|
||||
in_acp, alloc, max_polygons) ||
|
||||
in_acp, alloc) ||
|
||||
progress;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1517,8 +1517,7 @@ int
|
|||
elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
|
||||
struct shader_stats shader_stats,
|
||||
const elk::performance &perf,
|
||||
struct elk_compile_stats *stats,
|
||||
unsigned max_polygons)
|
||||
struct elk_compile_stats *stats)
|
||||
{
|
||||
/* align to 64 byte boundary. */
|
||||
elk_realign(p, 64);
|
||||
|
|
@ -2273,7 +2272,6 @@ elk_fs_generator::generate_code(const elk_cfg_t *cfg, int dispatch_width,
|
|||
before_size, after_size);
|
||||
if (stats) {
|
||||
stats->dispatch_width = dispatch_width;
|
||||
stats->max_polygons = max_polygons;
|
||||
stats->max_dispatch_width = dispatch_width;
|
||||
stats->instructions = before_size / 16 - nop_count - sync_nop_count;
|
||||
stats->sends = send_count;
|
||||
|
|
|
|||
|
|
@ -64,19 +64,7 @@ elk_fs_visitor::interp_reg(const fs_builder &bld, unsigned location,
|
|||
const unsigned per_vertex_start = prog_data->num_per_primitive_inputs;
|
||||
const unsigned regnr = per_vertex_start + (nr * 4) + channel;
|
||||
|
||||
if (max_polygons > 1) {
|
||||
/* In multipolygon dispatch each plane parameter is a
|
||||
* dispatch_width-wide SIMD vector (see comment in
|
||||
* assign_urb_setup()), so we need to use offset() instead of
|
||||
* component() to select the specified parameter.
|
||||
*/
|
||||
const elk_fs_reg tmp = bld.vgrf(ELK_REGISTER_TYPE_UD);
|
||||
bld.MOV(tmp, offset(elk_fs_reg(ATTR, regnr, ELK_REGISTER_TYPE_UD),
|
||||
dispatch_width, comp));
|
||||
return retype(tmp, ELK_REGISTER_TYPE_F);
|
||||
} else {
|
||||
return component(elk_fs_reg(ATTR, regnr, ELK_REGISTER_TYPE_F), comp);
|
||||
}
|
||||
return component(elk_fs_reg(ATTR, regnr, ELK_REGISTER_TYPE_F), comp);
|
||||
}
|
||||
|
||||
/* The register location here is relative to the start of the URB
|
||||
|
|
@ -99,19 +87,7 @@ elk_fs_visitor::per_primitive_reg(const fs_builder &bld, int location, unsigned
|
|||
|
||||
assert(regnr < prog_data->num_per_primitive_inputs);
|
||||
|
||||
if (max_polygons > 1) {
|
||||
/* In multipolygon dispatch each primitive constant is a
|
||||
* dispatch_width-wide SIMD vector (see comment in
|
||||
* assign_urb_setup()), so we need to use offset() instead of
|
||||
* component() to select the specified parameter.
|
||||
*/
|
||||
const elk_fs_reg tmp = bld.vgrf(ELK_REGISTER_TYPE_UD);
|
||||
bld.MOV(tmp, offset(elk_fs_reg(ATTR, regnr, ELK_REGISTER_TYPE_UD),
|
||||
dispatch_width, comp % 4));
|
||||
return retype(tmp, ELK_REGISTER_TYPE_F);
|
||||
} else {
|
||||
return component(elk_fs_reg(ATTR, regnr, ELK_REGISTER_TYPE_F), comp % 4);
|
||||
}
|
||||
return component(elk_fs_reg(ATTR, regnr, ELK_REGISTER_TYPE_F), comp % 4);
|
||||
}
|
||||
|
||||
/** Emits the interpolation for the varying inputs. */
|
||||
|
|
@ -878,7 +854,6 @@ elk_fs_visitor::elk_fs_visitor(const struct elk_compiler *compiler,
|
|||
performance_analysis(this),
|
||||
needs_register_pressure(needs_register_pressure),
|
||||
dispatch_width(dispatch_width),
|
||||
max_polygons(0),
|
||||
api_subgroup_size(elk_nir_api_subgroup_size(shader, dispatch_width))
|
||||
{
|
||||
init();
|
||||
|
|
@ -889,7 +864,7 @@ elk_fs_visitor::elk_fs_visitor(const struct elk_compiler *compiler,
|
|||
const elk_wm_prog_key *key,
|
||||
struct elk_wm_prog_data *prog_data,
|
||||
const nir_shader *shader,
|
||||
unsigned dispatch_width, unsigned max_polygons,
|
||||
unsigned dispatch_width,
|
||||
bool needs_register_pressure,
|
||||
bool debug_enabled)
|
||||
: elk_backend_shader(compiler, params, shader, &prog_data->base,
|
||||
|
|
@ -899,7 +874,6 @@ elk_fs_visitor::elk_fs_visitor(const struct elk_compiler *compiler,
|
|||
performance_analysis(this),
|
||||
needs_register_pressure(needs_register_pressure),
|
||||
dispatch_width(dispatch_width),
|
||||
max_polygons(max_polygons),
|
||||
api_subgroup_size(elk_nir_api_subgroup_size(shader, dispatch_width))
|
||||
{
|
||||
init();
|
||||
|
|
@ -924,7 +898,6 @@ elk_fs_visitor::elk_fs_visitor(const struct elk_compiler *compiler,
|
|||
performance_analysis(this),
|
||||
needs_register_pressure(needs_register_pressure),
|
||||
dispatch_width(8),
|
||||
max_polygons(0),
|
||||
api_subgroup_size(elk_nir_api_subgroup_size(shader, dispatch_width))
|
||||
{
|
||||
init();
|
||||
|
|
|
|||
|
|
@ -1208,7 +1208,7 @@ vec4_visitor::eliminate_find_live_channel()
|
|||
bool progress = false;
|
||||
unsigned depth = 0;
|
||||
|
||||
if (!elk_stage_has_packed_dispatch(devinfo, stage, 0, stage_prog_data)) {
|
||||
if (!elk_stage_has_packed_dispatch(devinfo, stage, stage_prog_data)) {
|
||||
/* The optimization below assumes that channel zero is live on thread
|
||||
* dispatch, which may not be the case if the fixed function dispatches
|
||||
* threads sparsely.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue