mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 17:48:10 +02:00
intel/brw: Remove Gfx8- code from visitor
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27691>
This commit is contained in:
parent
c793644ce9
commit
3ef1ed73d3
5 changed files with 48 additions and 260 deletions
|
|
@ -202,22 +202,6 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
|
|||
shuffle_from_32bit_read(bld, dst, vec4_result, 0, components);
|
||||
}
|
||||
|
||||
/**
|
||||
* A helper for MOV generation for fixing up broken hardware SEND dependency
|
||||
* handling.
|
||||
*/
|
||||
void
|
||||
fs_visitor::DEP_RESOLVE_MOV(const fs_builder &bld, int grf)
|
||||
{
|
||||
/* The caller always wants uncompressed to emit the minimal extra
|
||||
* dependencies, and to avoid having to deal with aligning its regs to 2.
|
||||
*/
|
||||
const fs_builder ubld = bld.annotate("send dependency resolve")
|
||||
.quarter(0);
|
||||
|
||||
ubld.MOV(ubld.null_reg_f(), fs_reg(VGRF, grf, BRW_REGISTER_TYPE_F));
|
||||
}
|
||||
|
||||
bool
|
||||
fs_inst::is_send_from_grf() const
|
||||
{
|
||||
|
|
@ -1636,7 +1620,7 @@ calculate_urb_setup(const struct intel_device_info *devinfo,
|
|||
prog_data->urb_setup[i] = urb_next++;
|
||||
}
|
||||
}
|
||||
} else if (devinfo->ver >= 6) {
|
||||
} else {
|
||||
assert(!nir->info.per_primitive_inputs);
|
||||
|
||||
uint64_t vue_header_bits =
|
||||
|
|
@ -1713,34 +1697,6 @@ calculate_urb_setup(const struct intel_device_info *devinfo,
|
|||
}
|
||||
urb_next = prev_stage_vue_map.num_slots - first_slot;
|
||||
}
|
||||
} else {
|
||||
/* FINISHME: The sf doesn't map VS->FS inputs for us very well. */
|
||||
for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) {
|
||||
/* Point size is packed into the header, not as a general attribute */
|
||||
if (i == VARYING_SLOT_PSIZ)
|
||||
continue;
|
||||
|
||||
if (key->input_slots_valid & BITFIELD64_BIT(i)) {
|
||||
/* The back color slot is skipped when the front color is
|
||||
* also written to. In addition, some slots can be
|
||||
* written in the vertex shader and not read in the
|
||||
* fragment shader. So the register number must always be
|
||||
* incremented, mapped or not.
|
||||
*/
|
||||
if (_mesa_varying_slot_in_fs((gl_varying_slot) i))
|
||||
prog_data->urb_setup[i] = urb_next;
|
||||
urb_next++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* It's a FS only attribute, and we did interpolation for this attribute
|
||||
* in SF thread. So, count it here, too.
|
||||
*
|
||||
* See compile_sf_prog() for more info.
|
||||
*/
|
||||
if (inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC))
|
||||
prog_data->urb_setup[VARYING_SLOT_PNTC] = urb_next++;
|
||||
}
|
||||
|
||||
prog_data->num_varying_inputs = urb_next - prog_data->num_per_primitive_inputs;
|
||||
|
|
@ -2071,14 +2027,11 @@ fs_visitor::assign_constant_locations()
|
|||
|
||||
/* Now that we know how many regular uniforms we'll push, reduce the
|
||||
* UBO push ranges so we don't exceed the 3DSTATE_CONSTANT limits.
|
||||
*/
|
||||
/* For gen4/5:
|
||||
* Only allow 16 registers (128 uniform components) as push constants.
|
||||
*
|
||||
* If changing this value, note the limitation about total_regs in
|
||||
* brw_curbe.c/crocus_state.c
|
||||
*/
|
||||
const unsigned max_push_length = compiler->devinfo->ver < 6 ? 16 : 64;
|
||||
const unsigned max_push_length = 64;
|
||||
unsigned push_length = DIV_ROUND_UP(stage_prog_data->nr_params, 8);
|
||||
for (int i = 0; i < 4; i++) {
|
||||
struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
|
||||
|
|
@ -2129,14 +2082,8 @@ fs_visitor::emit_repclear_shader()
|
|||
assert(uniforms == 0);
|
||||
assume(key->nr_color_regions > 0);
|
||||
|
||||
fs_reg color_output, header;
|
||||
if (devinfo->ver >= 7) {
|
||||
color_output = retype(brw_vec4_grf(127, 0), BRW_REGISTER_TYPE_UD);
|
||||
header = retype(brw_vec8_grf(125, 0), BRW_REGISTER_TYPE_UD);
|
||||
} else {
|
||||
color_output = retype(brw_vec4_reg(MRF, 2, 0), BRW_REGISTER_TYPE_UD);
|
||||
header = retype(brw_vec8_reg(MRF, 0, 0), BRW_REGISTER_TYPE_UD);
|
||||
}
|
||||
fs_reg color_output = retype(brw_vec4_grf(127, 0), BRW_REGISTER_TYPE_UD);
|
||||
fs_reg header = retype(brw_vec8_grf(125, 0), BRW_REGISTER_TYPE_UD);
|
||||
|
||||
/* We pass the clear color as a flat input. Copy it to the output. */
|
||||
fs_reg color_input =
|
||||
|
|
@ -2157,23 +2104,17 @@ fs_visitor::emit_repclear_shader()
|
|||
if (i > 0)
|
||||
bld.exec_all().group(1, 0).MOV(component(header, 2), brw_imm_ud(i));
|
||||
|
||||
if (devinfo->ver >= 7) {
|
||||
write = bld.emit(SHADER_OPCODE_SEND);
|
||||
write->resize_sources(3);
|
||||
write->sfid = GFX6_SFID_DATAPORT_RENDER_CACHE;
|
||||
write->src[0] = brw_imm_ud(0);
|
||||
write->src[1] = brw_imm_ud(0);
|
||||
write->src[2] = i == 0 ? color_output : header;
|
||||
write->check_tdr = true;
|
||||
write->send_has_side_effects = true;
|
||||
write->desc = brw_fb_write_desc(devinfo, i,
|
||||
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED,
|
||||
i == key->nr_color_regions - 1, false);
|
||||
} else {
|
||||
write = bld.emit(FS_OPCODE_REP_FB_WRITE);
|
||||
write->target = i;
|
||||
write->base_mrf = i == 0 ? color_output.nr : header.nr;
|
||||
}
|
||||
write = bld.emit(SHADER_OPCODE_SEND);
|
||||
write->resize_sources(3);
|
||||
write->sfid = GFX6_SFID_DATAPORT_RENDER_CACHE;
|
||||
write->src[0] = brw_imm_ud(0);
|
||||
write->src[1] = brw_imm_ud(0);
|
||||
write->src[2] = i == 0 ? color_output : header;
|
||||
write->check_tdr = true;
|
||||
write->send_has_side_effects = true;
|
||||
write->desc = brw_fb_write_desc(devinfo, i,
|
||||
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED,
|
||||
i == key->nr_color_regions - 1, false);
|
||||
|
||||
/* We can use a headerless message for the first render target */
|
||||
write->header_size = i == 0 ? 0 : 2;
|
||||
|
|
@ -2206,7 +2147,7 @@ brw_sample_mask_reg(const fs_builder &bld)
|
|||
assert(bld.dispatch_width() <= 16);
|
||||
return brw_flag_subreg(sample_mask_flag_subreg(s) + bld.group() / 16);
|
||||
} else {
|
||||
assert(s.devinfo->ver >= 6 && bld.dispatch_width() <= 16);
|
||||
assert(bld.dispatch_width() <= 16);
|
||||
assert(s.devinfo->ver < 20);
|
||||
return retype(brw_vec1_grf((bld.group() >= 16 ? 2 : 1), 7),
|
||||
BRW_REGISTER_TYPE_UW);
|
||||
|
|
@ -2774,24 +2715,6 @@ fs_visitor::allocate_registers(bool allow_spilling)
|
|||
prog_data->total_scratch = MAX2(brw_get_scratch_size(last_scratch),
|
||||
prog_data->total_scratch);
|
||||
|
||||
if (gl_shader_stage_is_compute(stage)) {
|
||||
if (devinfo->platform == INTEL_PLATFORM_HSW) {
|
||||
/* According to the MEDIA_VFE_STATE's "Per Thread Scratch Space"
|
||||
* field documentation, Haswell supports a minimum of 2kB of
|
||||
* scratch space for compute shaders, unlike every other stage
|
||||
* and platform.
|
||||
*/
|
||||
prog_data->total_scratch = MAX2(prog_data->total_scratch, 2048);
|
||||
} else if (devinfo->ver <= 7) {
|
||||
/* According to the MEDIA_VFE_STATE's "Per Thread Scratch Space"
|
||||
* field documentation, platforms prior to Haswell measure scratch
|
||||
* size linearly with a range of [1kB, 12kB] and 1kB granularity.
|
||||
*/
|
||||
prog_data->total_scratch = ALIGN(last_scratch, 1024);
|
||||
max_scratch_size = 12 * 1024;
|
||||
}
|
||||
}
|
||||
|
||||
/* We currently only support up to 2MB of scratch space. If we
|
||||
* need to support more eventually, the documentation suggests
|
||||
* that we could allocate a larger buffer, and partition it out
|
||||
|
|
@ -2892,7 +2815,7 @@ fs_visitor::emit_tcs_thread_end()
|
|||
* separate write just to finish the thread. There isn't guaranteed to
|
||||
* be one, so this may not succeed.
|
||||
*/
|
||||
if (devinfo->ver != 8 && mark_last_urb_write_with_eot())
|
||||
if (mark_last_urb_write_with_eot())
|
||||
return;
|
||||
|
||||
const fs_builder bld = fs_builder(this).at_end();
|
||||
|
|
@ -3089,10 +3012,7 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
|
|||
if (nir->info.inputs_read > 0 ||
|
||||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
|
||||
(nir->info.outputs_read > 0 && !wm_key->coherent_fb_fetch)) {
|
||||
if (devinfo->ver < 6)
|
||||
emit_interpolation_setup_gfx4();
|
||||
else
|
||||
emit_interpolation_setup_gfx6();
|
||||
emit_interpolation_setup();
|
||||
}
|
||||
|
||||
/* We handle discards by keeping track of the still-live pixels in f0.1.
|
||||
|
|
@ -3108,8 +3028,7 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
|
|||
*/
|
||||
const fs_reg dispatch_mask =
|
||||
devinfo->ver >= 20 ? xe2_vec1_grf(i, 15) :
|
||||
devinfo->ver >= 6 ? brw_vec1_grf(i + 1, 7) :
|
||||
brw_vec1_grf(0, 0);
|
||||
brw_vec1_grf(i + 1, 7);
|
||||
bld.exec_all().group(1, 0)
|
||||
.MOV(brw_sample_mask_reg(bld.group(lower_width, i)),
|
||||
retype(dispatch_mask, BRW_REGISTER_TYPE_UW));
|
||||
|
|
@ -3154,7 +3073,6 @@ bool
|
|||
fs_visitor::run_cs(bool allow_spilling)
|
||||
{
|
||||
assert(gl_shader_stage_is_compute(stage));
|
||||
assert(devinfo->ver >= 7);
|
||||
const fs_builder bld = fs_builder(this).at_end();
|
||||
|
||||
payload_ = new cs_thread_payload(*this);
|
||||
|
|
@ -3517,26 +3435,24 @@ brw_nir_populate_wm_prog_data(nir_shader *shader,
|
|||
assert(prog_data->alpha_to_coverage != BRW_SOMETIMES ||
|
||||
prog_data->persample_dispatch == BRW_SOMETIMES);
|
||||
|
||||
if (devinfo->ver >= 6) {
|
||||
prog_data->uses_sample_mask =
|
||||
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
|
||||
prog_data->uses_sample_mask =
|
||||
BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);
|
||||
|
||||
/* From the Ivy Bridge PRM documentation for 3DSTATE_PS:
|
||||
*
|
||||
* "MSDISPMODE_PERSAMPLE is required in order to select
|
||||
* POSOFFSET_SAMPLE"
|
||||
*
|
||||
* So we can only really get sample positions if we are doing real
|
||||
* per-sample dispatch. If we need gl_SamplePosition and we don't have
|
||||
* persample dispatch, we hard-code it to 0.5.
|
||||
*/
|
||||
prog_data->uses_pos_offset =
|
||||
prog_data->persample_dispatch != BRW_NEVER &&
|
||||
(BITSET_TEST(shader->info.system_values_read,
|
||||
SYSTEM_VALUE_SAMPLE_POS) ||
|
||||
BITSET_TEST(shader->info.system_values_read,
|
||||
SYSTEM_VALUE_SAMPLE_POS_OR_CENTER));
|
||||
}
|
||||
/* From the Ivy Bridge PRM documentation for 3DSTATE_PS:
|
||||
*
|
||||
* "MSDISPMODE_PERSAMPLE is required in order to select
|
||||
* POSOFFSET_SAMPLE"
|
||||
*
|
||||
* So we can only really get sample positions if we are doing real
|
||||
* per-sample dispatch. If we need gl_SamplePosition and we don't have
|
||||
* persample dispatch, we hard-code it to 0.5.
|
||||
*/
|
||||
prog_data->uses_pos_offset =
|
||||
prog_data->persample_dispatch != BRW_NEVER &&
|
||||
(BITSET_TEST(shader->info.system_values_read,
|
||||
SYSTEM_VALUE_SAMPLE_POS) ||
|
||||
BITSET_TEST(shader->info.system_values_read,
|
||||
SYSTEM_VALUE_SAMPLE_POS_OR_CENTER));
|
||||
|
||||
prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests;
|
||||
prog_data->post_depth_coverage = shader->info.fs.post_depth_coverage;
|
||||
|
|
@ -3951,17 +3867,13 @@ cs_fill_push_const_info(const struct intel_device_info *devinfo,
|
|||
{
|
||||
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
|
||||
int subgroup_id_index = brw_get_subgroup_id_param_index(devinfo, prog_data);
|
||||
bool cross_thread_supported = devinfo->verx10 >= 75;
|
||||
|
||||
/* The thread ID should be stored in the last param dword */
|
||||
assert(subgroup_id_index == -1 ||
|
||||
subgroup_id_index == (int)prog_data->nr_params - 1);
|
||||
|
||||
unsigned cross_thread_dwords, per_thread_dwords;
|
||||
if (!cross_thread_supported) {
|
||||
cross_thread_dwords = 0u;
|
||||
per_thread_dwords = prog_data->nr_params;
|
||||
} else if (subgroup_id_index >= 0) {
|
||||
if (subgroup_id_index >= 0) {
|
||||
/* Fill all but the last register with cross-thread payload */
|
||||
cross_thread_dwords = 8 * (subgroup_id_index / 8);
|
||||
per_thread_dwords = prog_data->nr_params - cross_thread_dwords;
|
||||
|
|
|
|||
|
|
@ -223,7 +223,6 @@ public:
|
|||
uint32_t const_offset,
|
||||
uint8_t alignment,
|
||||
unsigned components);
|
||||
void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf);
|
||||
|
||||
bool run_fs(bool allow_spilling, bool do_rep_send);
|
||||
bool run_vs();
|
||||
|
|
@ -268,8 +267,7 @@ public:
|
|||
void limit_dispatch_width(unsigned n, const char *msg);
|
||||
|
||||
void emit_repclear_shader();
|
||||
void emit_interpolation_setup_gfx4();
|
||||
void emit_interpolation_setup_gfx6();
|
||||
void emit_interpolation_setup();
|
||||
|
||||
void set_tcs_invocation_id();
|
||||
|
||||
|
|
@ -412,14 +410,13 @@ public:
|
|||
/**
|
||||
* Return the flag register used in fragment shaders to keep track of live
|
||||
* samples. On Gfx7+ we use f1.0-f1.1 to allow discard jumps in SIMD32
|
||||
* dispatch mode, while earlier generations are constrained to f0.1, which
|
||||
* limits the dispatch width to SIMD16 for fragment shaders that use discard.
|
||||
* dispatch mode.
|
||||
*/
|
||||
static inline unsigned
|
||||
sample_mask_flag_subreg(const fs_visitor &s)
|
||||
{
|
||||
assert(s.stage == MESA_SHADER_FRAGMENT);
|
||||
return s.devinfo->ver >= 7 ? 2 : 1;
|
||||
return 2;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -116,67 +116,7 @@ fs_visitor::per_primitive_reg(const fs_builder &bld, int location, unsigned comp
|
|||
|
||||
/** Emits the interpolation for the varying inputs. */
|
||||
void
|
||||
fs_visitor::emit_interpolation_setup_gfx4()
|
||||
{
|
||||
struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
|
||||
|
||||
fs_builder abld = fs_builder(this).at_end().annotate("compute pixel centers");
|
||||
this->pixel_x = vgrf(glsl_uint_type());
|
||||
this->pixel_y = vgrf(glsl_uint_type());
|
||||
this->pixel_x.type = BRW_REGISTER_TYPE_UW;
|
||||
this->pixel_y.type = BRW_REGISTER_TYPE_UW;
|
||||
abld.ADD(this->pixel_x,
|
||||
fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
|
||||
fs_reg(brw_imm_v(0x10101010)));
|
||||
abld.ADD(this->pixel_y,
|
||||
fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
|
||||
fs_reg(brw_imm_v(0x11001100)));
|
||||
|
||||
const fs_builder bld = fs_builder(this).at_end();
|
||||
abld = bld.annotate("compute pixel deltas from v0");
|
||||
|
||||
this->delta_xy[BRW_BARYCENTRIC_PERSPECTIVE_PIXEL] =
|
||||
vgrf(glsl_vec2_type());
|
||||
const fs_reg &delta_xy = this->delta_xy[BRW_BARYCENTRIC_PERSPECTIVE_PIXEL];
|
||||
const fs_reg xstart(negate(brw_vec1_grf(1, 0)));
|
||||
const fs_reg ystart(negate(brw_vec1_grf(1, 1)));
|
||||
|
||||
if (devinfo->has_pln) {
|
||||
for (unsigned i = 0; i < dispatch_width / 8; i++) {
|
||||
abld.quarter(i).ADD(quarter(offset(delta_xy, abld, 0), i),
|
||||
quarter(this->pixel_x, i), xstart);
|
||||
abld.quarter(i).ADD(quarter(offset(delta_xy, abld, 1), i),
|
||||
quarter(this->pixel_y, i), ystart);
|
||||
}
|
||||
} else {
|
||||
abld.ADD(offset(delta_xy, abld, 0), this->pixel_x, xstart);
|
||||
abld.ADD(offset(delta_xy, abld, 1), this->pixel_y, ystart);
|
||||
}
|
||||
|
||||
this->pixel_z = fetch_payload_reg(bld, fs_payload().source_depth_reg);
|
||||
|
||||
/* The SF program automatically handles doing the perspective correction or
|
||||
* not based on wm_prog_data::interp_mode[] so we can use the same pixel
|
||||
* offsets for both perspective and non-perspective.
|
||||
*/
|
||||
this->delta_xy[BRW_BARYCENTRIC_NONPERSPECTIVE_PIXEL] =
|
||||
this->delta_xy[BRW_BARYCENTRIC_PERSPECTIVE_PIXEL];
|
||||
|
||||
abld = bld.annotate("compute pos.w and 1/pos.w");
|
||||
/* Compute wpos.w. It's always in our setup, since it's needed to
|
||||
* interpolate the other attributes.
|
||||
*/
|
||||
this->wpos_w = vgrf(glsl_float_type());
|
||||
abld.emit(FS_OPCODE_LINTERP, wpos_w, delta_xy,
|
||||
interp_reg(abld, VARYING_SLOT_POS, 3, 0));
|
||||
/* Compute the pixel 1/W value from wpos.w. */
|
||||
this->pixel_w = vgrf(glsl_float_type());
|
||||
abld.emit(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
|
||||
}
|
||||
|
||||
/** Emits the interpolation for the varying inputs. */
|
||||
void
|
||||
fs_visitor::emit_interpolation_setup_gfx6()
|
||||
fs_visitor::emit_interpolation_setup()
|
||||
{
|
||||
const fs_builder bld = fs_builder(this).at_end();
|
||||
fs_builder abld = bld.annotate("compute pixel centers");
|
||||
|
|
@ -384,7 +324,7 @@ fs_visitor::emit_interpolation_setup_gfx6()
|
|||
hbld.MOV(offset(pixel_x, hbld, i), horiz_stride(int_pixel_x, 2));
|
||||
hbld.MOV(offset(pixel_y, hbld, i), horiz_stride(int_pixel_y, 2));
|
||||
|
||||
} else if (devinfo->ver >= 8 || dispatch_width == 8) {
|
||||
} else {
|
||||
/* The "Register Region Restrictions" page says for BDW (and newer,
|
||||
* presumably):
|
||||
*
|
||||
|
|
@ -407,31 +347,6 @@ fs_visitor::emit_interpolation_setup_gfx6()
|
|||
horiz_stride(half_int_pixel_offset_x, 0));
|
||||
hbld.emit(FS_OPCODE_PIXEL_Y, offset(pixel_y, hbld, i), int_pixel_xy,
|
||||
horiz_stride(half_int_pixel_offset_y, 0));
|
||||
} else {
|
||||
/* The "Register Region Restrictions" page says for SNB, IVB, HSW:
|
||||
*
|
||||
* "When destination spans two registers, the source MUST span
|
||||
* two registers."
|
||||
*
|
||||
* Since the GRF source of the ADD will only read a single register,
|
||||
* we must do two separate ADDs in SIMD16.
|
||||
*/
|
||||
const fs_reg int_pixel_x = hbld.vgrf(BRW_REGISTER_TYPE_UW);
|
||||
const fs_reg int_pixel_y = hbld.vgrf(BRW_REGISTER_TYPE_UW);
|
||||
|
||||
hbld.ADD(int_pixel_x,
|
||||
fs_reg(stride(suboffset(gi_uw, 4), 2, 4, 0)),
|
||||
fs_reg(brw_imm_v(0x10101010)));
|
||||
hbld.ADD(int_pixel_y,
|
||||
fs_reg(stride(suboffset(gi_uw, 5), 2, 4, 0)),
|
||||
fs_reg(brw_imm_v(0x11001100)));
|
||||
|
||||
/* As of gfx6, we can no longer mix float and int sources. We have
|
||||
* to turn the integer pixel centers into floats for their actual
|
||||
* use.
|
||||
*/
|
||||
hbld.MOV(offset(pixel_x, hbld, i), int_pixel_x);
|
||||
hbld.MOV(offset(pixel_y, hbld, i), int_pixel_y);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -676,19 +591,8 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld,
|
|||
const fs_reg dst_depth = fetch_payload_reg(bld, fs_payload().dest_depth_reg);
|
||||
fs_reg src_depth, src_stencil;
|
||||
|
||||
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
|
||||
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
|
||||
src_depth = frag_depth;
|
||||
} else if (source_depth_to_render_target) {
|
||||
/* If we got here, we're in one of those strange Gen4-5 cases where
|
||||
* we're forced to pass the source depth, unmodified, to the FB write.
|
||||
* In this case, we don't want to use pixel_z because we may not have
|
||||
* set up interpolation. It's also perfectly safe because it only
|
||||
* happens on old hardware (no coarse interpolation) and this is
|
||||
* explicitly the pass-through case.
|
||||
*/
|
||||
assert(devinfo->ver <= 5);
|
||||
src_depth = fetch_payload_reg(bld, fs_payload().source_depth_reg);
|
||||
}
|
||||
|
||||
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
|
||||
src_stencil = frag_stencil;
|
||||
|
|
@ -725,7 +629,7 @@ fs_visitor::do_emit_fb_writes(int nr_color_regions, bool replicate_alpha)
|
|||
ralloc_asprintf(this->mem_ctx, "FB write target %d", target));
|
||||
|
||||
fs_reg src0_alpha;
|
||||
if (devinfo->ver >= 6 && replicate_alpha && target != 0)
|
||||
if (replicate_alpha && target != 0)
|
||||
src0_alpha = offset(outputs[0], bld, 3);
|
||||
|
||||
inst = emit_single_fb_write(abld, this->outputs[target],
|
||||
|
|
@ -761,16 +665,6 @@ fs_visitor::emit_fb_writes()
|
|||
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
|
||||
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
|
||||
|
||||
if (source_depth_to_render_target && devinfo->ver == 6) {
|
||||
/* For outputting oDepth on gfx6, SIMD8 writes have to be used. This
|
||||
* would require SIMD8 moves of each half to message regs, e.g. by using
|
||||
* the SIMD lowering pass. Unfortunately this is more difficult than it
|
||||
* sounds because the SIMD8 single-source message lacks channel selects
|
||||
* for the second and third subspans.
|
||||
*/
|
||||
limit_dispatch_width(8, "Depth writes unsupported in SIMD16+ mode.\n");
|
||||
}
|
||||
|
||||
if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL)) {
|
||||
/* From the 'Render Target Write message' section of the docs:
|
||||
* "Output Stencil is not supported with SIMD16 Render Target Write
|
||||
|
|
@ -786,7 +680,7 @@ fs_visitor::emit_fb_writes()
|
|||
*/
|
||||
const bool replicate_alpha = key->alpha_test_replicate_alpha ||
|
||||
(key->nr_color_regions > 1 && key->alpha_to_coverage &&
|
||||
(sample_mask.file == BAD_FILE || devinfo->ver == 6));
|
||||
sample_mask.file == BAD_FILE);
|
||||
|
||||
prog_data->dual_src_blend = (this->dual_src_output.file != BAD_FILE &&
|
||||
this->outputs[0].file != BAD_FILE);
|
||||
|
|
@ -1142,7 +1036,6 @@ fs_visitor::emit_urb_fence()
|
|||
void
|
||||
fs_visitor::emit_cs_terminate()
|
||||
{
|
||||
assert(devinfo->ver >= 7);
|
||||
const fs_builder bld = fs_builder(this).at_end();
|
||||
|
||||
/* We can't directly send from g0, since sends with EOT have to use
|
||||
|
|
@ -1247,7 +1140,7 @@ fs_visitor::init()
|
|||
this->source_depth_to_render_target = false;
|
||||
this->runtime_check_aads_emit = false;
|
||||
this->first_non_payload_grf = 0;
|
||||
this->max_grf = devinfo->ver >= 7 ? GFX7_MRF_HACK_START : BRW_MAX_GRF;
|
||||
this->max_grf = GFX7_MRF_HACK_START;
|
||||
|
||||
this->uniforms = 0;
|
||||
this->last_scratch = 0;
|
||||
|
|
|
|||
|
|
@ -149,10 +149,10 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
|
|||
|
||||
switch (op) {
|
||||
case 0 ... NUM_BRW_OPCODES - 1:
|
||||
/* The DO instruction doesn't exist on Gfx6+, but we use it to mark the
|
||||
/* The DO instruction doesn't exist on Gfx9+, but we use it to mark the
|
||||
* start of a loop in the IR.
|
||||
*/
|
||||
if (devinfo->ver >= 6 && op == BRW_OPCODE_DO)
|
||||
if (op == BRW_OPCODE_DO)
|
||||
return "do";
|
||||
|
||||
/* DPAS instructions may transiently exist on platforms that do not
|
||||
|
|
|
|||
|
|
@ -130,7 +130,6 @@ static inline nir_variable_mode
|
|||
brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
|
||||
gl_shader_stage stage)
|
||||
{
|
||||
const struct intel_device_info *devinfo = compiler->devinfo;
|
||||
nir_variable_mode indirect_mask = (nir_variable_mode) 0;
|
||||
|
||||
switch (stage) {
|
||||
|
|
@ -149,19 +148,6 @@ brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
|
|||
stage != MESA_SHADER_MESH)
|
||||
indirect_mask |= nir_var_shader_out;
|
||||
|
||||
/* On HSW+, we allow indirects in scalar shaders. They get implemented
|
||||
* using nir_lower_vars_to_explicit_types and nir_lower_explicit_io in
|
||||
* brw_postprocess_nir.
|
||||
*
|
||||
* We haven't plumbed through the indirect scratch messages on gfx6 or
|
||||
* earlier so doing indirects via scratch doesn't work there. On gfx7 and
|
||||
* earlier the scratch space size is limited to 12kB. If we allowed
|
||||
* indirects as scratch all the time, we may easily exceed this limit
|
||||
* without having any fallback.
|
||||
*/
|
||||
if (devinfo->verx10 <= 70)
|
||||
indirect_mask |= nir_var_function_temp;
|
||||
|
||||
return indirect_mask;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue