i965: Split nir_emit_intrinsic by stage with a general fallback.

Many intrinsics only apply to a particular stage (such as discard).
In other cases, we may want to interpret them differently based on
the stage (such as load_primitive_id or load_input).

The current method isn't that pretty - we handle all intrinsics in
one giant function.  Sometimes we assert on stage, sometimes we forget.
Different behaviors are handled via if-ladders based on stage.

This commit introduces new nir_emit_<stage>_intrinsic() functions,
and makes nir_emit_instr() call those.  In turn, those fall back to
the generic nir_emit_intrinsic() function for cases they don't want
to handle specially.

This makes it clear which intrinsics only exist in one stage, and makes
it easy to handle inputs/outputs differently for various stages.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
This commit is contained in:
Kenneth Graunke 2015-11-04 23:05:07 -08:00
parent 912babba7b
commit 918bda23dd
2 changed files with 381 additions and 277 deletions

View file

@ -263,6 +263,14 @@ public:
nir_load_const_instr *instr);
void nir_emit_undef(const brw::fs_builder &bld,
nir_ssa_undef_instr *instr);
void nir_emit_vs_intrinsic(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
void nir_emit_gs_intrinsic(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
void nir_emit_fs_intrinsic(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
void nir_emit_cs_intrinsic(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
void nir_emit_intrinsic(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
void nir_emit_ssbo_atomic(const brw::fs_builder &bld,

View file

@ -360,7 +360,22 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
break;
case nir_instr_type_intrinsic:
nir_emit_intrinsic(abld, nir_instr_as_intrinsic(instr));
switch (stage) {
case MESA_SHADER_VERTEX:
nir_emit_vs_intrinsic(abld, nir_instr_as_intrinsic(instr));
break;
case MESA_SHADER_GEOMETRY:
nir_emit_gs_intrinsic(abld, nir_instr_as_intrinsic(instr));
break;
case MESA_SHADER_FRAGMENT:
nir_emit_fs_intrinsic(abld, nir_instr_as_intrinsic(instr));
break;
case MESA_SHADER_COMPUTE:
nir_emit_cs_intrinsic(abld, nir_instr_as_intrinsic(instr));
break;
default:
unreachable("unsupported shader stage");
}
break;
case nir_instr_type_tex:
@ -1568,15 +1583,128 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
}
void
fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
nir_intrinsic_instr *instr)
{
assert(stage == MESA_SHADER_VERTEX);
fs_reg dest;
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
dest = get_nir_dest(instr->dest);
bool has_indirect = false;
switch (instr->intrinsic) {
case nir_intrinsic_load_vertex_id:
unreachable("should be lowered by lower_vertex_id()");
case nir_intrinsic_load_vertex_id_zero_base:
case nir_intrinsic_load_base_vertex:
case nir_intrinsic_load_instance_id: {
gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
fs_reg val = nir_system_values[sv];
assert(val.file != BAD_FILE);
dest.type = val.type;
bld.MOV(dest, val);
break;
}
default:
nir_emit_intrinsic(bld, instr);
break;
}
}
void
fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld,
nir_intrinsic_instr *instr)
{
assert(stage == MESA_SHADER_GEOMETRY);
fs_reg dest;
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
dest = get_nir_dest(instr->dest);
switch (instr->intrinsic) {
case nir_intrinsic_load_primitive_id:
assert(stage == MESA_SHADER_GEOMETRY);
assert(((struct brw_gs_prog_data *)prog_data)->include_primitive_id);
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD),
retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD));
break;
case nir_intrinsic_load_input_indirect:
case nir_intrinsic_load_input:
unreachable("load_input intrinsics are invalid for the GS stage");
case nir_intrinsic_load_per_vertex_input_indirect:
assert(!"Not allowed");
case nir_intrinsic_load_per_vertex_input:
emit_gs_input_load(dest, instr->src[0], instr->const_index[0],
instr->num_components);
break;
case nir_intrinsic_emit_vertex_with_counter:
emit_gs_vertex(instr->src[0], instr->const_index[0]);
break;
case nir_intrinsic_end_primitive_with_counter:
emit_gs_end_primitive(instr->src[0]);
break;
case nir_intrinsic_set_vertex_count:
bld.MOV(this->final_gs_vertex_count, get_nir_src(instr->src[0]));
break;
case nir_intrinsic_load_invocation_id: {
fs_reg val = nir_system_values[SYSTEM_VALUE_INVOCATION_ID];
assert(val.file != BAD_FILE);
dest.type = val.type;
bld.MOV(dest, val);
break;
}
default:
nir_emit_intrinsic(bld, instr);
break;
}
}
void
fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
nir_intrinsic_instr *instr)
{
assert(stage == MESA_SHADER_FRAGMENT);
struct brw_wm_prog_data *wm_prog_data =
(struct brw_wm_prog_data *) prog_data;
fs_reg dest;
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
dest = get_nir_dest(instr->dest);
switch (instr->intrinsic) {
case nir_intrinsic_load_front_face:
bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
*emit_frontfacing_interpolation());
break;
case nir_intrinsic_load_sample_pos: {
fs_reg sample_pos = nir_system_values[SYSTEM_VALUE_SAMPLE_POS];
assert(sample_pos.file != BAD_FILE);
dest.type = sample_pos.type;
bld.MOV(dest, sample_pos);
bld.MOV(offset(dest, bld, 1), offset(sample_pos, bld, 1));
break;
}
case nir_intrinsic_load_sample_mask_in:
case nir_intrinsic_load_sample_id: {
gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
fs_reg val = nir_system_values[sv];
assert(val.file != BAD_FILE);
dest.type = val.type;
bld.MOV(dest, val);
break;
}
case nir_intrinsic_discard:
case nir_intrinsic_discard_if: {
/* We track our discarded pixels in f0.1. By predicating on it, we can
@ -1602,6 +1730,248 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
case nir_intrinsic_interp_var_at_centroid:
case nir_intrinsic_interp_var_at_sample:
case nir_intrinsic_interp_var_at_offset: {
/* Handle ARB_gpu_shader5 interpolation intrinsics
*
* It's worth a quick word of explanation as to why we handle the full
* variable-based interpolation intrinsic rather than a lowered version
* with like we do for other inputs. We have to do that because the way
* we set up inputs doesn't allow us to use the already setup inputs for
* interpolation. At the beginning of the shader, we go through all of
* the input variables and do the initial interpolation and put it in
* the nir_inputs array based on its location as determined in
* nir_lower_io. If the input isn't used, dead code cleans up and
* everything works fine. However, when we get to the ARB_gpu_shader5
* interpolation intrinsics, we need to reinterpolate the input
* differently. If we used an intrinsic that just had an index it would
* only give us the offset into the nir_inputs array. However, this is
* useless because that value is post-interpolation and we need
* pre-interpolation. In order to get the actual location of the bits
* we get from the vertex fetching hardware, we need the variable.
*/
wm_prog_data->pulls_bary = true;
fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
const glsl_interp_qualifier interpolation =
(glsl_interp_qualifier) instr->variables[0]->var->data.interpolation;
switch (instr->intrinsic) {
case nir_intrinsic_interp_var_at_centroid:
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_CENTROID,
dst_xy,
fs_reg(), /* src */
fs_reg(0u),
interpolation);
break;
case nir_intrinsic_interp_var_at_sample: {
nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
if (const_sample) {
unsigned msg_data = const_sample->i[0] << 4;
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
dst_xy,
fs_reg(), /* src */
fs_reg(msg_data),
interpolation);
} else {
const fs_reg sample_src = retype(get_nir_src(instr->src[0]),
BRW_REGISTER_TYPE_UD);
if (nir_src_is_dynamically_uniform(instr->src[0])) {
const fs_reg sample_id = bld.emit_uniformize(sample_src);
const fs_reg msg_data = vgrf(glsl_type::uint_type);
bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
dst_xy,
fs_reg(), /* src */
msg_data,
interpolation);
} else {
/* Make a loop that sends a message to the pixel interpolater
* for the sample number in each live channel. If there are
* multiple channels with the same sample number then these
* will be handled simultaneously with a single interation of
* the loop.
*/
bld.emit(BRW_OPCODE_DO);
/* Get the next live sample number into sample_id_reg */
const fs_reg sample_id = bld.emit_uniformize(sample_src);
/* Set the flag register so that we can perform the send
* message on all channels that have the same sample number
*/
bld.CMP(bld.null_reg_ud(),
sample_src, sample_id,
BRW_CONDITIONAL_EQ);
const fs_reg msg_data = vgrf(glsl_type::uint_type);
bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
fs_inst *inst =
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
dst_xy,
fs_reg(), /* src */
msg_data,
interpolation);
set_predicate(BRW_PREDICATE_NORMAL, inst);
/* Continue the loop if there are any live channels left */
set_predicate_inv(BRW_PREDICATE_NORMAL,
true, /* inverse */
bld.emit(BRW_OPCODE_WHILE));
}
}
break;
}
case nir_intrinsic_interp_var_at_offset: {
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
if (const_offset) {
unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
dst_xy,
fs_reg(), /* src */
fs_reg(off_x | (off_y << 4)),
interpolation);
} else {
fs_reg src = vgrf(glsl_type::ivec2_type);
fs_reg offset_src = retype(get_nir_src(instr->src[0]),
BRW_REGISTER_TYPE_F);
for (int i = 0; i < 2; i++) {
fs_reg temp = vgrf(glsl_type::float_type);
bld.MUL(temp, offset(offset_src, bld, i), fs_reg(16.0f));
fs_reg itemp = vgrf(glsl_type::int_type);
bld.MOV(itemp, temp); /* float to int */
/* Clamp the upper end of the range to +7/16.
* ARB_gpu_shader5 requires that we support a maximum offset
* of +0.5, which isn't representable in a S0.4 value -- if
* we didn't clamp it, we'd end up with -8/16, which is the
* opposite of what the shader author wanted.
*
* This is legal due to ARB_gpu_shader5's quantization
* rules:
*
* "Not all values of <offset> may be supported; x and y
* offsets may be rounded to fixed-point values with the
* number of fraction bits given by the
* implementation-dependent constant
* FRAGMENT_INTERPOLATION_OFFSET_BITS"
*/
set_condmod(BRW_CONDITIONAL_L,
bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
}
const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET;
emit_pixel_interpolater_send(bld,
opcode,
dst_xy,
src,
fs_reg(0u),
interpolation);
}
break;
}
default:
unreachable("Invalid intrinsic");
}
for (unsigned j = 0; j < instr->num_components; j++) {
fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
src.type = dest.type;
bld.emit(FS_OPCODE_LINTERP, dest, dst_xy, src);
dest = offset(dest, bld, 1);
}
break;
}
default:
nir_emit_intrinsic(bld, instr);
break;
}
}
void
fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
nir_intrinsic_instr *instr)
{
assert(stage == MESA_SHADER_COMPUTE);
struct brw_cs_prog_data *cs_prog_data =
(struct brw_cs_prog_data *) prog_data;
fs_reg dest;
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
dest = get_nir_dest(instr->dest);
switch (instr->intrinsic) {
case nir_intrinsic_barrier:
emit_barrier();
cs_prog_data->uses_barrier = true;
break;
case nir_intrinsic_load_local_invocation_id:
case nir_intrinsic_load_work_group_id: {
gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
fs_reg val = nir_system_values[sv];
assert(val.file != BAD_FILE);
dest.type = val.type;
for (unsigned i = 0; i < 3; i++)
bld.MOV(offset(dest, bld, i), offset(val, bld, i));
break;
}
case nir_intrinsic_load_num_work_groups: {
const unsigned surface =
cs_prog_data->binding_table.work_groups_start;
cs_prog_data->uses_num_work_groups = true;
fs_reg surf_index = fs_reg(surface);
brw_mark_surface_used(prog_data, surface);
/* Read the 3 GLuint components of gl_NumWorkGroups */
for (unsigned i = 0; i < 3; i++) {
fs_reg read_result =
emit_untyped_read(bld, surf_index,
fs_reg(i << 2),
1 /* dims */, 1 /* size */,
BRW_PREDICATE_NONE);
read_result.type = dest.type;
bld.MOV(dest, read_result);
dest = offset(dest, bld, 1);
}
break;
}
default:
nir_emit_intrinsic(bld, instr);
break;
}
}
void
fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
{
fs_reg dest;
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
dest = get_nir_dest(instr->dest);
bool has_indirect = false;
switch (instr->intrinsic) {
case nir_intrinsic_atomic_counter_inc:
case nir_intrinsic_atomic_counter_dec:
case nir_intrinsic_atomic_counter_read: {
@ -1789,44 +2159,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
bld.MOV(retype(dest, BRW_REGISTER_TYPE_D), fs_reg(1));
break;
case nir_intrinsic_load_front_face:
bld.MOV(retype(dest, BRW_REGISTER_TYPE_D),
*emit_frontfacing_interpolation());
break;
case nir_intrinsic_load_vertex_id:
unreachable("should be lowered by lower_vertex_id()");
case nir_intrinsic_load_primitive_id:
assert(stage == MESA_SHADER_GEOMETRY);
assert(((struct brw_gs_prog_data *)prog_data)->include_primitive_id);
bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD),
retype(fs_reg(brw_vec8_grf(2, 0)), BRW_REGISTER_TYPE_UD));
break;
case nir_intrinsic_load_vertex_id_zero_base:
case nir_intrinsic_load_base_vertex:
case nir_intrinsic_load_instance_id:
case nir_intrinsic_load_invocation_id:
case nir_intrinsic_load_sample_mask_in:
case nir_intrinsic_load_sample_id: {
gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
fs_reg val = nir_system_values[sv];
assert(val.file != BAD_FILE);
dest.type = val.type;
bld.MOV(dest, val);
break;
}
case nir_intrinsic_load_sample_pos: {
fs_reg sample_pos = nir_system_values[SYSTEM_VALUE_SAMPLE_POS];
assert(sample_pos.file != BAD_FILE);
dest.type = sample_pos.type;
bld.MOV(dest, sample_pos);
bld.MOV(offset(dest, bld, 1), offset(sample_pos, bld, 1));
break;
}
case nir_intrinsic_load_uniform_indirect:
has_indirect = true;
/* fallthrough */
@ -1980,185 +2312,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
case nir_intrinsic_load_per_vertex_input_indirect:
assert(!"Not allowed");
/* fallthrough */
case nir_intrinsic_load_per_vertex_input:
emit_gs_input_load(dest, instr->src[0], instr->const_index[0],
instr->num_components);
break;
/* Handle ARB_gpu_shader5 interpolation intrinsics
*
* It's worth a quick word of explanation as to why we handle the full
* variable-based interpolation intrinsic rather than a lowered version
* with like we do for other inputs. We have to do that because the way
* we set up inputs doesn't allow us to use the already setup inputs for
* interpolation. At the beginning of the shader, we go through all of
* the input variables and do the initial interpolation and put it in
* the nir_inputs array based on its location as determined in
* nir_lower_io. If the input isn't used, dead code cleans up and
* everything works fine. However, when we get to the ARB_gpu_shader5
* interpolation intrinsics, we need to reinterpolate the input
* differently. If we used an intrinsic that just had an index it would
* only give us the offset into the nir_inputs array. However, this is
* useless because that value is post-interpolation and we need
* pre-interpolation. In order to get the actual location of the bits
* we get from the vertex fetching hardware, we need the variable.
*/
case nir_intrinsic_interp_var_at_centroid:
case nir_intrinsic_interp_var_at_sample:
case nir_intrinsic_interp_var_at_offset: {
assert(stage == MESA_SHADER_FRAGMENT);
((struct brw_wm_prog_data *) prog_data)->pulls_bary = true;
fs_reg dst_xy = bld.vgrf(BRW_REGISTER_TYPE_F, 2);
const glsl_interp_qualifier interpolation =
(glsl_interp_qualifier) instr->variables[0]->var->data.interpolation;
switch (instr->intrinsic) {
case nir_intrinsic_interp_var_at_centroid:
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_CENTROID,
dst_xy,
fs_reg(), /* src */
fs_reg(0u),
interpolation);
break;
case nir_intrinsic_interp_var_at_sample: {
nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
if (const_sample) {
unsigned msg_data = const_sample->i[0] << 4;
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
dst_xy,
fs_reg(), /* src */
fs_reg(msg_data),
interpolation);
} else {
const fs_reg sample_src = retype(get_nir_src(instr->src[0]),
BRW_REGISTER_TYPE_UD);
if (nir_src_is_dynamically_uniform(instr->src[0])) {
const fs_reg sample_id = bld.emit_uniformize(sample_src);
const fs_reg msg_data = vgrf(glsl_type::uint_type);
bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
dst_xy,
fs_reg(), /* src */
msg_data,
interpolation);
} else {
/* Make a loop that sends a message to the pixel interpolater
* for the sample number in each live channel. If there are
* multiple channels with the same sample number then these
* will be handled simultaneously with a single interation of
* the loop.
*/
bld.emit(BRW_OPCODE_DO);
/* Get the next live sample number into sample_id_reg */
const fs_reg sample_id = bld.emit_uniformize(sample_src);
/* Set the flag register so that we can perform the send
* message on all channels that have the same sample number
*/
bld.CMP(bld.null_reg_ud(),
sample_src, sample_id,
BRW_CONDITIONAL_EQ);
const fs_reg msg_data = vgrf(glsl_type::uint_type);
bld.exec_all().group(1, 0).SHL(msg_data, sample_id, fs_reg(4u));
fs_inst *inst =
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
dst_xy,
fs_reg(), /* src */
msg_data,
interpolation);
set_predicate(BRW_PREDICATE_NORMAL, inst);
/* Continue the loop if there are any live channels left */
set_predicate_inv(BRW_PREDICATE_NORMAL,
true, /* inverse */
bld.emit(BRW_OPCODE_WHILE));
}
}
break;
}
case nir_intrinsic_interp_var_at_offset: {
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
if (const_offset) {
unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
dst_xy,
fs_reg(), /* src */
fs_reg(off_x | (off_y << 4)),
interpolation);
} else {
fs_reg src = vgrf(glsl_type::ivec2_type);
fs_reg offset_src = retype(get_nir_src(instr->src[0]),
BRW_REGISTER_TYPE_F);
for (int i = 0; i < 2; i++) {
fs_reg temp = vgrf(glsl_type::float_type);
bld.MUL(temp, offset(offset_src, bld, i), fs_reg(16.0f));
fs_reg itemp = vgrf(glsl_type::int_type);
bld.MOV(itemp, temp); /* float to int */
/* Clamp the upper end of the range to +7/16.
* ARB_gpu_shader5 requires that we support a maximum offset
* of +0.5, which isn't representable in a S0.4 value -- if
* we didn't clamp it, we'd end up with -8/16, which is the
* opposite of what the shader author wanted.
*
* This is legal due to ARB_gpu_shader5's quantization
* rules:
*
* "Not all values of <offset> may be supported; x and y
* offsets may be rounded to fixed-point values with the
* number of fraction bits given by the
* implementation-dependent constant
* FRAGMENT_INTERPOLATION_OFFSET_BITS"
*/
set_condmod(BRW_CONDITIONAL_L,
bld.SEL(offset(src, bld, i), itemp, fs_reg(7)));
}
const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET;
emit_pixel_interpolater_send(bld,
opcode,
dst_xy,
src,
fs_reg(0u),
interpolation);
}
break;
}
default:
unreachable("Invalid intrinsic");
}
for (unsigned j = 0; j < instr->num_components; j++) {
fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
src.type = dest.type;
bld.emit(FS_OPCODE_LINTERP, dest, dst_xy, src);
dest = offset(dest, bld, 1);
}
break;
}
case nir_intrinsic_store_ssbo_indirect:
has_indirect = true;
/* fallthrough */
@ -2240,23 +2393,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
case nir_intrinsic_barrier:
emit_barrier();
if (stage == MESA_SHADER_COMPUTE)
((struct brw_cs_prog_data *) prog_data)->uses_barrier = true;
break;
case nir_intrinsic_load_local_invocation_id:
case nir_intrinsic_load_work_group_id: {
gl_system_value sv = nir_system_value_from_intrinsic(instr->intrinsic);
fs_reg val = nir_system_values[sv];
assert(val.file != BAD_FILE);
dest.type = val.type;
for (unsigned i = 0; i < 3; i++)
bld.MOV(offset(dest, bld, i), offset(val, bld, i));
break;
}
case nir_intrinsic_ssbo_atomic_add:
nir_emit_ssbo_atomic(bld, BRW_AOP_ADD, instr);
break;
@ -2312,46 +2448,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
case nir_intrinsic_load_num_work_groups: {
assert(devinfo->gen >= 7);
assert(stage == MESA_SHADER_COMPUTE);
struct brw_cs_prog_data *cs_prog_data =
(struct brw_cs_prog_data *) prog_data;
const unsigned surface =
cs_prog_data->binding_table.work_groups_start;
cs_prog_data->uses_num_work_groups = true;
fs_reg surf_index = fs_reg(surface);
brw_mark_surface_used(prog_data, surface);
/* Read the 3 GLuint components of gl_NumWorkGroups */
for (unsigned i = 0; i < 3; i++) {
fs_reg read_result =
emit_untyped_read(bld, surf_index,
fs_reg(i << 2),
1 /* dims */, 1 /* size */,
BRW_PREDICATE_NONE);
read_result.type = dest.type;
bld.MOV(dest, read_result);
dest = offset(dest, bld, 1);
}
break;
}
case nir_intrinsic_emit_vertex_with_counter:
emit_gs_vertex(instr->src[0], instr->const_index[0]);
break;
case nir_intrinsic_end_primitive_with_counter:
emit_gs_end_primitive(instr->src[0]);
break;
case nir_intrinsic_set_vertex_count:
bld.MOV(this->final_gs_vertex_count, get_nir_src(instr->src[0]));
break;
default:
unreachable("unknown intrinsic");
}