diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 56a0b2942bf..018be4588e8 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -3629,8 +3629,8 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
 
       if (const_offset) {
          assert(nir_src_bit_size(instr->src[0]) == 32);
-         unsigned off_x = MIN2((int)(const_offset[0].f32 * 16), 7) & 0xf;
-         unsigned off_y = MIN2((int)(const_offset[1].f32 * 16), 7) & 0xf;
+         unsigned off_x = const_offset[0].u32 & 0xf;
+         unsigned off_y = const_offset[1].u32 & 0xf;
 
          emit_pixel_interpolater_send(bld,
                                       FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
@@ -3639,35 +3639,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
                                       brw_imm_ud(off_x | (off_y << 4)),
                                       interpolation);
       } else {
-         fs_reg src = vgrf(glsl_type::ivec2_type);
-         fs_reg offset_src = retype(get_nir_src(instr->src[0]),
-                                    BRW_REGISTER_TYPE_F);
-         for (int i = 0; i < 2; i++) {
-            fs_reg temp = vgrf(glsl_type::float_type);
-            bld.MUL(temp, offset(offset_src, bld, i), brw_imm_f(16.0f));
-            fs_reg itemp = vgrf(glsl_type::int_type);
-            /* float to int */
-            bld.MOV(itemp, temp);
-
-            /* Clamp the upper end of the range to +7/16.
-             * ARB_gpu_shader5 requires that we support a maximum offset
-             * of +0.5, which isn't representable in a S0.4 value -- if
-             * we didn't clamp it, we'd end up with -8/16, which is the
-             * opposite of what the shader author wanted.
-             *
-             * This is legal due to ARB_gpu_shader5's quantization
-             * rules:
-             *
-             * "Not all values of <offset> may be supported; x and y
-             * offsets may be rounded to fixed-point values with the
-             * number of fraction bits given by the
-             * implementation-dependent constant
-             * FRAGMENT_INTERPOLATION_OFFSET_BITS"
-             */
-            set_condmod(BRW_CONDITIONAL_L,
-                        bld.SEL(offset(src, bld, i), itemp, brw_imm_d(7)));
-         }
-
+         fs_reg src = retype(get_nir_src(instr->src[0]), BRW_REGISTER_TYPE_D);
          const enum opcode opcode = FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET;
          emit_pixel_interpolater_send(bld,
                                       opcode,
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index b83a8800063..4589ddd9501 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -364,6 +364,45 @@ brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue_map)
    }
 }
 
+/**
+ * Convert interpolateAtOffset() offsets from [-0.5, +0.5] floating point
+ * offsets to integer [-8, +7] offsets (in units of 1/16th of a pixel).
+ *
+ * We clamp to +7/16 on the upper end of the range, since +0.5 isn't
+ * representable in a S0.4 value; a naive conversion would give us -8/16,
+ * which is the opposite of what was intended.
+ *
+ * This is allowed by GL_ARB_gpu_shader5's quantization rules:
+ *
+ *    "Not all values of <offset> may be supported; x and y offsets may
+ *     be rounded to fixed-point values with the number of fraction bits
+ *     given by the implementation-dependent constant
+ *     FRAGMENT_INTERPOLATION_OFFSET_BITS."
+ */
+static bool
+lower_barycentric_at_offset(nir_builder *b, nir_instr *instr, void *data)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+   if (intrin->intrinsic != nir_intrinsic_load_barycentric_at_offset)
+      return false;
+
+   b->cursor = nir_before_instr(instr);
+
+   assert(intrin->src[0].ssa);
+   nir_ssa_def *offset =
+      nir_imin(b, nir_imm_int(b, 7),
+               nir_f2i32(b, nir_fmul(b, nir_imm_float(b, 16),
+                                     intrin->src[0].ssa)));
+
+   nir_instr_rewrite_src(instr, &intrin->src[0], nir_src_for_ssa(offset));
+
+   return true;
+}
+
 void
 brw_nir_lower_fs_inputs(nir_shader *nir,
                         const struct gen_device_info *devinfo,
@@ -404,6 +443,11 @@ brw_nir_lower_fs_inputs(nir_shader *nir,
    if (devinfo->gen >= 11)
       nir_lower_interpolation(nir, ~0);
 
+   nir_shader_instructions_pass(nir, lower_barycentric_at_offset,
+                                nir_metadata_block_index |
+                                nir_metadata_dominance,
+                                NULL);
+
    /* This pass needs actual constants */
    nir_opt_constant_folding(nir);
 