diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 1b443d7299d..3f5d217ca80 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -457,6 +457,11 @@ public: return *static_cast(this->payload_); }; + const fs_thread_payload &fs_payload() const { + assert(stage == MESA_SHADER_FRAGMENT); + return *static_cast(this->payload_); + }; + cs_thread_payload &cs_payload() { assert(gl_shader_stage_uses_workgroup(stage)); return *static_cast(this->payload_); diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index ecd4b50cfd7..447c5b9edc4 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1986,6 +1986,181 @@ emit_pixel_interpolater_send(const fs_builder &bld, return inst; } +/** + * Return the specified component \p subreg of a per-polygon PS + * payload register for the polygon corresponding to each channel + * specified in the provided \p bld. + * + * \p reg specifies the payload register in REG_SIZE units for the + * first polygon dispatched to the thread. This function requires + * that subsequent registers on the payload contain the corresponding + * register for subsequent polygons, one GRF register per polygon, if + * multiple polygons are being processed by the same PS thread. + * + * This can be used to access the value of a "Source Depth and/or W + * Attribute Vertex Deltas", "Perspective Bary Planes" or + * "Non-Perspective Bary Planes" payload field conveniently for + * multiple polygons as a single fs_reg. 
+ */
+static fs_reg
+fetch_polygon_reg(const fs_builder &bld, unsigned reg, unsigned subreg)
+{
+   const fs_visitor *shader = bld.shader;
+   assert(shader->stage == MESA_SHADER_FRAGMENT);
+
+   const struct intel_device_info *devinfo = shader->devinfo;
+   const unsigned poly_width = shader->dispatch_width / shader->max_polygons; /* channels per polygon */
+   const unsigned poly_idx = bld.group() / poly_width; /* polygon that holds the first channel of bld */
+   assert(bld.group() % poly_width == 0); /* bld must begin on a polygon boundary */
+
+   if (bld.dispatch_width() > poly_width) { /* bld spans more than one polygon */
+      assert(bld.dispatch_width() <= 2 * poly_width); /* ...but never more than two */
+      const unsigned reg_size = reg_unit(devinfo) * REG_SIZE;
+      const unsigned vstride = reg_size / brw_type_size_bytes(BRW_TYPE_F); /* reg_size expressed in F-typed elements */
+      return stride(brw_vec1_grf(reg + reg_unit(devinfo) * poly_idx, subreg),
+                    vstride, poly_width, 0); /* NOTE(review): presumably steps one register per poly_width channels -- confirm against stride() */
+   } else {
+      return brw_vec1_grf(reg + reg_unit(devinfo) * poly_idx, subreg); /* bld covers a single polygon */
+   }
+}
+
+/**
+ * Interpolate per-polygon barycentrics at a specific offset relative
+ * to each channel fragment coordinates, optionally using
+ * perspective-correct interpolation if requested. This is mostly
+ * useful as replacement for the PI shared function that existed on
+ * platforms prior to Xe2, but is expected to work on earlier
+ * platforms since we can get the required polygon setup information
+ * from the thread payload as far back as ICL.
+ */
+static void
+emit_pixel_interpolater_alu_at_offset(const fs_builder &bld,
+                                      const fs_reg &dst,
+                                      const fs_reg &offs,
+                                      glsl_interp_mode interpolation)
+{
+   const fs_visitor *shader = bld.shader;
+   assert(shader->stage == MESA_SHADER_FRAGMENT);
+
+   const intel_device_info *devinfo = shader->devinfo;
+   assert(devinfo->ver >= 11);
+
+   const fs_thread_payload &payload = shader->fs_payload(); /* shader is a const pointer, so this uses the const accessor */
+   const struct brw_wm_prog_data *wm_prog_data =
+      brw_wm_prog_data(shader->prog_data);
+
+   if (interpolation == INTERP_MODE_NOPERSPECTIVE) { /* the prog_data flags matching the mode in use must be set */
+      assert(wm_prog_data->uses_npc_bary_coefficients &&
+             wm_prog_data->uses_nonperspective_interp_modes);
+   } else {
+      assert(interpolation == INTERP_MODE_SMOOTH); /* only these two modes are handled */
+      assert(wm_prog_data->uses_pc_bary_coefficients &&
+             wm_prog_data->uses_depth_w_coefficients);
+   }
+
+   /* Account for half-pixel X/Y coordinate offset. */
+   const fs_reg off_x = bld.vgrf(BRW_TYPE_F);
+   bld.ADD(off_x, offs, brw_imm_f(0.5));
+
+   const fs_reg off_y = bld.vgrf(BRW_TYPE_F);
+   bld.ADD(off_y, offset(offs, bld, 1), brw_imm_f(0.5)); /* presumably component 1 of offs is the Y offset -- confirm */
+
+   /* Process no more than two polygons at a time to avoid hitting
+    * regioning restrictions.
+    */
+   const unsigned poly_width = shader->dispatch_width / shader->max_polygons; /* channels per polygon */
+
+   for (unsigned i = 0; i < DIV_ROUND_UP(shader->max_polygons, 2); i++) { /* i selects a group of up to two polygons */
+      const fs_builder ibld = bld.group(MIN2(bld.dispatch_width(), 2 * poly_width), i);
+
+      /* Fetch needed parameters from the thread payload. */
+      const unsigned bary_coef_reg = interpolation == INTERP_MODE_NOPERSPECTIVE ?
+         payload.npc_bary_coef_reg : payload.pc_bary_coef_reg;
+      const fs_reg start_x = devinfo->ver < 12 ? fetch_polygon_reg(ibld, 1, 1) : /* ver < 12 reads fixed register 1 instead of bary_coef_reg */
+         fetch_polygon_reg(ibld, bary_coef_reg,
+                           devinfo->ver >= 20 ? 6 : 2);
+      const fs_reg start_y = devinfo->ver < 12 ? fetch_polygon_reg(ibld, 1, 6) :
+         fetch_polygon_reg(ibld, bary_coef_reg,
+                           devinfo->ver >= 20 ? 7 : 6);
+
+      const fs_reg bary1_c0 = fetch_polygon_reg(ibld, bary_coef_reg,
+                                                devinfo->ver >= 20 ? 2 : 3); /* component index differs for ver >= 20 */
+      const fs_reg bary1_cx = fetch_polygon_reg(ibld, bary_coef_reg, 1);
+      const fs_reg bary1_cy = fetch_polygon_reg(ibld, bary_coef_reg, 0);
+
+      const fs_reg bary2_c0 = fetch_polygon_reg(ibld, bary_coef_reg,
+                                                devinfo->ver >= 20 ? 5 : 7);
+      const fs_reg bary2_cx = fetch_polygon_reg(ibld, bary_coef_reg,
+                                                devinfo->ver >= 20 ? 4 : 5);
+      const fs_reg bary2_cy = fetch_polygon_reg(ibld, bary_coef_reg,
+                                                devinfo->ver >= 20 ? 3 : 4);
+
+      const fs_reg rhw_c0 = devinfo->ver >= 20 ? /* ver >= 20 reads the RHW terms from the register after depth_w_coef_reg */
+         fetch_polygon_reg(ibld, payload.depth_w_coef_reg + 1, 5) :
+         fetch_polygon_reg(ibld, payload.depth_w_coef_reg, 7);
+      const fs_reg rhw_cx = devinfo->ver >= 20 ?
+         fetch_polygon_reg(ibld, payload.depth_w_coef_reg + 1, 4) :
+         fetch_polygon_reg(ibld, payload.depth_w_coef_reg, 5);
+      const fs_reg rhw_cy = devinfo->ver >= 20 ?
+         fetch_polygon_reg(ibld, payload.depth_w_coef_reg + 1, 3) :
+         fetch_polygon_reg(ibld, payload.depth_w_coef_reg, 4);
+
+      /* Compute X/Y coordinate deltas relative to the origin of the polygon. */
+      const fs_reg delta_x = ibld.vgrf(BRW_TYPE_F);
+      ibld.ADD(delta_x, offset(shader->pixel_x, ibld, i), negate(start_x)); /* pixel_x - start_x */
+      ibld.ADD(delta_x, delta_x, offset(off_x, ibld, i)); /* ...plus the half-pixel-adjusted X offset */
+
+      const fs_reg delta_y = ibld.vgrf(BRW_TYPE_F);
+      ibld.ADD(delta_y, offset(shader->pixel_y, ibld, i), negate(start_y)); /* pixel_y - start_y */
+      ibld.ADD(delta_y, delta_y, offset(off_y, ibld, i)); /* ...plus the half-pixel-adjusted Y offset */
+
+      /* Evaluate the plane equations obtained above for the
+       * barycentrics and RHW coordinate at the offset specified for
+       * each channel. Limit arithmetic to acc_width in order to
+       * allow the accumulator to be used for linear interpolation.
+       */
+      const unsigned acc_width = 16 * reg_unit(devinfo);
+      const fs_reg rhw = ibld.vgrf(BRW_TYPE_F);
+      const fs_reg bary1 = ibld.vgrf(BRW_TYPE_F);
+      const fs_reg bary2 = ibld.vgrf(BRW_TYPE_F);
+
+      for (unsigned j = 0; j < DIV_ROUND_UP(ibld.dispatch_width(), acc_width); j++) { /* walk ibld in chunks of at most acc_width channels */
+         const fs_builder jbld = ibld.group(MIN2(ibld.dispatch_width(), acc_width), j);
+         const fs_reg acc = suboffset(brw_acc_reg(16), jbld.group() % acc_width);
+
+         if (interpolation != INTERP_MODE_NOPERSPECTIVE) { /* rhw is only consumed by the perspective-correct branch below */
+            jbld.MAD(acc, horiz_offset(rhw_c0, acc_width * j),
+                     horiz_offset(rhw_cx, acc_width * j), offset(delta_x, jbld, j)); /* NOTE(review): presumably acc = c0 + cx * delta_x -- confirm MAD operand order */
+            jbld.MAC(offset(rhw, jbld, j),
+                     horiz_offset(rhw_cy, acc_width * j), offset(delta_y, jbld, j)); /* MAC takes no explicit acc operand; keep it right after its MAD */
+         }
+
+         jbld.MAD(acc, horiz_offset(bary1_c0, acc_width * j),
+                  horiz_offset(bary1_cx, acc_width * j), offset(delta_x, jbld, j)); /* same MAD/MAC pairing for the first barycentric */
+         jbld.MAC(offset(bary1, jbld, j),
+                  horiz_offset(bary1_cy, acc_width * j), offset(delta_y, jbld, j));
+
+         jbld.MAD(acc, horiz_offset(bary2_c0, acc_width * j),
+                  horiz_offset(bary2_cx, acc_width * j), offset(delta_x, jbld, j)); /* ...and for the second barycentric */
+         jbld.MAC(offset(bary2, jbld, j),
+                  horiz_offset(bary2_cy, acc_width * j), offset(delta_y, jbld, j));
+      }
+
+      /* Scale the results dividing by the interpolated RHW coordinate
+       * if the interpolation is required to be perspective-correct.
+       */
+      if (interpolation == INTERP_MODE_NOPERSPECTIVE) {
+         ibld.MOV(offset(dst, ibld, i), bary1); /* no perspective divide: copy the results through */
+         ibld.MOV(offset(offset(dst, bld, 1), ibld, i), bary2); /* presumably component 1 of dst -- confirm */
+      } else {
+         const fs_reg w = ibld.vgrf(BRW_TYPE_F);
+         ibld.emit(SHADER_OPCODE_RCP, w, rhw); /* presumably w = 1 / rhw */
+         ibld.MUL(offset(dst, ibld, i), bary1, w);
+         ibld.MUL(offset(offset(dst, bld, 1), ibld, i), bary2, w);
+      }
+   }
+}
+
 /**
  * Computes 1 << x, given a D/UD register containing some value x.
*/ @@ -4094,9 +4269,13 @@ fs_nir_emit_fs_intrinsic(nir_to_brw_state &ntb, const glsl_interp_mode interpolation = (enum glsl_interp_mode) nir_intrinsic_interp_mode(instr); - nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); + if (devinfo->ver >= 20) { + emit_pixel_interpolater_alu_at_offset( + bld, dest, + retype(get_nir_src(ntb, instr->src[0]), BRW_TYPE_F), + interpolation); - if (const_offset) { + } else if (nir_const_value *const_offset = nir_src_as_const_value(instr->src[0])) { assert(nir_src_bit_size(instr->src[0]) == 32); unsigned off_x = const_offset[0].u32 & 0xf; unsigned off_y = const_offset[1].u32 & 0xf;