mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 04:40:09 +01:00
amd/llvm: switch to 3-spaces style
Follow-up of !4319 using the same clang-format config. Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Acked-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5310>
This commit is contained in:
parent
afa1fba198
commit
82d2d73e03
10 changed files with 8876 additions and 10026 deletions
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -24,205 +24,188 @@
|
|||
*/
|
||||
|
||||
#include "ac_llvm_cull.h"
|
||||
|
||||
#include <llvm-c/Core.h>
|
||||
|
||||
struct ac_position_w_info {
|
||||
/* If a primitive intersects the W=0 plane, it causes a reflection
|
||||
* of the determinant used for face culling. Every vertex behind
|
||||
* the W=0 plane negates the determinant, so having 2 vertices behind
|
||||
* the plane has no effect. This is i1 true if the determinant should be
|
||||
* negated.
|
||||
*/
|
||||
LLVMValueRef w_reflection;
|
||||
/* If a primitive intersects the W=0 plane, it causes a reflection
|
||||
* of the determinant used for face culling. Every vertex behind
|
||||
* the W=0 plane negates the determinant, so having 2 vertices behind
|
||||
* the plane has no effect. This is i1 true if the determinant should be
|
||||
* negated.
|
||||
*/
|
||||
LLVMValueRef w_reflection;
|
||||
|
||||
/* If we simplify the "-w <= p <= w" view culling equation, we get
|
||||
* "-w <= w", which can't be satisfied when w is negative.
|
||||
* In perspective projection, a negative W means that the primitive
|
||||
* is behind the viewer, but the equation is independent of the type
|
||||
* of projection.
|
||||
*
|
||||
* w_accepted is false when all W are negative and therefore
|
||||
* the primitive is invisible.
|
||||
*/
|
||||
LLVMValueRef w_accepted;
|
||||
/* If we simplify the "-w <= p <= w" view culling equation, we get
|
||||
* "-w <= w", which can't be satisfied when w is negative.
|
||||
* In perspective projection, a negative W means that the primitive
|
||||
* is behind the viewer, but the equation is independent of the type
|
||||
* of projection.
|
||||
*
|
||||
* w_accepted is false when all W are negative and therefore
|
||||
* the primitive is invisible.
|
||||
*/
|
||||
LLVMValueRef w_accepted;
|
||||
|
||||
LLVMValueRef all_w_positive;
|
||||
LLVMValueRef any_w_negative;
|
||||
LLVMValueRef all_w_positive;
|
||||
LLVMValueRef any_w_negative;
|
||||
};
|
||||
|
||||
static void ac_analyze_position_w(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef pos[3][4],
|
||||
struct ac_position_w_info *w)
|
||||
static void ac_analyze_position_w(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
|
||||
struct ac_position_w_info *w)
|
||||
{
|
||||
LLVMBuilderRef builder = ctx->builder;
|
||||
LLVMValueRef all_w_negative = ctx->i1true;
|
||||
LLVMBuilderRef builder = ctx->builder;
|
||||
LLVMValueRef all_w_negative = ctx->i1true;
|
||||
|
||||
w->w_reflection = ctx->i1false;
|
||||
w->any_w_negative = ctx->i1false;
|
||||
w->w_reflection = ctx->i1false;
|
||||
w->any_w_negative = ctx->i1false;
|
||||
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
LLVMValueRef neg_w;
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
LLVMValueRef neg_w;
|
||||
|
||||
neg_w = LLVMBuildFCmp(builder, LLVMRealOLT, pos[i][3], ctx->f32_0, "");
|
||||
/* If neg_w is true, negate w_reflection. */
|
||||
w->w_reflection = LLVMBuildXor(builder, w->w_reflection, neg_w, "");
|
||||
w->any_w_negative = LLVMBuildOr(builder, w->any_w_negative, neg_w, "");
|
||||
all_w_negative = LLVMBuildAnd(builder, all_w_negative, neg_w, "");
|
||||
}
|
||||
w->all_w_positive = LLVMBuildNot(builder, w->any_w_negative, "");
|
||||
w->w_accepted = LLVMBuildNot(builder, all_w_negative, "");
|
||||
neg_w = LLVMBuildFCmp(builder, LLVMRealOLT, pos[i][3], ctx->f32_0, "");
|
||||
/* If neg_w is true, negate w_reflection. */
|
||||
w->w_reflection = LLVMBuildXor(builder, w->w_reflection, neg_w, "");
|
||||
w->any_w_negative = LLVMBuildOr(builder, w->any_w_negative, neg_w, "");
|
||||
all_w_negative = LLVMBuildAnd(builder, all_w_negative, neg_w, "");
|
||||
}
|
||||
w->all_w_positive = LLVMBuildNot(builder, w->any_w_negative, "");
|
||||
w->w_accepted = LLVMBuildNot(builder, all_w_negative, "");
|
||||
}
|
||||
|
||||
/* Perform front/back face culling and return true if the primitive is accepted. */
|
||||
static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef pos[3][4],
|
||||
struct ac_position_w_info *w,
|
||||
bool cull_front,
|
||||
bool cull_back,
|
||||
bool cull_zero_area)
|
||||
static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
|
||||
struct ac_position_w_info *w, bool cull_front, bool cull_back,
|
||||
bool cull_zero_area)
|
||||
{
|
||||
LLVMBuilderRef builder = ctx->builder;
|
||||
LLVMBuilderRef builder = ctx->builder;
|
||||
|
||||
if (cull_front && cull_back)
|
||||
return ctx->i1false;
|
||||
if (cull_front && cull_back)
|
||||
return ctx->i1false;
|
||||
|
||||
if (!cull_front && !cull_back && !cull_zero_area)
|
||||
return ctx->i1true;
|
||||
if (!cull_front && !cull_back && !cull_zero_area)
|
||||
return ctx->i1true;
|
||||
|
||||
/* Front/back face culling. Also if the determinant == 0, the triangle
|
||||
* area is 0.
|
||||
*/
|
||||
LLVMValueRef det_t0 = LLVMBuildFSub(builder, pos[2][0], pos[0][0], "");
|
||||
LLVMValueRef det_t1 = LLVMBuildFSub(builder, pos[1][1], pos[0][1], "");
|
||||
LLVMValueRef det_t2 = LLVMBuildFSub(builder, pos[0][0], pos[1][0], "");
|
||||
LLVMValueRef det_t3 = LLVMBuildFSub(builder, pos[0][1], pos[2][1], "");
|
||||
LLVMValueRef det_p0 = LLVMBuildFMul(builder, det_t0, det_t1, "");
|
||||
LLVMValueRef det_p1 = LLVMBuildFMul(builder, det_t2, det_t3, "");
|
||||
LLVMValueRef det = LLVMBuildFSub(builder, det_p0, det_p1, "");
|
||||
/* Front/back face culling. Also if the determinant == 0, the triangle
|
||||
* area is 0.
|
||||
*/
|
||||
LLVMValueRef det_t0 = LLVMBuildFSub(builder, pos[2][0], pos[0][0], "");
|
||||
LLVMValueRef det_t1 = LLVMBuildFSub(builder, pos[1][1], pos[0][1], "");
|
||||
LLVMValueRef det_t2 = LLVMBuildFSub(builder, pos[0][0], pos[1][0], "");
|
||||
LLVMValueRef det_t3 = LLVMBuildFSub(builder, pos[0][1], pos[2][1], "");
|
||||
LLVMValueRef det_p0 = LLVMBuildFMul(builder, det_t0, det_t1, "");
|
||||
LLVMValueRef det_p1 = LLVMBuildFMul(builder, det_t2, det_t3, "");
|
||||
LLVMValueRef det = LLVMBuildFSub(builder, det_p0, det_p1, "");
|
||||
|
||||
/* Negative W negates the determinant. */
|
||||
det = LLVMBuildSelect(builder, w->w_reflection,
|
||||
LLVMBuildFNeg(builder, det, ""),
|
||||
det, "");
|
||||
/* Negative W negates the determinant. */
|
||||
det = LLVMBuildSelect(builder, w->w_reflection, LLVMBuildFNeg(builder, det, ""), det, "");
|
||||
|
||||
LLVMValueRef accepted = NULL;
|
||||
if (cull_front) {
|
||||
LLVMRealPredicate cond = cull_zero_area ? LLVMRealOGT : LLVMRealOGE;
|
||||
accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");
|
||||
} else if (cull_back) {
|
||||
LLVMRealPredicate cond = cull_zero_area ? LLVMRealOLT : LLVMRealOLE;
|
||||
accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");
|
||||
} else if (cull_zero_area) {
|
||||
accepted = LLVMBuildFCmp(builder, LLVMRealONE, det, ctx->f32_0, "");
|
||||
}
|
||||
return accepted;
|
||||
LLVMValueRef accepted = NULL;
|
||||
if (cull_front) {
|
||||
LLVMRealPredicate cond = cull_zero_area ? LLVMRealOGT : LLVMRealOGE;
|
||||
accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");
|
||||
} else if (cull_back) {
|
||||
LLVMRealPredicate cond = cull_zero_area ? LLVMRealOLT : LLVMRealOLE;
|
||||
accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");
|
||||
} else if (cull_zero_area) {
|
||||
accepted = LLVMBuildFCmp(builder, LLVMRealONE, det, ctx->f32_0, "");
|
||||
}
|
||||
return accepted;
|
||||
}
|
||||
|
||||
/* Perform view culling and small primitive elimination and return true
|
||||
* if the primitive is accepted and initially_accepted == true. */
|
||||
static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef pos[3][4],
|
||||
LLVMValueRef initially_accepted,
|
||||
struct ac_position_w_info *w,
|
||||
LLVMValueRef vp_scale[2],
|
||||
LLVMValueRef vp_translate[2],
|
||||
LLVMValueRef small_prim_precision,
|
||||
bool cull_view_xy,
|
||||
bool cull_view_near_z,
|
||||
bool cull_view_far_z,
|
||||
bool cull_small_prims,
|
||||
bool use_halfz_clip_space)
|
||||
static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
|
||||
LLVMValueRef initially_accepted, struct ac_position_w_info *w,
|
||||
LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2],
|
||||
LLVMValueRef small_prim_precision, bool cull_view_xy,
|
||||
bool cull_view_near_z, bool cull_view_far_z, bool cull_small_prims,
|
||||
bool use_halfz_clip_space)
|
||||
{
|
||||
LLVMBuilderRef builder = ctx->builder;
|
||||
LLVMBuilderRef builder = ctx->builder;
|
||||
|
||||
if (!cull_view_xy && !cull_view_near_z && !cull_view_far_z && !cull_small_prims)
|
||||
return initially_accepted;
|
||||
if (!cull_view_xy && !cull_view_near_z && !cull_view_far_z && !cull_small_prims)
|
||||
return initially_accepted;
|
||||
|
||||
/* Skip the culling if the primitive has already been rejected or
|
||||
* if any W is negative. The bounding box culling doesn't work when
|
||||
* W is negative.
|
||||
*/
|
||||
LLVMValueRef cond = LLVMBuildAnd(builder, initially_accepted,
|
||||
w->all_w_positive, "");
|
||||
LLVMValueRef accepted_var = ac_build_alloca_undef(ctx, ctx->i1, "");
|
||||
LLVMBuildStore(builder, initially_accepted, accepted_var);
|
||||
/* Skip the culling if the primitive has already been rejected or
|
||||
* if any W is negative. The bounding box culling doesn't work when
|
||||
* W is negative.
|
||||
*/
|
||||
LLVMValueRef cond = LLVMBuildAnd(builder, initially_accepted, w->all_w_positive, "");
|
||||
LLVMValueRef accepted_var = ac_build_alloca_undef(ctx, ctx->i1, "");
|
||||
LLVMBuildStore(builder, initially_accepted, accepted_var);
|
||||
|
||||
ac_build_ifcc(ctx, cond, 10000000 /* does this matter? */);
|
||||
{
|
||||
LLVMValueRef bbox_min[3], bbox_max[3];
|
||||
LLVMValueRef accepted = initially_accepted;
|
||||
ac_build_ifcc(ctx, cond, 10000000 /* does this matter? */);
|
||||
{
|
||||
LLVMValueRef bbox_min[3], bbox_max[3];
|
||||
LLVMValueRef accepted = initially_accepted;
|
||||
|
||||
/* Compute the primitive bounding box for easy culling. */
|
||||
for (unsigned chan = 0; chan < (cull_view_near_z || cull_view_far_z ? 3 : 2); chan++) {
|
||||
bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]);
|
||||
bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);
|
||||
/* Compute the primitive bounding box for easy culling. */
|
||||
for (unsigned chan = 0; chan < (cull_view_near_z || cull_view_far_z ? 3 : 2); chan++) {
|
||||
bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]);
|
||||
bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);
|
||||
|
||||
bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]);
|
||||
bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);
|
||||
}
|
||||
bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]);
|
||||
bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);
|
||||
}
|
||||
|
||||
/* View culling. */
|
||||
if (cull_view_xy || cull_view_near_z || cull_view_far_z) {
|
||||
for (unsigned chan = 0; chan < 3; chan++) {
|
||||
LLVMValueRef visible;
|
||||
/* View culling. */
|
||||
if (cull_view_xy || cull_view_near_z || cull_view_far_z) {
|
||||
for (unsigned chan = 0; chan < 3; chan++) {
|
||||
LLVMValueRef visible;
|
||||
|
||||
if ((cull_view_xy && chan <= 1) ||
|
||||
(cull_view_near_z && chan == 2)) {
|
||||
float t = chan == 2 && use_halfz_clip_space ? 0 : -1;
|
||||
visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan],
|
||||
LLVMConstReal(ctx->f32, t), "");
|
||||
accepted = LLVMBuildAnd(builder, accepted, visible, "");
|
||||
}
|
||||
if ((cull_view_xy && chan <= 1) || (cull_view_near_z && chan == 2)) {
|
||||
float t = chan == 2 && use_halfz_clip_space ? 0 : -1;
|
||||
visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan],
|
||||
LLVMConstReal(ctx->f32, t), "");
|
||||
accepted = LLVMBuildAnd(builder, accepted, visible, "");
|
||||
}
|
||||
|
||||
if ((cull_view_xy && chan <= 1) ||
|
||||
(cull_view_far_z && chan == 2)) {
|
||||
visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan],
|
||||
ctx->f32_1, "");
|
||||
accepted = LLVMBuildAnd(builder, accepted, visible, "");
|
||||
}
|
||||
}
|
||||
}
|
||||
if ((cull_view_xy && chan <= 1) || (cull_view_far_z && chan == 2)) {
|
||||
visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan], ctx->f32_1, "");
|
||||
accepted = LLVMBuildAnd(builder, accepted, visible, "");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Small primitive elimination. */
|
||||
if (cull_small_prims) {
|
||||
/* Assuming a sample position at (0.5, 0.5), if we round
|
||||
* the bounding box min/max extents and the results of
|
||||
* the rounding are equal in either the X or Y direction,
|
||||
* the bounding box does not intersect the sample.
|
||||
*
|
||||
* See these GDC slides for pictures:
|
||||
* https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf
|
||||
*/
|
||||
LLVMValueRef min, max, not_equal[2], visible;
|
||||
/* Small primitive elimination. */
|
||||
if (cull_small_prims) {
|
||||
/* Assuming a sample position at (0.5, 0.5), if we round
|
||||
* the bounding box min/max extents and the results of
|
||||
* the rounding are equal in either the X or Y direction,
|
||||
* the bounding box does not intersect the sample.
|
||||
*
|
||||
* See these GDC slides for pictures:
|
||||
* https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf
|
||||
*/
|
||||
LLVMValueRef min, max, not_equal[2], visible;
|
||||
|
||||
for (unsigned chan = 0; chan < 2; chan++) {
|
||||
/* Convert the position to screen-space coordinates. */
|
||||
min = ac_build_fmad(ctx, bbox_min[chan],
|
||||
vp_scale[chan], vp_translate[chan]);
|
||||
max = ac_build_fmad(ctx, bbox_max[chan],
|
||||
vp_scale[chan], vp_translate[chan]);
|
||||
/* Scale the bounding box according to the precision of
|
||||
* the rasterizer and the number of MSAA samples. */
|
||||
min = LLVMBuildFSub(builder, min, small_prim_precision, "");
|
||||
max = LLVMBuildFAdd(builder, max, small_prim_precision, "");
|
||||
for (unsigned chan = 0; chan < 2; chan++) {
|
||||
/* Convert the position to screen-space coordinates. */
|
||||
min = ac_build_fmad(ctx, bbox_min[chan], vp_scale[chan], vp_translate[chan]);
|
||||
max = ac_build_fmad(ctx, bbox_max[chan], vp_scale[chan], vp_translate[chan]);
|
||||
/* Scale the bounding box according to the precision of
|
||||
* the rasterizer and the number of MSAA samples. */
|
||||
min = LLVMBuildFSub(builder, min, small_prim_precision, "");
|
||||
max = LLVMBuildFAdd(builder, max, small_prim_precision, "");
|
||||
|
||||
/* Determine if the bbox intersects the sample point.
|
||||
* It also works for MSAA, but vp_scale, vp_translate,
|
||||
* and small_prim_precision are computed differently.
|
||||
*/
|
||||
min = ac_build_round(ctx, min);
|
||||
max = ac_build_round(ctx, max);
|
||||
not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, "");
|
||||
}
|
||||
visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], "");
|
||||
accepted = LLVMBuildAnd(builder, accepted, visible, "");
|
||||
}
|
||||
/* Determine if the bbox intersects the sample point.
|
||||
* It also works for MSAA, but vp_scale, vp_translate,
|
||||
* and small_prim_precision are computed differently.
|
||||
*/
|
||||
min = ac_build_round(ctx, min);
|
||||
max = ac_build_round(ctx, max);
|
||||
not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, "");
|
||||
}
|
||||
visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], "");
|
||||
accepted = LLVMBuildAnd(builder, accepted, visible, "");
|
||||
}
|
||||
|
||||
LLVMBuildStore(builder, accepted, accepted_var);
|
||||
}
|
||||
ac_build_endif(ctx, 10000000);
|
||||
LLVMBuildStore(builder, accepted, accepted_var);
|
||||
}
|
||||
ac_build_endif(ctx, 10000000);
|
||||
|
||||
return LLVMBuildLoad(builder, accepted_var, "");
|
||||
return LLVMBuildLoad(builder, accepted_var, "");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -241,35 +224,27 @@ static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx,
|
|||
* subpixel_bits are defined by the quantization mode.
|
||||
* \param options See ac_cull_options.
|
||||
*/
|
||||
LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef pos[3][4],
|
||||
LLVMValueRef initially_accepted,
|
||||
LLVMValueRef vp_scale[2],
|
||||
LLVMValueRef vp_translate[2],
|
||||
LLVMValueRef small_prim_precision,
|
||||
struct ac_cull_options *options)
|
||||
LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
|
||||
LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2],
|
||||
LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision,
|
||||
struct ac_cull_options *options)
|
||||
{
|
||||
struct ac_position_w_info w;
|
||||
ac_analyze_position_w(ctx, pos, &w);
|
||||
struct ac_position_w_info w;
|
||||
ac_analyze_position_w(ctx, pos, &w);
|
||||
|
||||
/* W culling. */
|
||||
LLVMValueRef accepted = options->cull_w ? w.w_accepted : ctx->i1true;
|
||||
accepted = LLVMBuildAnd(ctx->builder, accepted, initially_accepted, "");
|
||||
/* W culling. */
|
||||
LLVMValueRef accepted = options->cull_w ? w.w_accepted : ctx->i1true;
|
||||
accepted = LLVMBuildAnd(ctx->builder, accepted, initially_accepted, "");
|
||||
|
||||
/* Face culling. */
|
||||
accepted = LLVMBuildAnd(ctx->builder, accepted,
|
||||
ac_cull_face(ctx, pos, &w,
|
||||
options->cull_front,
|
||||
options->cull_back,
|
||||
options->cull_zero_area), "");
|
||||
/* Face culling. */
|
||||
accepted = LLVMBuildAnd(
|
||||
ctx->builder, accepted,
|
||||
ac_cull_face(ctx, pos, &w, options->cull_front, options->cull_back, options->cull_zero_area),
|
||||
"");
|
||||
|
||||
/* View culling and small primitive elimination. */
|
||||
accepted = cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate,
|
||||
small_prim_precision,
|
||||
options->cull_view_xy,
|
||||
options->cull_view_near_z,
|
||||
options->cull_view_far_z,
|
||||
options->cull_small_prims,
|
||||
options->use_halfz_clip_space);
|
||||
return accepted;
|
||||
/* View culling and small primitive elimination. */
|
||||
accepted = cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate, small_prim_precision,
|
||||
options->cull_view_xy, options->cull_view_near_z, options->cull_view_far_z,
|
||||
options->cull_small_prims, options->use_halfz_clip_space);
|
||||
return accepted;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,31 +29,28 @@
|
|||
#include "ac_llvm_build.h"
|
||||
|
||||
struct ac_cull_options {
|
||||
/* In general, I recommend setting all to true except view Z culling,
|
||||
* which isn't so effective because W culling is cheaper and partially
|
||||
* replaces near Z culling, and you don't need to set Position.z
|
||||
* if Z culling is disabled.
|
||||
*
|
||||
* If something doesn't work, turn some of these off to find out what.
|
||||
*/
|
||||
bool cull_front;
|
||||
bool cull_back;
|
||||
bool cull_view_xy;
|
||||
bool cull_view_near_z;
|
||||
bool cull_view_far_z;
|
||||
bool cull_small_prims;
|
||||
bool cull_zero_area;
|
||||
bool cull_w; /* cull primitives with all W < 0 */
|
||||
/* In general, I recommend setting all to true except view Z culling,
|
||||
* which isn't so effective because W culling is cheaper and partially
|
||||
* replaces near Z culling, and you don't need to set Position.z
|
||||
* if Z culling is disabled.
|
||||
*
|
||||
* If something doesn't work, turn some of these off to find out what.
|
||||
*/
|
||||
bool cull_front;
|
||||
bool cull_back;
|
||||
bool cull_view_xy;
|
||||
bool cull_view_near_z;
|
||||
bool cull_view_far_z;
|
||||
bool cull_small_prims;
|
||||
bool cull_zero_area;
|
||||
bool cull_w; /* cull primitives with all W < 0 */
|
||||
|
||||
bool use_halfz_clip_space;
|
||||
bool use_halfz_clip_space;
|
||||
};
|
||||
|
||||
LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx,
|
||||
LLVMValueRef pos[3][4],
|
||||
LLVMValueRef initially_accepted,
|
||||
LLVMValueRef vp_scale[2],
|
||||
LLVMValueRef vp_translate[2],
|
||||
LLVMValueRef small_prim_precision,
|
||||
struct ac_cull_options *options);
|
||||
LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
|
||||
LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2],
|
||||
LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision,
|
||||
struct ac_cull_options *options);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -23,15 +23,14 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include <llvm-c/Core.h>
|
||||
#include <llvm/Target/TargetMachine.h>
|
||||
#include <llvm/IR/IRBuilder.h>
|
||||
#include <llvm/Analysis/TargetLibraryInfo.h>
|
||||
#include <llvm/IR/IRBuilder.h>
|
||||
#include <llvm/IR/LegacyPassManager.h>
|
||||
#include <llvm/Target/TargetMachine.h>
|
||||
#include <llvm/Transforms/IPO.h>
|
||||
|
||||
#include <llvm/IR/LegacyPassManager.h>
|
||||
#include <cstring>
|
||||
|
||||
/* DO NOT REORDER THE HEADERS
|
||||
* The LLVM headers need to all be included before any Mesa header,
|
||||
|
|
@ -42,7 +41,6 @@
|
|||
#include "ac_binary.h"
|
||||
#include "ac_llvm_util.h"
|
||||
#include "ac_llvm_build.h"
|
||||
|
||||
#include "util/macros.h"
|
||||
|
||||
void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
|
||||
|
|
@ -54,36 +52,36 @@ void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
|
|||
void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
|
||||
{
|
||||
#if LLVM_VERSION_MAJOR >= 10
|
||||
llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
|
||||
A->addAttr(llvm::Attribute::getWithAlignment(A->getContext(), llvm::Align(bytes)));
|
||||
llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
|
||||
A->addAttr(llvm::Attribute::getWithAlignment(A->getContext(), llvm::Align(bytes)));
|
||||
#else
|
||||
/* Avoid unused parameter warnings. */
|
||||
(void)val;
|
||||
(void)bytes;
|
||||
/* Avoid unused parameter warnings. */
|
||||
(void)val;
|
||||
(void)bytes;
|
||||
#endif
|
||||
}
|
||||
|
||||
bool ac_is_sgpr_param(LLVMValueRef arg)
|
||||
{
|
||||
llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
|
||||
llvm::AttributeList AS = A->getParent()->getAttributes();
|
||||
unsigned ArgNo = A->getArgNo();
|
||||
return AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
|
||||
llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
|
||||
llvm::AttributeList AS = A->getParent()->getAttributes();
|
||||
unsigned ArgNo = A->getArgNo();
|
||||
return AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
|
||||
}
|
||||
|
||||
LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
|
||||
{
|
||||
return LLVMGetCalledValue(call);
|
||||
return LLVMGetCalledValue(call);
|
||||
}
|
||||
|
||||
bool ac_llvm_is_function(LLVMValueRef v)
|
||||
{
|
||||
return LLVMGetValueKind(v) == LLVMFunctionValueKind;
|
||||
return LLVMGetValueKind(v) == LLVMFunctionValueKind;
|
||||
}
|
||||
|
||||
LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
|
||||
{
|
||||
llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
|
||||
llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
|
||||
LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
|
||||
|
||||
llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
|
||||
|
|
@ -91,246 +89,243 @@ LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
|
|||
return module;
|
||||
}
|
||||
|
||||
LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
|
||||
enum ac_float_mode float_mode)
|
||||
LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode)
|
||||
{
|
||||
LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
|
||||
LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
|
||||
|
||||
llvm::FastMathFlags flags;
|
||||
llvm::FastMathFlags flags;
|
||||
|
||||
switch (float_mode) {
|
||||
case AC_FLOAT_MODE_DEFAULT:
|
||||
case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
|
||||
break;
|
||||
switch (float_mode) {
|
||||
case AC_FLOAT_MODE_DEFAULT:
|
||||
case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
|
||||
break;
|
||||
|
||||
case AC_FLOAT_MODE_DEFAULT_OPENGL:
|
||||
/* Allow optimizations to treat the sign of a zero argument or
|
||||
* result as insignificant.
|
||||
*/
|
||||
flags.setNoSignedZeros(); /* nsz */
|
||||
case AC_FLOAT_MODE_DEFAULT_OPENGL:
|
||||
/* Allow optimizations to treat the sign of a zero argument or
|
||||
* result as insignificant.
|
||||
*/
|
||||
flags.setNoSignedZeros(); /* nsz */
|
||||
|
||||
/* Allow optimizations to use the reciprocal of an argument
|
||||
* rather than perform division.
|
||||
*/
|
||||
flags.setAllowReciprocal(); /* arcp */
|
||||
/* Allow optimizations to use the reciprocal of an argument
|
||||
* rather than perform division.
|
||||
*/
|
||||
flags.setAllowReciprocal(); /* arcp */
|
||||
|
||||
llvm::unwrap(builder)->setFastMathFlags(flags);
|
||||
break;
|
||||
}
|
||||
llvm::unwrap(builder)->setFastMathFlags(flags);
|
||||
break;
|
||||
}
|
||||
|
||||
return builder;
|
||||
return builder;
|
||||
}
|
||||
|
||||
void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
|
||||
{
|
||||
if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
|
||||
auto *b = llvm::unwrap(ctx->builder);
|
||||
llvm::FastMathFlags flags = b->getFastMathFlags();
|
||||
if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
|
||||
auto *b = llvm::unwrap(ctx->builder);
|
||||
llvm::FastMathFlags flags = b->getFastMathFlags();
|
||||
|
||||
/* This disables the optimization of (x + 0), which is used
|
||||
* to convert negative zero to positive zero.
|
||||
*/
|
||||
flags.setNoSignedZeros(false);
|
||||
b->setFastMathFlags(flags);
|
||||
}
|
||||
/* This disables the optimization of (x + 0), which is used
|
||||
* to convert negative zero to positive zero.
|
||||
*/
|
||||
flags.setNoSignedZeros(false);
|
||||
b->setFastMathFlags(flags);
|
||||
}
|
||||
}
|
||||
|
||||
void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
|
||||
{
|
||||
if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
|
||||
auto *b = llvm::unwrap(ctx->builder);
|
||||
llvm::FastMathFlags flags = b->getFastMathFlags();
|
||||
if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
|
||||
auto *b = llvm::unwrap(ctx->builder);
|
||||
llvm::FastMathFlags flags = b->getFastMathFlags();
|
||||
|
||||
flags.setNoSignedZeros();
|
||||
b->setFastMathFlags(flags);
|
||||
}
|
||||
flags.setNoSignedZeros();
|
||||
b->setFastMathFlags(flags);
|
||||
}
|
||||
}
|
||||
|
||||
LLVMTargetLibraryInfoRef
|
||||
ac_create_target_library_info(const char *triple)
|
||||
LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple)
|
||||
{
|
||||
return reinterpret_cast<LLVMTargetLibraryInfoRef>(new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
|
||||
return reinterpret_cast<LLVMTargetLibraryInfoRef>(
|
||||
new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
|
||||
}
|
||||
|
||||
void
|
||||
ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
|
||||
void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
|
||||
{
|
||||
delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
|
||||
delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
|
||||
}
|
||||
|
||||
/* Implementation of raw_pwrite_stream that works on malloc()ed memory for
|
||||
* better compatibility with C code. */
|
||||
struct raw_memory_ostream : public llvm::raw_pwrite_stream {
|
||||
char *buffer;
|
||||
size_t written;
|
||||
size_t bufsize;
|
||||
char *buffer;
|
||||
size_t written;
|
||||
size_t bufsize;
|
||||
|
||||
raw_memory_ostream()
|
||||
{
|
||||
buffer = NULL;
|
||||
written = 0;
|
||||
bufsize = 0;
|
||||
SetUnbuffered();
|
||||
}
|
||||
raw_memory_ostream()
|
||||
{
|
||||
buffer = NULL;
|
||||
written = 0;
|
||||
bufsize = 0;
|
||||
SetUnbuffered();
|
||||
}
|
||||
|
||||
~raw_memory_ostream()
|
||||
{
|
||||
free(buffer);
|
||||
}
|
||||
~raw_memory_ostream()
|
||||
{
|
||||
free(buffer);
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
written = 0;
|
||||
}
|
||||
void clear()
|
||||
{
|
||||
written = 0;
|
||||
}
|
||||
|
||||
void take(char *&out_buffer, size_t &out_size)
|
||||
{
|
||||
out_buffer = buffer;
|
||||
out_size = written;
|
||||
buffer = NULL;
|
||||
written = 0;
|
||||
bufsize = 0;
|
||||
}
|
||||
void take(char *&out_buffer, size_t &out_size)
|
||||
{
|
||||
out_buffer = buffer;
|
||||
out_size = written;
|
||||
buffer = NULL;
|
||||
written = 0;
|
||||
bufsize = 0;
|
||||
}
|
||||
|
||||
void flush() = delete;
|
||||
void flush() = delete;
|
||||
|
||||
void write_impl(const char *ptr, size_t size) override
|
||||
{
|
||||
if (unlikely(written + size < written))
|
||||
abort();
|
||||
if (written + size > bufsize) {
|
||||
bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
|
||||
buffer = (char *)realloc(buffer, bufsize);
|
||||
if (!buffer) {
|
||||
fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
|
||||
abort();
|
||||
}
|
||||
}
|
||||
memcpy(buffer + written, ptr, size);
|
||||
written += size;
|
||||
}
|
||||
void write_impl(const char *ptr, size_t size) override
|
||||
{
|
||||
if (unlikely(written + size < written))
|
||||
abort();
|
||||
if (written + size > bufsize) {
|
||||
bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
|
||||
buffer = (char *)realloc(buffer, bufsize);
|
||||
if (!buffer) {
|
||||
fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
|
||||
abort();
|
||||
}
|
||||
}
|
||||
memcpy(buffer + written, ptr, size);
|
||||
written += size;
|
||||
}
|
||||
|
||||
void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
|
||||
{
|
||||
assert(offset == (size_t)offset &&
|
||||
offset + size >= offset && offset + size <= written);
|
||||
memcpy(buffer + offset, ptr, size);
|
||||
}
|
||||
void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
|
||||
{
|
||||
assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written);
|
||||
memcpy(buffer + offset, ptr, size);
|
||||
}
|
||||
|
||||
uint64_t current_pos() const override
|
||||
{
|
||||
return written;
|
||||
}
|
||||
uint64_t current_pos() const override
|
||||
{
|
||||
return written;
|
||||
}
|
||||
};
|
||||
|
||||
/* The LLVM compiler is represented as a pass manager containing passes for
|
||||
* optimizations, instruction selection, and code generation.
|
||||
*/
|
||||
struct ac_compiler_passes {
|
||||
raw_memory_ostream ostream; /* ELF shader binary stream */
|
||||
llvm::legacy::PassManager passmgr; /* list of passes */
|
||||
raw_memory_ostream ostream; /* ELF shader binary stream */
|
||||
llvm::legacy::PassManager passmgr; /* list of passes */
|
||||
};
|
||||
|
||||
struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
|
||||
{
|
||||
struct ac_compiler_passes *p = new ac_compiler_passes();
|
||||
if (!p)
|
||||
return NULL;
|
||||
struct ac_compiler_passes *p = new ac_compiler_passes();
|
||||
if (!p)
|
||||
return NULL;
|
||||
|
||||
llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
|
||||
llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
|
||||
|
||||
if (TM->addPassesToEmitFile(p->passmgr, p->ostream,
|
||||
nullptr,
|
||||
if (TM->addPassesToEmitFile(p->passmgr, p->ostream, nullptr,
|
||||
#if LLVM_VERSION_MAJOR >= 10
|
||||
llvm::CGFT_ObjectFile)) {
|
||||
llvm::CGFT_ObjectFile)) {
|
||||
#else
|
||||
llvm::TargetMachine::CGFT_ObjectFile)) {
|
||||
llvm::TargetMachine::CGFT_ObjectFile)) {
|
||||
#endif
|
||||
fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
|
||||
delete p;
|
||||
return NULL;
|
||||
}
|
||||
return p;
|
||||
fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
|
||||
delete p;
|
||||
return NULL;
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
|
||||
{
|
||||
delete p;
|
||||
delete p;
|
||||
}
|
||||
|
||||
/* This returns false on failure. */
|
||||
bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
|
||||
char **pelf_buffer, size_t *pelf_size)
|
||||
char **pelf_buffer, size_t *pelf_size)
|
||||
{
|
||||
p->passmgr.run(*llvm::unwrap(module));
|
||||
p->ostream.take(*pelf_buffer, *pelf_size);
|
||||
return true;
|
||||
p->passmgr.run(*llvm::unwrap(module));
|
||||
p->ostream.take(*pelf_buffer, *pelf_size);
|
||||
return true;
|
||||
}
|
||||
|
||||
void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
|
||||
{
|
||||
llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
|
||||
llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
|
||||
}
|
||||
|
||||
void ac_enable_global_isel(LLVMTargetMachineRef tm)
|
||||
{
|
||||
reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true);
|
||||
reinterpret_cast<llvm::TargetMachine *>(tm)->setGlobalISel(true);
|
||||
}
|
||||
|
||||
LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
|
||||
LLVMValueRef ptr, LLVMValueRef val,
|
||||
const char *sync_scope) {
|
||||
llvm::AtomicRMWInst::BinOp binop;
|
||||
switch (op) {
|
||||
case LLVMAtomicRMWBinOpXchg:
|
||||
binop = llvm::AtomicRMWInst::Xchg;
|
||||
break;
|
||||
case LLVMAtomicRMWBinOpAdd:
|
||||
binop = llvm::AtomicRMWInst::Add;
|
||||
break;
|
||||
case LLVMAtomicRMWBinOpSub:
|
||||
binop = llvm::AtomicRMWInst::Sub;
|
||||
break;
|
||||
case LLVMAtomicRMWBinOpAnd:
|
||||
binop = llvm::AtomicRMWInst::And;
|
||||
break;
|
||||
case LLVMAtomicRMWBinOpNand:
|
||||
binop = llvm::AtomicRMWInst::Nand;
|
||||
break;
|
||||
case LLVMAtomicRMWBinOpOr:
|
||||
binop = llvm::AtomicRMWInst::Or;
|
||||
break;
|
||||
case LLVMAtomicRMWBinOpXor:
|
||||
binop = llvm::AtomicRMWInst::Xor;
|
||||
break;
|
||||
case LLVMAtomicRMWBinOpMax:
|
||||
binop = llvm::AtomicRMWInst::Max;
|
||||
break;
|
||||
case LLVMAtomicRMWBinOpMin:
|
||||
binop = llvm::AtomicRMWInst::Min;
|
||||
break;
|
||||
case LLVMAtomicRMWBinOpUMax:
|
||||
binop = llvm::AtomicRMWInst::UMax;
|
||||
break;
|
||||
case LLVMAtomicRMWBinOpUMin:
|
||||
binop = llvm::AtomicRMWInst::UMin;
|
||||
break;
|
||||
default:
|
||||
unreachable(!"invalid LLVMAtomicRMWBinOp");
|
||||
break;
|
||||
}
|
||||
unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
|
||||
return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicRMW(
|
||||
binop, llvm::unwrap(ptr), llvm::unwrap(val),
|
||||
llvm::AtomicOrdering::SequentiallyConsistent, SSID));
|
||||
LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope)
|
||||
{
|
||||
llvm::AtomicRMWInst::BinOp binop;
|
||||
switch (op) {
|
||||
case LLVMAtomicRMWBinOpXchg:
|
||||
binop = llvm::AtomicRMWInst::Xchg;
|
||||
break;
|
||||
case LLVMAtomicRMWBinOpAdd:
|
||||
binop = llvm::AtomicRMWInst::Add;
|
||||
break;
|
||||
case LLVMAtomicRMWBinOpSub:
|
||||
binop = llvm::AtomicRMWInst::Sub;
|
||||
break;
|
||||
case LLVMAtomicRMWBinOpAnd:
|
||||
binop = llvm::AtomicRMWInst::And;
|
||||
break;
|
||||
case LLVMAtomicRMWBinOpNand:
|
||||
binop = llvm::AtomicRMWInst::Nand;
|
||||
break;
|
||||
case LLVMAtomicRMWBinOpOr:
|
||||
binop = llvm::AtomicRMWInst::Or;
|
||||
break;
|
||||
case LLVMAtomicRMWBinOpXor:
|
||||
binop = llvm::AtomicRMWInst::Xor;
|
||||
break;
|
||||
case LLVMAtomicRMWBinOpMax:
|
||||
binop = llvm::AtomicRMWInst::Max;
|
||||
break;
|
||||
case LLVMAtomicRMWBinOpMin:
|
||||
binop = llvm::AtomicRMWInst::Min;
|
||||
break;
|
||||
case LLVMAtomicRMWBinOpUMax:
|
||||
binop = llvm::AtomicRMWInst::UMax;
|
||||
break;
|
||||
case LLVMAtomicRMWBinOpUMin:
|
||||
binop = llvm::AtomicRMWInst::UMin;
|
||||
break;
|
||||
default:
|
||||
unreachable(!"invalid LLVMAtomicRMWBinOp");
|
||||
break;
|
||||
}
|
||||
unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
|
||||
return llvm::wrap(llvm::unwrap(ctx->builder)
|
||||
->CreateAtomicRMW(binop, llvm::unwrap(ptr), llvm::unwrap(val),
|
||||
llvm::AtomicOrdering::SequentiallyConsistent, SSID));
|
||||
}
|
||||
|
||||
LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
|
||||
LLVMValueRef cmp, LLVMValueRef val,
|
||||
const char *sync_scope) {
|
||||
unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
|
||||
return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicCmpXchg(
|
||||
llvm::unwrap(ptr), llvm::unwrap(cmp), llvm::unwrap(val),
|
||||
llvm::AtomicOrdering::SequentiallyConsistent,
|
||||
llvm::AtomicOrdering::SequentiallyConsistent, SSID));
|
||||
LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope)
|
||||
{
|
||||
unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
|
||||
return llvm::wrap(llvm::unwrap(ctx->builder)
|
||||
->CreateAtomicCmpXchg(llvm::unwrap(ptr), llvm::unwrap(cmp),
|
||||
llvm::unwrap(val),
|
||||
llvm::AtomicOrdering::SequentiallyConsistent,
|
||||
llvm::AtomicOrdering::SequentiallyConsistent, SSID));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,16 +24,17 @@
|
|||
*/
|
||||
/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
|
||||
#include "ac_llvm_util.h"
|
||||
|
||||
#include "ac_llvm_build.h"
|
||||
#include "c11/threads.h"
|
||||
#include "gallivm/lp_bld_misc.h"
|
||||
#include "util/bitscan.h"
|
||||
#include "util/u_math.h"
|
||||
#include <llvm-c/Core.h>
|
||||
#include <llvm-c/Support.h>
|
||||
#include <llvm-c/Transforms/IPO.h>
|
||||
#include <llvm-c/Transforms/Scalar.h>
|
||||
#include <llvm-c/Transforms/Utils.h>
|
||||
#include "c11/threads.h"
|
||||
#include "gallivm/lp_bld_misc.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
|
@ -41,239 +42,240 @@
|
|||
|
||||
static void ac_init_llvm_target()
|
||||
{
|
||||
LLVMInitializeAMDGPUTargetInfo();
|
||||
LLVMInitializeAMDGPUTarget();
|
||||
LLVMInitializeAMDGPUTargetMC();
|
||||
LLVMInitializeAMDGPUAsmPrinter();
|
||||
LLVMInitializeAMDGPUTargetInfo();
|
||||
LLVMInitializeAMDGPUTarget();
|
||||
LLVMInitializeAMDGPUTargetMC();
|
||||
LLVMInitializeAMDGPUAsmPrinter();
|
||||
|
||||
/* For inline assembly. */
|
||||
LLVMInitializeAMDGPUAsmParser();
|
||||
/* For inline assembly. */
|
||||
LLVMInitializeAMDGPUAsmParser();
|
||||
|
||||
/* For ACO disassembly. */
|
||||
LLVMInitializeAMDGPUDisassembler();
|
||||
/* For ACO disassembly. */
|
||||
LLVMInitializeAMDGPUDisassembler();
|
||||
|
||||
/* Workaround for bug in llvm 4.0 that causes image intrinsics
|
||||
* to disappear.
|
||||
* https://reviews.llvm.org/D26348
|
||||
*
|
||||
* "mesa" is the prefix for error messages.
|
||||
*
|
||||
* -global-isel-abort=2 is a no-op unless global isel has been enabled.
|
||||
* This option tells the backend to fall-back to SelectionDAG and print
|
||||
* a diagnostic message if global isel fails.
|
||||
*/
|
||||
const char *argv[] = {
|
||||
"mesa",
|
||||
"-simplifycfg-sink-common=false",
|
||||
"-global-isel-abort=2",
|
||||
/* Workaround for bug in llvm 4.0 that causes image intrinsics
|
||||
* to disappear.
|
||||
* https://reviews.llvm.org/D26348
|
||||
*
|
||||
* "mesa" is the prefix for error messages.
|
||||
*
|
||||
* -global-isel-abort=2 is a no-op unless global isel has been enabled.
|
||||
* This option tells the backend to fall-back to SelectionDAG and print
|
||||
* a diagnostic message if global isel fails.
|
||||
*/
|
||||
const char *argv[] = {
|
||||
"mesa",
|
||||
"-simplifycfg-sink-common=false",
|
||||
"-global-isel-abort=2",
|
||||
#if LLVM_VERSION_MAJOR >= 10
|
||||
/* Atomic optimizations require LLVM 10.0 for gfx10 support. */
|
||||
"-amdgpu-atomic-optimizations=true",
|
||||
/* Atomic optimizations require LLVM 10.0 for gfx10 support. */
|
||||
"-amdgpu-atomic-optimizations=true",
|
||||
#endif
|
||||
#if LLVM_VERSION_MAJOR >= 11
|
||||
/* This was disabled by default in: https://reviews.llvm.org/D77228 */
|
||||
"-structurizecfg-skip-uniform-regions",
|
||||
/* This was disabled by default in: https://reviews.llvm.org/D77228 */
|
||||
"-structurizecfg-skip-uniform-regions",
|
||||
#endif
|
||||
};
|
||||
LLVMParseCommandLineOptions(ARRAY_SIZE(argv), argv, NULL);
|
||||
};
|
||||
LLVMParseCommandLineOptions(ARRAY_SIZE(argv), argv, NULL);
|
||||
}
|
||||
|
||||
PUBLIC void ac_init_shared_llvm_once(void)
|
||||
{
|
||||
static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT;
|
||||
call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);
|
||||
static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT;
|
||||
call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);
|
||||
}
|
||||
|
||||
#if !LLVM_IS_SHARED
|
||||
static once_flag ac_init_static_llvm_target_once_flag = ONCE_FLAG_INIT;
|
||||
static void ac_init_static_llvm_once(void)
|
||||
{
|
||||
call_once(&ac_init_static_llvm_target_once_flag, ac_init_llvm_target);
|
||||
call_once(&ac_init_static_llvm_target_once_flag, ac_init_llvm_target);
|
||||
}
|
||||
#endif
|
||||
|
||||
void ac_init_llvm_once(void)
|
||||
{
|
||||
#if LLVM_IS_SHARED
|
||||
ac_init_shared_llvm_once();
|
||||
ac_init_shared_llvm_once();
|
||||
#else
|
||||
ac_init_static_llvm_once();
|
||||
ac_init_static_llvm_once();
|
||||
#endif
|
||||
}
|
||||
|
||||
static LLVMTargetRef ac_get_llvm_target(const char *triple)
|
||||
{
|
||||
LLVMTargetRef target = NULL;
|
||||
char *err_message = NULL;
|
||||
LLVMTargetRef target = NULL;
|
||||
char *err_message = NULL;
|
||||
|
||||
if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
|
||||
fprintf(stderr, "Cannot find target for triple %s ", triple);
|
||||
if (err_message) {
|
||||
fprintf(stderr, "%s\n", err_message);
|
||||
}
|
||||
LLVMDisposeMessage(err_message);
|
||||
return NULL;
|
||||
}
|
||||
return target;
|
||||
if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
|
||||
fprintf(stderr, "Cannot find target for triple %s ", triple);
|
||||
if (err_message) {
|
||||
fprintf(stderr, "%s\n", err_message);
|
||||
}
|
||||
LLVMDisposeMessage(err_message);
|
||||
return NULL;
|
||||
}
|
||||
return target;
|
||||
}
|
||||
|
||||
const char *ac_get_llvm_processor_name(enum radeon_family family)
|
||||
{
|
||||
switch (family) {
|
||||
case CHIP_TAHITI:
|
||||
return "tahiti";
|
||||
case CHIP_PITCAIRN:
|
||||
return "pitcairn";
|
||||
case CHIP_VERDE:
|
||||
return "verde";
|
||||
case CHIP_OLAND:
|
||||
return "oland";
|
||||
case CHIP_HAINAN:
|
||||
return "hainan";
|
||||
case CHIP_BONAIRE:
|
||||
return "bonaire";
|
||||
case CHIP_KABINI:
|
||||
return "kabini";
|
||||
case CHIP_KAVERI:
|
||||
return "kaveri";
|
||||
case CHIP_HAWAII:
|
||||
return "hawaii";
|
||||
case CHIP_TONGA:
|
||||
return "tonga";
|
||||
case CHIP_ICELAND:
|
||||
return "iceland";
|
||||
case CHIP_CARRIZO:
|
||||
return "carrizo";
|
||||
case CHIP_FIJI:
|
||||
return "fiji";
|
||||
case CHIP_STONEY:
|
||||
return "stoney";
|
||||
case CHIP_POLARIS10:
|
||||
return "polaris10";
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
case CHIP_VEGAM:
|
||||
return "polaris11";
|
||||
case CHIP_VEGA10:
|
||||
return "gfx900";
|
||||
case CHIP_RAVEN:
|
||||
return "gfx902";
|
||||
case CHIP_VEGA12:
|
||||
return "gfx904";
|
||||
case CHIP_VEGA20:
|
||||
return "gfx906";
|
||||
case CHIP_RAVEN2:
|
||||
case CHIP_RENOIR:
|
||||
return "gfx909";
|
||||
case CHIP_ARCTURUS:
|
||||
return "gfx908";
|
||||
case CHIP_NAVI10:
|
||||
return "gfx1010";
|
||||
case CHIP_NAVI12:
|
||||
return "gfx1011";
|
||||
case CHIP_NAVI14:
|
||||
return "gfx1012";
|
||||
case CHIP_SIENNA_CICHLID:
|
||||
case CHIP_NAVY_FLOUNDER:
|
||||
return "gfx1030";
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
switch (family) {
|
||||
case CHIP_TAHITI:
|
||||
return "tahiti";
|
||||
case CHIP_PITCAIRN:
|
||||
return "pitcairn";
|
||||
case CHIP_VERDE:
|
||||
return "verde";
|
||||
case CHIP_OLAND:
|
||||
return "oland";
|
||||
case CHIP_HAINAN:
|
||||
return "hainan";
|
||||
case CHIP_BONAIRE:
|
||||
return "bonaire";
|
||||
case CHIP_KABINI:
|
||||
return "kabini";
|
||||
case CHIP_KAVERI:
|
||||
return "kaveri";
|
||||
case CHIP_HAWAII:
|
||||
return "hawaii";
|
||||
case CHIP_TONGA:
|
||||
return "tonga";
|
||||
case CHIP_ICELAND:
|
||||
return "iceland";
|
||||
case CHIP_CARRIZO:
|
||||
return "carrizo";
|
||||
case CHIP_FIJI:
|
||||
return "fiji";
|
||||
case CHIP_STONEY:
|
||||
return "stoney";
|
||||
case CHIP_POLARIS10:
|
||||
return "polaris10";
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
case CHIP_VEGAM:
|
||||
return "polaris11";
|
||||
case CHIP_VEGA10:
|
||||
return "gfx900";
|
||||
case CHIP_RAVEN:
|
||||
return "gfx902";
|
||||
case CHIP_VEGA12:
|
||||
return "gfx904";
|
||||
case CHIP_VEGA20:
|
||||
return "gfx906";
|
||||
case CHIP_RAVEN2:
|
||||
case CHIP_RENOIR:
|
||||
return "gfx909";
|
||||
case CHIP_ARCTURUS:
|
||||
return "gfx908";
|
||||
case CHIP_NAVI10:
|
||||
return "gfx1010";
|
||||
case CHIP_NAVI12:
|
||||
return "gfx1011";
|
||||
case CHIP_NAVI14:
|
||||
return "gfx1012";
|
||||
case CHIP_SIENNA_CICHLID:
|
||||
case CHIP_NAVY_FLOUNDER:
|
||||
return "gfx1030";
|
||||
default:
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
|
||||
enum ac_target_machine_options tm_options,
|
||||
LLVMCodeGenOptLevel level,
|
||||
const char **out_triple)
|
||||
enum ac_target_machine_options tm_options,
|
||||
LLVMCodeGenOptLevel level,
|
||||
const char **out_triple)
|
||||
{
|
||||
assert(family >= CHIP_TAHITI);
|
||||
char features[256];
|
||||
const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
|
||||
LLVMTargetRef target = ac_get_llvm_target(triple);
|
||||
assert(family >= CHIP_TAHITI);
|
||||
char features[256];
|
||||
const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
|
||||
LLVMTargetRef target = ac_get_llvm_target(triple);
|
||||
|
||||
snprintf(features, sizeof(features),
|
||||
"+DumpCode%s%s%s%s%s",
|
||||
LLVM_VERSION_MAJOR >= 11 ? "" : ",-fp32-denormals,+fp64-denormals",
|
||||
family >= CHIP_NAVI10 && !(tm_options & AC_TM_WAVE32) ?
|
||||
",+wavefrontsize64,-wavefrontsize32" : "",
|
||||
family <= CHIP_NAVI14 && tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
|
||||
family <= CHIP_NAVI14 && tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
|
||||
tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "");
|
||||
snprintf(features, sizeof(features), "+DumpCode%s%s%s%s%s",
|
||||
LLVM_VERSION_MAJOR >= 11 ? "" : ",-fp32-denormals,+fp64-denormals",
|
||||
family >= CHIP_NAVI10 && !(tm_options & AC_TM_WAVE32)
|
||||
? ",+wavefrontsize64,-wavefrontsize32"
|
||||
: "",
|
||||
family <= CHIP_NAVI14 && tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
|
||||
family <= CHIP_NAVI14 && tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
|
||||
tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "");
|
||||
|
||||
LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
|
||||
target,
|
||||
triple,
|
||||
ac_get_llvm_processor_name(family),
|
||||
features,
|
||||
level,
|
||||
LLVMRelocDefault,
|
||||
LLVMCodeModelDefault);
|
||||
LLVMTargetMachineRef tm =
|
||||
LLVMCreateTargetMachine(target, triple, ac_get_llvm_processor_name(family), features, level,
|
||||
LLVMRelocDefault, LLVMCodeModelDefault);
|
||||
|
||||
if (out_triple)
|
||||
*out_triple = triple;
|
||||
if (tm_options & AC_TM_ENABLE_GLOBAL_ISEL)
|
||||
ac_enable_global_isel(tm);
|
||||
return tm;
|
||||
if (out_triple)
|
||||
*out_triple = triple;
|
||||
if (tm_options & AC_TM_ENABLE_GLOBAL_ISEL)
|
||||
ac_enable_global_isel(tm);
|
||||
return tm;
|
||||
}
|
||||
|
||||
static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
|
||||
bool check_ir)
|
||||
bool check_ir)
|
||||
{
|
||||
LLVMPassManagerRef passmgr = LLVMCreatePassManager();
|
||||
if (!passmgr)
|
||||
return NULL;
|
||||
LLVMPassManagerRef passmgr = LLVMCreatePassManager();
|
||||
if (!passmgr)
|
||||
return NULL;
|
||||
|
||||
if (target_library_info)
|
||||
LLVMAddTargetLibraryInfo(target_library_info,
|
||||
passmgr);
|
||||
if (target_library_info)
|
||||
LLVMAddTargetLibraryInfo(target_library_info, passmgr);
|
||||
|
||||
if (check_ir)
|
||||
LLVMAddVerifierPass(passmgr);
|
||||
LLVMAddAlwaysInlinerPass(passmgr);
|
||||
/* Normally, the pass manager runs all passes on one function before
|
||||
* moving onto another. Adding a barrier no-op pass forces the pass
|
||||
* manager to run the inliner on all functions first, which makes sure
|
||||
* that the following passes are only run on the remaining non-inline
|
||||
* function, so it removes useless work done on dead inline functions.
|
||||
*/
|
||||
ac_llvm_add_barrier_noop_pass(passmgr);
|
||||
/* This pass should eliminate all the load and store instructions. */
|
||||
LLVMAddPromoteMemoryToRegisterPass(passmgr);
|
||||
LLVMAddScalarReplAggregatesPass(passmgr);
|
||||
LLVMAddLICMPass(passmgr);
|
||||
LLVMAddAggressiveDCEPass(passmgr);
|
||||
LLVMAddCFGSimplificationPass(passmgr);
|
||||
/* This is recommended by the instruction combining pass. */
|
||||
LLVMAddEarlyCSEMemSSAPass(passmgr);
|
||||
LLVMAddInstructionCombiningPass(passmgr);
|
||||
return passmgr;
|
||||
if (check_ir)
|
||||
LLVMAddVerifierPass(passmgr);
|
||||
LLVMAddAlwaysInlinerPass(passmgr);
|
||||
/* Normally, the pass manager runs all passes on one function before
|
||||
* moving onto another. Adding a barrier no-op pass forces the pass
|
||||
* manager to run the inliner on all functions first, which makes sure
|
||||
* that the following passes are only run on the remaining non-inline
|
||||
* function, so it removes useless work done on dead inline functions.
|
||||
*/
|
||||
ac_llvm_add_barrier_noop_pass(passmgr);
|
||||
/* This pass should eliminate all the load and store instructions. */
|
||||
LLVMAddPromoteMemoryToRegisterPass(passmgr);
|
||||
LLVMAddScalarReplAggregatesPass(passmgr);
|
||||
LLVMAddLICMPass(passmgr);
|
||||
LLVMAddAggressiveDCEPass(passmgr);
|
||||
LLVMAddCFGSimplificationPass(passmgr);
|
||||
/* This is recommended by the instruction combining pass. */
|
||||
LLVMAddEarlyCSEMemSSAPass(passmgr);
|
||||
LLVMAddInstructionCombiningPass(passmgr);
|
||||
return passmgr;
|
||||
}
|
||||
|
||||
static const char *attr_to_str(enum ac_func_attr attr)
|
||||
{
|
||||
switch (attr) {
|
||||
case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
|
||||
case AC_FUNC_ATTR_INREG: return "inreg";
|
||||
case AC_FUNC_ATTR_NOALIAS: return "noalias";
|
||||
case AC_FUNC_ATTR_NOUNWIND: return "nounwind";
|
||||
case AC_FUNC_ATTR_READNONE: return "readnone";
|
||||
case AC_FUNC_ATTR_READONLY: return "readonly";
|
||||
case AC_FUNC_ATTR_WRITEONLY: return "writeonly";
|
||||
case AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY: return "inaccessiblememonly";
|
||||
case AC_FUNC_ATTR_CONVERGENT: return "convergent";
|
||||
case AC_FUNC_ATTR_ALWAYSINLINE:
|
||||
return "alwaysinline";
|
||||
case AC_FUNC_ATTR_INREG:
|
||||
return "inreg";
|
||||
case AC_FUNC_ATTR_NOALIAS:
|
||||
return "noalias";
|
||||
case AC_FUNC_ATTR_NOUNWIND:
|
||||
return "nounwind";
|
||||
case AC_FUNC_ATTR_READNONE:
|
||||
return "readnone";
|
||||
case AC_FUNC_ATTR_READONLY:
|
||||
return "readonly";
|
||||
case AC_FUNC_ATTR_WRITEONLY:
|
||||
return "writeonly";
|
||||
case AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY:
|
||||
return "inaccessiblememonly";
|
||||
case AC_FUNC_ATTR_CONVERGENT:
|
||||
return "convergent";
|
||||
default:
|
||||
fprintf(stderr, "Unhandled function attribute: %x\n", attr);
|
||||
return 0;
|
||||
fprintf(stderr, "Unhandled function attribute: %x\n", attr);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
|
||||
int attr_idx, enum ac_func_attr attr)
|
||||
void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, int attr_idx,
|
||||
enum ac_func_attr attr)
|
||||
{
|
||||
const char *attr_name = attr_to_str(attr);
|
||||
unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
|
||||
strlen(attr_name));
|
||||
unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, strlen(attr_name));
|
||||
LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
|
||||
|
||||
if (LLVMIsAFunction(function))
|
||||
|
|
@ -282,138 +284,124 @@ ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
|
|||
LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
|
||||
}
|
||||
|
||||
void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
|
||||
unsigned attrib_mask)
|
||||
void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, unsigned attrib_mask)
|
||||
{
|
||||
attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
|
||||
attrib_mask &= ~AC_FUNC_ATTR_LEGACY;
|
||||
attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
|
||||
attrib_mask &= ~AC_FUNC_ATTR_LEGACY;
|
||||
|
||||
while (attrib_mask) {
|
||||
enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
|
||||
ac_add_function_attr(ctx, function, -1, attr);
|
||||
}
|
||||
while (attrib_mask) {
|
||||
enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
|
||||
ac_add_function_attr(ctx, function, -1, attr);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
ac_dump_module(LLVMModuleRef module)
|
||||
void ac_dump_module(LLVMModuleRef module)
|
||||
{
|
||||
char *str = LLVMPrintModuleToString(module);
|
||||
fprintf(stderr, "%s", str);
|
||||
LLVMDisposeMessage(str);
|
||||
char *str = LLVMPrintModuleToString(module);
|
||||
fprintf(stderr, "%s", str);
|
||||
LLVMDisposeMessage(str);
|
||||
}
|
||||
|
||||
void
|
||||
ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
|
||||
const char *name, unsigned value)
|
||||
void ac_llvm_add_target_dep_function_attr(LLVMValueRef F, const char *name, unsigned value)
|
||||
{
|
||||
char str[16];
|
||||
char str[16];
|
||||
|
||||
snprintf(str, sizeof(str), "0x%x", value);
|
||||
LLVMAddTargetDependentFunctionAttr(F, name, str);
|
||||
snprintf(str, sizeof(str), "0x%x", value);
|
||||
LLVMAddTargetDependentFunctionAttr(F, name, str);
|
||||
}
|
||||
|
||||
void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size)
|
||||
{
|
||||
if (!size)
|
||||
return;
|
||||
if (!size)
|
||||
return;
|
||||
|
||||
char str[32];
|
||||
snprintf(str, sizeof(str), "%u,%u", size, size);
|
||||
LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str);
|
||||
char str[32];
|
||||
snprintf(str, sizeof(str), "%u,%u", size, size);
|
||||
LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str);
|
||||
}
|
||||
|
||||
unsigned
|
||||
ac_count_scratch_private_memory(LLVMValueRef function)
|
||||
unsigned ac_count_scratch_private_memory(LLVMValueRef function)
|
||||
{
|
||||
unsigned private_mem_vgprs = 0;
|
||||
unsigned private_mem_vgprs = 0;
|
||||
|
||||
/* Process all LLVM instructions. */
|
||||
LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function);
|
||||
while (bb) {
|
||||
LLVMValueRef next = LLVMGetFirstInstruction(bb);
|
||||
/* Process all LLVM instructions. */
|
||||
LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function);
|
||||
while (bb) {
|
||||
LLVMValueRef next = LLVMGetFirstInstruction(bb);
|
||||
|
||||
while (next) {
|
||||
LLVMValueRef inst = next;
|
||||
next = LLVMGetNextInstruction(next);
|
||||
while (next) {
|
||||
LLVMValueRef inst = next;
|
||||
next = LLVMGetNextInstruction(next);
|
||||
|
||||
if (LLVMGetInstructionOpcode(inst) != LLVMAlloca)
|
||||
continue;
|
||||
if (LLVMGetInstructionOpcode(inst) != LLVMAlloca)
|
||||
continue;
|
||||
|
||||
LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst));
|
||||
/* No idea why LLVM aligns allocas to 4 elements. */
|
||||
unsigned alignment = LLVMGetAlignment(inst);
|
||||
unsigned dw_size = align(ac_get_type_size(type) / 4, alignment);
|
||||
private_mem_vgprs += dw_size;
|
||||
}
|
||||
bb = LLVMGetNextBasicBlock(bb);
|
||||
}
|
||||
LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst));
|
||||
/* No idea why LLVM aligns allocas to 4 elements. */
|
||||
unsigned alignment = LLVMGetAlignment(inst);
|
||||
unsigned dw_size = align(ac_get_type_size(type) / 4, alignment);
|
||||
private_mem_vgprs += dw_size;
|
||||
}
|
||||
bb = LLVMGetNextBasicBlock(bb);
|
||||
}
|
||||
|
||||
return private_mem_vgprs;
|
||||
return private_mem_vgprs;
|
||||
}
|
||||
|
||||
bool
|
||||
ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
|
||||
enum radeon_family family,
|
||||
enum ac_target_machine_options tm_options)
|
||||
bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family family,
|
||||
enum ac_target_machine_options tm_options)
|
||||
{
|
||||
const char *triple;
|
||||
memset(compiler, 0, sizeof(*compiler));
|
||||
const char *triple;
|
||||
memset(compiler, 0, sizeof(*compiler));
|
||||
|
||||
compiler->tm = ac_create_target_machine(family, tm_options,
|
||||
LLVMCodeGenLevelDefault,
|
||||
&triple);
|
||||
if (!compiler->tm)
|
||||
return false;
|
||||
compiler->tm = ac_create_target_machine(family, tm_options, LLVMCodeGenLevelDefault, &triple);
|
||||
if (!compiler->tm)
|
||||
return false;
|
||||
|
||||
if (tm_options & AC_TM_CREATE_LOW_OPT) {
|
||||
compiler->low_opt_tm =
|
||||
ac_create_target_machine(family, tm_options,
|
||||
LLVMCodeGenLevelLess, NULL);
|
||||
if (!compiler->low_opt_tm)
|
||||
goto fail;
|
||||
}
|
||||
if (tm_options & AC_TM_CREATE_LOW_OPT) {
|
||||
compiler->low_opt_tm =
|
||||
ac_create_target_machine(family, tm_options, LLVMCodeGenLevelLess, NULL);
|
||||
if (!compiler->low_opt_tm)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (family >= CHIP_NAVI10) {
|
||||
assert(!(tm_options & AC_TM_CREATE_LOW_OPT));
|
||||
compiler->tm_wave32 = ac_create_target_machine(family,
|
||||
tm_options | AC_TM_WAVE32,
|
||||
LLVMCodeGenLevelDefault,
|
||||
NULL);
|
||||
if (!compiler->tm_wave32)
|
||||
goto fail;
|
||||
}
|
||||
if (family >= CHIP_NAVI10) {
|
||||
assert(!(tm_options & AC_TM_CREATE_LOW_OPT));
|
||||
compiler->tm_wave32 =
|
||||
ac_create_target_machine(family, tm_options | AC_TM_WAVE32, LLVMCodeGenLevelDefault, NULL);
|
||||
if (!compiler->tm_wave32)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
compiler->target_library_info =
|
||||
ac_create_target_library_info(triple);
|
||||
if (!compiler->target_library_info)
|
||||
goto fail;
|
||||
compiler->target_library_info = ac_create_target_library_info(triple);
|
||||
if (!compiler->target_library_info)
|
||||
goto fail;
|
||||
|
||||
compiler->passmgr = ac_create_passmgr(compiler->target_library_info,
|
||||
tm_options & AC_TM_CHECK_IR);
|
||||
if (!compiler->passmgr)
|
||||
goto fail;
|
||||
compiler->passmgr =
|
||||
ac_create_passmgr(compiler->target_library_info, tm_options & AC_TM_CHECK_IR);
|
||||
if (!compiler->passmgr)
|
||||
goto fail;
|
||||
|
||||
return true;
|
||||
return true;
|
||||
fail:
|
||||
ac_destroy_llvm_compiler(compiler);
|
||||
return false;
|
||||
ac_destroy_llvm_compiler(compiler);
|
||||
return false;
|
||||
}
|
||||
|
||||
void
|
||||
ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
|
||||
void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
|
||||
{
|
||||
ac_destroy_llvm_passes(compiler->passes);
|
||||
ac_destroy_llvm_passes(compiler->passes_wave32);
|
||||
ac_destroy_llvm_passes(compiler->low_opt_passes);
|
||||
ac_destroy_llvm_passes(compiler->passes);
|
||||
ac_destroy_llvm_passes(compiler->passes_wave32);
|
||||
ac_destroy_llvm_passes(compiler->low_opt_passes);
|
||||
|
||||
if (compiler->passmgr)
|
||||
LLVMDisposePassManager(compiler->passmgr);
|
||||
if (compiler->target_library_info)
|
||||
ac_dispose_target_library_info(compiler->target_library_info);
|
||||
if (compiler->low_opt_tm)
|
||||
LLVMDisposeTargetMachine(compiler->low_opt_tm);
|
||||
if (compiler->tm)
|
||||
LLVMDisposeTargetMachine(compiler->tm);
|
||||
if (compiler->tm_wave32)
|
||||
LLVMDisposeTargetMachine(compiler->tm_wave32);
|
||||
if (compiler->passmgr)
|
||||
LLVMDisposePassManager(compiler->passmgr);
|
||||
if (compiler->target_library_info)
|
||||
ac_dispose_target_library_info(compiler->target_library_info);
|
||||
if (compiler->low_opt_tm)
|
||||
LLVMDisposeTargetMachine(compiler->low_opt_tm);
|
||||
if (compiler->tm)
|
||||
LLVMDisposeTargetMachine(compiler->tm);
|
||||
if (compiler->tm_wave32)
|
||||
LLVMDisposeTargetMachine(compiler->tm_wave32);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,11 +26,11 @@
|
|||
#ifndef AC_LLVM_UTIL_H
|
||||
#define AC_LLVM_UTIL_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "amd_family.h"
|
||||
#include <llvm-c/TargetMachine.h>
|
||||
#include <llvm/Config/llvm-config.h>
|
||||
|
||||
#include "amd_family.h"
|
||||
#include <stdbool.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
|
@ -39,124 +39,117 @@ extern "C" {
|
|||
struct ac_compiler_passes;
|
||||
struct ac_llvm_context;
|
||||
|
||||
enum ac_func_attr {
|
||||
AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
|
||||
AC_FUNC_ATTR_INREG = (1 << 2),
|
||||
AC_FUNC_ATTR_NOALIAS = (1 << 3),
|
||||
AC_FUNC_ATTR_NOUNWIND = (1 << 4),
|
||||
AC_FUNC_ATTR_READNONE = (1 << 5),
|
||||
AC_FUNC_ATTR_READONLY = (1 << 6),
|
||||
AC_FUNC_ATTR_WRITEONLY = (1 << 7),
|
||||
AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY = (1 << 8),
|
||||
AC_FUNC_ATTR_CONVERGENT = (1 << 9),
|
||||
enum ac_func_attr
|
||||
{
|
||||
AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
|
||||
AC_FUNC_ATTR_INREG = (1 << 2),
|
||||
AC_FUNC_ATTR_NOALIAS = (1 << 3),
|
||||
AC_FUNC_ATTR_NOUNWIND = (1 << 4),
|
||||
AC_FUNC_ATTR_READNONE = (1 << 5),
|
||||
AC_FUNC_ATTR_READONLY = (1 << 6),
|
||||
AC_FUNC_ATTR_WRITEONLY = (1 << 7),
|
||||
AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY = (1 << 8),
|
||||
AC_FUNC_ATTR_CONVERGENT = (1 << 9),
|
||||
|
||||
/* Legacy intrinsic that needs attributes on function declarations
|
||||
* and they must match the internal LLVM definition exactly, otherwise
|
||||
* intrinsic selection fails.
|
||||
*/
|
||||
AC_FUNC_ATTR_LEGACY = (1u << 31),
|
||||
/* Legacy intrinsic that needs attributes on function declarations
|
||||
* and they must match the internal LLVM definition exactly, otherwise
|
||||
* intrinsic selection fails.
|
||||
*/
|
||||
AC_FUNC_ATTR_LEGACY = (1u << 31),
|
||||
};
|
||||
|
||||
enum ac_target_machine_options {
|
||||
AC_TM_SUPPORTS_SPILL = (1 << 0),
|
||||
AC_TM_FORCE_ENABLE_XNACK = (1 << 1),
|
||||
AC_TM_FORCE_DISABLE_XNACK = (1 << 2),
|
||||
AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 3),
|
||||
AC_TM_CHECK_IR = (1 << 4),
|
||||
AC_TM_ENABLE_GLOBAL_ISEL = (1 << 5),
|
||||
AC_TM_CREATE_LOW_OPT = (1 << 6),
|
||||
AC_TM_WAVE32 = (1 << 7),
|
||||
enum ac_target_machine_options
|
||||
{
|
||||
AC_TM_SUPPORTS_SPILL = (1 << 0),
|
||||
AC_TM_FORCE_ENABLE_XNACK = (1 << 1),
|
||||
AC_TM_FORCE_DISABLE_XNACK = (1 << 2),
|
||||
AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 3),
|
||||
AC_TM_CHECK_IR = (1 << 4),
|
||||
AC_TM_ENABLE_GLOBAL_ISEL = (1 << 5),
|
||||
AC_TM_CREATE_LOW_OPT = (1 << 6),
|
||||
AC_TM_WAVE32 = (1 << 7),
|
||||
};
|
||||
|
||||
enum ac_float_mode {
|
||||
AC_FLOAT_MODE_DEFAULT,
|
||||
AC_FLOAT_MODE_DEFAULT_OPENGL,
|
||||
AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO,
|
||||
enum ac_float_mode
|
||||
{
|
||||
AC_FLOAT_MODE_DEFAULT,
|
||||
AC_FLOAT_MODE_DEFAULT_OPENGL,
|
||||
AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO,
|
||||
};
|
||||
|
||||
/* Per-thread persistent LLVM objects. */
|
||||
struct ac_llvm_compiler {
|
||||
LLVMTargetLibraryInfoRef target_library_info;
|
||||
LLVMPassManagerRef passmgr;
|
||||
LLVMTargetLibraryInfoRef target_library_info;
|
||||
LLVMPassManagerRef passmgr;
|
||||
|
||||
/* Default compiler. */
|
||||
LLVMTargetMachineRef tm;
|
||||
struct ac_compiler_passes *passes;
|
||||
/* Default compiler. */
|
||||
LLVMTargetMachineRef tm;
|
||||
struct ac_compiler_passes *passes;
|
||||
|
||||
/* Wave32 compiler for GFX10. */
|
||||
LLVMTargetMachineRef tm_wave32;
|
||||
struct ac_compiler_passes *passes_wave32;
|
||||
/* Wave32 compiler for GFX10. */
|
||||
LLVMTargetMachineRef tm_wave32;
|
||||
struct ac_compiler_passes *passes_wave32;
|
||||
|
||||
/* Optional compiler for faster compilation with fewer optimizations.
|
||||
* LLVM modules can be created with "tm" too. There is no difference.
|
||||
*/
|
||||
LLVMTargetMachineRef low_opt_tm; /* uses -O1 instead of -O2 */
|
||||
struct ac_compiler_passes *low_opt_passes;
|
||||
/* Optional compiler for faster compilation with fewer optimizations.
|
||||
* LLVM modules can be created with "tm" too. There is no difference.
|
||||
*/
|
||||
LLVMTargetMachineRef low_opt_tm; /* uses -O1 instead of -O2 */
|
||||
struct ac_compiler_passes *low_opt_passes;
|
||||
};
|
||||
|
||||
const char *ac_get_llvm_processor_name(enum radeon_family family);
|
||||
void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
|
||||
void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes);
|
||||
bool ac_is_sgpr_param(LLVMValueRef param);
|
||||
void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
|
||||
int attr_idx, enum ac_func_attr attr);
|
||||
void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
|
||||
unsigned attrib_mask);
|
||||
void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, int attr_idx,
|
||||
enum ac_func_attr attr);
|
||||
void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, unsigned attrib_mask);
|
||||
void ac_dump_module(LLVMModuleRef module);
|
||||
|
||||
LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call);
|
||||
bool ac_llvm_is_function(LLVMValueRef v);
|
||||
LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx);
|
||||
|
||||
LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
|
||||
enum ac_float_mode float_mode);
|
||||
LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode);
|
||||
void ac_enable_signed_zeros(struct ac_llvm_context *ctx);
|
||||
void ac_disable_signed_zeros(struct ac_llvm_context *ctx);
|
||||
|
||||
void
|
||||
ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
|
||||
const char *name, unsigned value);
|
||||
void ac_llvm_add_target_dep_function_attr(LLVMValueRef F, const char *name, unsigned value);
|
||||
void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size);
|
||||
|
||||
static inline unsigned
|
||||
ac_get_load_intr_attribs(bool can_speculate)
|
||||
static inline unsigned ac_get_load_intr_attribs(bool can_speculate)
|
||||
{
|
||||
/* READNONE means writes can't affect it, while READONLY means that
|
||||
* writes can affect it. */
|
||||
return can_speculate ? AC_FUNC_ATTR_READNONE :
|
||||
AC_FUNC_ATTR_READONLY;
|
||||
/* READNONE means writes can't affect it, while READONLY means that
|
||||
* writes can affect it. */
|
||||
return can_speculate ? AC_FUNC_ATTR_READNONE : AC_FUNC_ATTR_READONLY;
|
||||
}
|
||||
|
||||
unsigned
|
||||
ac_count_scratch_private_memory(LLVMValueRef function);
|
||||
unsigned ac_count_scratch_private_memory(LLVMValueRef function);
|
||||
|
||||
LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple);
|
||||
void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info);
|
||||
void ac_init_shared_llvm_once(void); /* Do not use directly, use ac_init_llvm_once */
|
||||
void ac_init_llvm_once(void);
|
||||
|
||||
|
||||
bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
|
||||
enum radeon_family family,
|
||||
enum ac_target_machine_options tm_options);
|
||||
bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family family,
|
||||
enum ac_target_machine_options tm_options);
|
||||
void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler);
|
||||
|
||||
struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm);
|
||||
void ac_destroy_llvm_passes(struct ac_compiler_passes *p);
|
||||
bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
|
||||
char **pelf_buffer, size_t *pelf_size);
|
||||
char **pelf_buffer, size_t *pelf_size);
|
||||
void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr);
|
||||
void ac_enable_global_isel(LLVMTargetMachineRef tm);
|
||||
|
||||
static inline bool
|
||||
ac_has_vec3_support(enum chip_class chip, bool use_format)
|
||||
static inline bool ac_has_vec3_support(enum chip_class chip, bool use_format)
|
||||
{
|
||||
if (chip == GFX6 && !use_format) {
|
||||
/* GFX6 only supports vec3 with load/store format. */
|
||||
return false;
|
||||
}
|
||||
if (chip == GFX6 && !use_format) {
|
||||
/* GFX6 only supports vec3 with load/store format. */
|
||||
return false;
|
||||
}
|
||||
|
||||
return LLVM_VERSION_MAJOR >= 9;
|
||||
return LLVM_VERSION_MAJOR >= 9;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -24,11 +24,12 @@
|
|||
#ifndef AC_NIR_TO_LLVM_H
|
||||
#define AC_NIR_TO_LLVM_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "llvm-c/Core.h"
|
||||
#include "llvm-c/TargetMachine.h"
|
||||
#include "amd_family.h"
|
||||
#include "compiler/shader_enums.h"
|
||||
#include "llvm-c/Core.h"
|
||||
#include "llvm-c/TargetMachine.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
struct nir_shader;
|
||||
struct nir_variable;
|
||||
|
|
@ -37,13 +38,13 @@ struct ac_shader_abi;
|
|||
struct ac_shader_args;
|
||||
|
||||
/* Interpolation locations */
|
||||
#define INTERP_CENTER 0
|
||||
#define INTERP_CENTER 0
|
||||
#define INTERP_CENTROID 1
|
||||
#define INTERP_SAMPLE 2
|
||||
#define INTERP_SAMPLE 2
|
||||
|
||||
static inline unsigned ac_llvm_reg_index_soa(unsigned index, unsigned chan)
|
||||
{
|
||||
return (index * 4) + chan;
|
||||
return (index * 4) + chan;
|
||||
}
|
||||
|
||||
bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class);
|
||||
|
|
@ -51,14 +52,11 @@ bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class);
|
|||
bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir);
|
||||
|
||||
void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
|
||||
const struct ac_shader_args *args, struct nir_shader *nir);
|
||||
const struct ac_shader_args *args, struct nir_shader *nir);
|
||||
|
||||
void
|
||||
ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
|
||||
struct ac_shader_abi *abi,
|
||||
struct nir_shader *nir,
|
||||
struct nir_variable *variable,
|
||||
gl_shader_stage stage);
|
||||
void ac_handle_shader_output_decl(struct ac_llvm_context *ctx, struct ac_shader_abi *abi,
|
||||
struct nir_shader *nir, struct nir_variable *variable,
|
||||
gl_shader_stage stage);
|
||||
|
||||
void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage);
|
||||
|
||||
|
|
|
|||
|
|
@ -24,11 +24,11 @@
|
|||
#ifndef AC_SHADER_ABI_H
|
||||
#define AC_SHADER_ABI_H
|
||||
|
||||
#include <llvm-c/Core.h>
|
||||
#include <assert.h>
|
||||
#include "ac_shader_args.h"
|
||||
|
||||
#include "compiler/shader_enums.h"
|
||||
#include <llvm-c/Core.h>
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
struct nir_variable;
|
||||
|
||||
|
|
@ -36,167 +36,136 @@ struct nir_variable;
|
|||
|
||||
#define AC_MAX_INLINE_PUSH_CONSTS 8
|
||||
|
||||
enum ac_descriptor_type {
|
||||
AC_DESC_IMAGE,
|
||||
AC_DESC_FMASK,
|
||||
AC_DESC_SAMPLER,
|
||||
AC_DESC_BUFFER,
|
||||
AC_DESC_PLANE_0,
|
||||
AC_DESC_PLANE_1,
|
||||
AC_DESC_PLANE_2,
|
||||
enum ac_descriptor_type
|
||||
{
|
||||
AC_DESC_IMAGE,
|
||||
AC_DESC_FMASK,
|
||||
AC_DESC_SAMPLER,
|
||||
AC_DESC_BUFFER,
|
||||
AC_DESC_PLANE_0,
|
||||
AC_DESC_PLANE_1,
|
||||
AC_DESC_PLANE_2,
|
||||
};
|
||||
|
||||
/* Document the shader ABI during compilation. This is what allows radeonsi and
|
||||
* radv to share a compiler backend.
|
||||
*/
|
||||
struct ac_shader_abi {
|
||||
LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4];
|
||||
LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4];
|
||||
|
||||
/* These input registers sometimes need to be fixed up. */
|
||||
LLVMValueRef vertex_id;
|
||||
LLVMValueRef instance_id;
|
||||
LLVMValueRef persp_centroid, linear_centroid;
|
||||
LLVMValueRef color0, color1;
|
||||
LLVMValueRef user_data;
|
||||
/* These input registers sometimes need to be fixed up. */
|
||||
LLVMValueRef vertex_id;
|
||||
LLVMValueRef instance_id;
|
||||
LLVMValueRef persp_centroid, linear_centroid;
|
||||
LLVMValueRef color0, color1;
|
||||
LLVMValueRef user_data;
|
||||
|
||||
/* For VS and PS: pre-loaded shader inputs.
|
||||
*
|
||||
* Currently only used for NIR shaders; indexed by variables'
|
||||
* driver_location.
|
||||
*/
|
||||
LLVMValueRef *inputs;
|
||||
/* For VS and PS: pre-loaded shader inputs.
|
||||
*
|
||||
* Currently only used for NIR shaders; indexed by variables'
|
||||
* driver_location.
|
||||
*/
|
||||
LLVMValueRef *inputs;
|
||||
|
||||
/* Varying -> attribute number mapping. Also NIR-only */
|
||||
unsigned fs_input_attr_indices[MAX_VARYING];
|
||||
/* Varying -> attribute number mapping. Also NIR-only */
|
||||
unsigned fs_input_attr_indices[MAX_VARYING];
|
||||
|
||||
void (*emit_outputs)(struct ac_shader_abi *abi,
|
||||
unsigned max_outputs,
|
||||
LLVMValueRef *addrs);
|
||||
void (*emit_outputs)(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs);
|
||||
|
||||
void (*emit_vertex)(struct ac_shader_abi *abi,
|
||||
unsigned stream,
|
||||
LLVMValueRef *addrs);
|
||||
void (*emit_vertex)(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs);
|
||||
|
||||
void (*emit_primitive)(struct ac_shader_abi *abi,
|
||||
unsigned stream);
|
||||
void (*emit_primitive)(struct ac_shader_abi *abi, unsigned stream);
|
||||
|
||||
void (*emit_vertex_with_counter)(struct ac_shader_abi *abi,
|
||||
unsigned stream,
|
||||
LLVMValueRef vertexidx,
|
||||
LLVMValueRef *addrs);
|
||||
void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, unsigned stream,
|
||||
LLVMValueRef vertexidx, LLVMValueRef *addrs);
|
||||
|
||||
LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
|
||||
unsigned location,
|
||||
unsigned driver_location,
|
||||
unsigned component,
|
||||
unsigned num_components,
|
||||
unsigned vertex_index,
|
||||
unsigned const_index,
|
||||
LLVMTypeRef type);
|
||||
LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi, unsigned location,
|
||||
unsigned driver_location, unsigned component,
|
||||
unsigned num_components, unsigned vertex_index, unsigned const_index,
|
||||
LLVMTypeRef type);
|
||||
|
||||
LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi,
|
||||
LLVMTypeRef type,
|
||||
LLVMValueRef vertex_index,
|
||||
LLVMValueRef param_index,
|
||||
unsigned const_index,
|
||||
unsigned location,
|
||||
unsigned driver_location,
|
||||
unsigned component,
|
||||
unsigned num_components,
|
||||
bool is_patch,
|
||||
bool is_compact,
|
||||
bool load_inputs);
|
||||
LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, LLVMTypeRef type,
|
||||
LLVMValueRef vertex_index, LLVMValueRef param_index,
|
||||
unsigned const_index, unsigned location,
|
||||
unsigned driver_location, unsigned component,
|
||||
unsigned num_components, bool is_patch, bool is_compact,
|
||||
bool load_inputs);
|
||||
|
||||
void (*store_tcs_outputs)(struct ac_shader_abi *abi,
|
||||
const struct nir_variable *var,
|
||||
LLVMValueRef vertex_index,
|
||||
LLVMValueRef param_index,
|
||||
unsigned const_index,
|
||||
LLVMValueRef src,
|
||||
unsigned writemask,
|
||||
unsigned component,
|
||||
unsigned driver_location);
|
||||
void (*store_tcs_outputs)(struct ac_shader_abi *abi, const struct nir_variable *var,
|
||||
LLVMValueRef vertex_index, LLVMValueRef param_index,
|
||||
unsigned const_index, LLVMValueRef src, unsigned writemask,
|
||||
unsigned component, unsigned driver_location);
|
||||
|
||||
LLVMValueRef (*load_tess_coord)(struct ac_shader_abi *abi);
|
||||
LLVMValueRef (*load_tess_coord)(struct ac_shader_abi *abi);
|
||||
|
||||
LLVMValueRef (*load_patch_vertices_in)(struct ac_shader_abi *abi);
|
||||
LLVMValueRef (*load_patch_vertices_in)(struct ac_shader_abi *abi);
|
||||
|
||||
LLVMValueRef (*load_tess_level)(struct ac_shader_abi *abi,
|
||||
unsigned varying_id,
|
||||
bool load_default_state);
|
||||
LLVMValueRef (*load_tess_level)(struct ac_shader_abi *abi, unsigned varying_id,
|
||||
bool load_default_state);
|
||||
|
||||
LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
|
||||
|
||||
LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
|
||||
/**
|
||||
* Load the descriptor for the given buffer.
|
||||
*
|
||||
* \param buffer the buffer as presented in NIR: this is the descriptor
|
||||
* in Vulkan, and the buffer index in OpenGL/Gallium
|
||||
* \param write whether buffer contents will be written
|
||||
*/
|
||||
LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi, LLVMValueRef buffer, bool write);
|
||||
|
||||
/**
|
||||
* Load the descriptor for the given buffer.
|
||||
*
|
||||
* \param buffer the buffer as presented in NIR: this is the descriptor
|
||||
* in Vulkan, and the buffer index in OpenGL/Gallium
|
||||
* \param write whether buffer contents will be written
|
||||
*/
|
||||
LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi,
|
||||
LLVMValueRef buffer, bool write);
|
||||
/**
|
||||
* Load a descriptor associated to a sampler.
|
||||
*
|
||||
* \param descriptor_set the descriptor set index (only for Vulkan)
|
||||
* \param base_index the base index of the sampler variable
|
||||
* \param constant_index constant part of an array index (or 0, if the
|
||||
* sampler variable is not an array)
|
||||
* \param index non-constant part of an array index (may be NULL)
|
||||
* \param desc_type the type of descriptor to load
|
||||
* \param image whether the descriptor is loaded for an image operation
|
||||
*/
|
||||
LLVMValueRef (*load_sampler_desc)(struct ac_shader_abi *abi, unsigned descriptor_set,
|
||||
unsigned base_index, unsigned constant_index,
|
||||
LLVMValueRef index, enum ac_descriptor_type desc_type,
|
||||
bool image, bool write, bool bindless);
|
||||
|
||||
/**
|
||||
* Load a descriptor associated to a sampler.
|
||||
*
|
||||
* \param descriptor_set the descriptor set index (only for Vulkan)
|
||||
* \param base_index the base index of the sampler variable
|
||||
* \param constant_index constant part of an array index (or 0, if the
|
||||
* sampler variable is not an array)
|
||||
* \param index non-constant part of an array index (may be NULL)
|
||||
* \param desc_type the type of descriptor to load
|
||||
* \param image whether the descriptor is loaded for an image operation
|
||||
*/
|
||||
LLVMValueRef (*load_sampler_desc)(struct ac_shader_abi *abi,
|
||||
unsigned descriptor_set,
|
||||
unsigned base_index,
|
||||
unsigned constant_index,
|
||||
LLVMValueRef index,
|
||||
enum ac_descriptor_type desc_type,
|
||||
bool image, bool write,
|
||||
bool bindless);
|
||||
/**
|
||||
* Load a Vulkan-specific resource.
|
||||
*
|
||||
* \param index resource index
|
||||
* \param desc_set descriptor set
|
||||
* \param binding descriptor set binding
|
||||
*/
|
||||
LLVMValueRef (*load_resource)(struct ac_shader_abi *abi, LLVMValueRef index, unsigned desc_set,
|
||||
unsigned binding);
|
||||
|
||||
/**
|
||||
* Load a Vulkan-specific resource.
|
||||
*
|
||||
* \param index resource index
|
||||
* \param desc_set descriptor set
|
||||
* \param binding descriptor set binding
|
||||
*/
|
||||
LLVMValueRef (*load_resource)(struct ac_shader_abi *abi,
|
||||
LLVMValueRef index,
|
||||
unsigned desc_set,
|
||||
unsigned binding);
|
||||
LLVMValueRef (*load_sample_position)(struct ac_shader_abi *abi, LLVMValueRef sample_id);
|
||||
|
||||
LLVMValueRef (*load_sample_position)(struct ac_shader_abi *abi,
|
||||
LLVMValueRef sample_id);
|
||||
LLVMValueRef (*load_local_group_size)(struct ac_shader_abi *abi);
|
||||
|
||||
LLVMValueRef (*load_local_group_size)(struct ac_shader_abi *abi);
|
||||
LLVMValueRef (*load_sample_mask_in)(struct ac_shader_abi *abi);
|
||||
|
||||
LLVMValueRef (*load_sample_mask_in)(struct ac_shader_abi *abi);
|
||||
LLVMValueRef (*load_base_vertex)(struct ac_shader_abi *abi);
|
||||
|
||||
LLVMValueRef (*load_base_vertex)(struct ac_shader_abi *abi);
|
||||
LLVMValueRef (*emit_fbfetch)(struct ac_shader_abi *abi);
|
||||
|
||||
LLVMValueRef (*emit_fbfetch)(struct ac_shader_abi *abi);
|
||||
/* Whether to clamp the shadow reference value to [0,1]on GFX8. Radeonsi currently
|
||||
* uses it due to promoting D16 to D32, but radv needs it off. */
|
||||
bool clamp_shadow_reference;
|
||||
bool interp_at_sample_force_center;
|
||||
|
||||
/* Whether to clamp the shadow reference value to [0,1]on GFX8. Radeonsi currently
|
||||
* uses it due to promoting D16 to D32, but radv needs it off. */
|
||||
bool clamp_shadow_reference;
|
||||
bool interp_at_sample_force_center;
|
||||
/* Whether bounds checks are required */
|
||||
bool robust_buffer_access;
|
||||
|
||||
/* Whether bounds checks are required */
|
||||
bool robust_buffer_access;
|
||||
/* Check for Inf interpolation coeff */
|
||||
bool kill_ps_if_inf_interp;
|
||||
|
||||
/* Check for Inf interpolation coeff */
|
||||
bool kill_ps_if_inf_interp;
|
||||
/* Whether undef values must be converted to zero */
|
||||
bool convert_undef_to_zero;
|
||||
|
||||
/* Whether undef values must be converted to zero */
|
||||
bool convert_undef_to_zero;
|
||||
|
||||
/* Clamp div by 0 (so it won't produce NaN) */
|
||||
bool clamp_div_by_zero;
|
||||
/* Clamp div by 0 (so it won't produce NaN) */
|
||||
bool clamp_div_by_zero;
|
||||
};
|
||||
|
||||
#endif /* AC_SHADER_ABI_H */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue