mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-23 09:00:10 +01:00
amd/llvm: switch to 3-spaces style
Follow-up of !4319 using the same clang-format config. Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Acked-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5310>
This commit is contained in:
parent
afa1fba198
commit
82d2d73e03
10 changed files with 8876 additions and 10026 deletions
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -24,205 +24,188 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "ac_llvm_cull.h"
|
#include "ac_llvm_cull.h"
|
||||||
|
|
||||||
#include <llvm-c/Core.h>
|
#include <llvm-c/Core.h>
|
||||||
|
|
||||||
struct ac_position_w_info {
|
struct ac_position_w_info {
|
||||||
/* If a primitive intersects the W=0 plane, it causes a reflection
|
/* If a primitive intersects the W=0 plane, it causes a reflection
|
||||||
* of the determinant used for face culling. Every vertex behind
|
* of the determinant used for face culling. Every vertex behind
|
||||||
* the W=0 plane negates the determinant, so having 2 vertices behind
|
* the W=0 plane negates the determinant, so having 2 vertices behind
|
||||||
* the plane has no effect. This is i1 true if the determinant should be
|
* the plane has no effect. This is i1 true if the determinant should be
|
||||||
* negated.
|
* negated.
|
||||||
*/
|
*/
|
||||||
LLVMValueRef w_reflection;
|
LLVMValueRef w_reflection;
|
||||||
|
|
||||||
/* If we simplify the "-w <= p <= w" view culling equation, we get
|
/* If we simplify the "-w <= p <= w" view culling equation, we get
|
||||||
* "-w <= w", which can't be satisfied when w is negative.
|
* "-w <= w", which can't be satisfied when w is negative.
|
||||||
* In perspective projection, a negative W means that the primitive
|
* In perspective projection, a negative W means that the primitive
|
||||||
* is behind the viewer, but the equation is independent of the type
|
* is behind the viewer, but the equation is independent of the type
|
||||||
* of projection.
|
* of projection.
|
||||||
*
|
*
|
||||||
* w_accepted is false when all W are negative and therefore
|
* w_accepted is false when all W are negative and therefore
|
||||||
* the primitive is invisible.
|
* the primitive is invisible.
|
||||||
*/
|
*/
|
||||||
LLVMValueRef w_accepted;
|
LLVMValueRef w_accepted;
|
||||||
|
|
||||||
LLVMValueRef all_w_positive;
|
LLVMValueRef all_w_positive;
|
||||||
LLVMValueRef any_w_negative;
|
LLVMValueRef any_w_negative;
|
||||||
};
|
};
|
||||||
|
|
||||||
static void ac_analyze_position_w(struct ac_llvm_context *ctx,
|
static void ac_analyze_position_w(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
|
||||||
LLVMValueRef pos[3][4],
|
struct ac_position_w_info *w)
|
||||||
struct ac_position_w_info *w)
|
|
||||||
{
|
{
|
||||||
LLVMBuilderRef builder = ctx->builder;
|
LLVMBuilderRef builder = ctx->builder;
|
||||||
LLVMValueRef all_w_negative = ctx->i1true;
|
LLVMValueRef all_w_negative = ctx->i1true;
|
||||||
|
|
||||||
w->w_reflection = ctx->i1false;
|
w->w_reflection = ctx->i1false;
|
||||||
w->any_w_negative = ctx->i1false;
|
w->any_w_negative = ctx->i1false;
|
||||||
|
|
||||||
for (unsigned i = 0; i < 3; i++) {
|
for (unsigned i = 0; i < 3; i++) {
|
||||||
LLVMValueRef neg_w;
|
LLVMValueRef neg_w;
|
||||||
|
|
||||||
neg_w = LLVMBuildFCmp(builder, LLVMRealOLT, pos[i][3], ctx->f32_0, "");
|
neg_w = LLVMBuildFCmp(builder, LLVMRealOLT, pos[i][3], ctx->f32_0, "");
|
||||||
/* If neg_w is true, negate w_reflection. */
|
/* If neg_w is true, negate w_reflection. */
|
||||||
w->w_reflection = LLVMBuildXor(builder, w->w_reflection, neg_w, "");
|
w->w_reflection = LLVMBuildXor(builder, w->w_reflection, neg_w, "");
|
||||||
w->any_w_negative = LLVMBuildOr(builder, w->any_w_negative, neg_w, "");
|
w->any_w_negative = LLVMBuildOr(builder, w->any_w_negative, neg_w, "");
|
||||||
all_w_negative = LLVMBuildAnd(builder, all_w_negative, neg_w, "");
|
all_w_negative = LLVMBuildAnd(builder, all_w_negative, neg_w, "");
|
||||||
}
|
}
|
||||||
w->all_w_positive = LLVMBuildNot(builder, w->any_w_negative, "");
|
w->all_w_positive = LLVMBuildNot(builder, w->any_w_negative, "");
|
||||||
w->w_accepted = LLVMBuildNot(builder, all_w_negative, "");
|
w->w_accepted = LLVMBuildNot(builder, all_w_negative, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Perform front/back face culling and return true if the primitive is accepted. */
|
/* Perform front/back face culling and return true if the primitive is accepted. */
|
||||||
static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx,
|
static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
|
||||||
LLVMValueRef pos[3][4],
|
struct ac_position_w_info *w, bool cull_front, bool cull_back,
|
||||||
struct ac_position_w_info *w,
|
bool cull_zero_area)
|
||||||
bool cull_front,
|
|
||||||
bool cull_back,
|
|
||||||
bool cull_zero_area)
|
|
||||||
{
|
{
|
||||||
LLVMBuilderRef builder = ctx->builder;
|
LLVMBuilderRef builder = ctx->builder;
|
||||||
|
|
||||||
if (cull_front && cull_back)
|
if (cull_front && cull_back)
|
||||||
return ctx->i1false;
|
return ctx->i1false;
|
||||||
|
|
||||||
if (!cull_front && !cull_back && !cull_zero_area)
|
if (!cull_front && !cull_back && !cull_zero_area)
|
||||||
return ctx->i1true;
|
return ctx->i1true;
|
||||||
|
|
||||||
/* Front/back face culling. Also if the determinant == 0, the triangle
|
/* Front/back face culling. Also if the determinant == 0, the triangle
|
||||||
* area is 0.
|
* area is 0.
|
||||||
*/
|
*/
|
||||||
LLVMValueRef det_t0 = LLVMBuildFSub(builder, pos[2][0], pos[0][0], "");
|
LLVMValueRef det_t0 = LLVMBuildFSub(builder, pos[2][0], pos[0][0], "");
|
||||||
LLVMValueRef det_t1 = LLVMBuildFSub(builder, pos[1][1], pos[0][1], "");
|
LLVMValueRef det_t1 = LLVMBuildFSub(builder, pos[1][1], pos[0][1], "");
|
||||||
LLVMValueRef det_t2 = LLVMBuildFSub(builder, pos[0][0], pos[1][0], "");
|
LLVMValueRef det_t2 = LLVMBuildFSub(builder, pos[0][0], pos[1][0], "");
|
||||||
LLVMValueRef det_t3 = LLVMBuildFSub(builder, pos[0][1], pos[2][1], "");
|
LLVMValueRef det_t3 = LLVMBuildFSub(builder, pos[0][1], pos[2][1], "");
|
||||||
LLVMValueRef det_p0 = LLVMBuildFMul(builder, det_t0, det_t1, "");
|
LLVMValueRef det_p0 = LLVMBuildFMul(builder, det_t0, det_t1, "");
|
||||||
LLVMValueRef det_p1 = LLVMBuildFMul(builder, det_t2, det_t3, "");
|
LLVMValueRef det_p1 = LLVMBuildFMul(builder, det_t2, det_t3, "");
|
||||||
LLVMValueRef det = LLVMBuildFSub(builder, det_p0, det_p1, "");
|
LLVMValueRef det = LLVMBuildFSub(builder, det_p0, det_p1, "");
|
||||||
|
|
||||||
/* Negative W negates the determinant. */
|
/* Negative W negates the determinant. */
|
||||||
det = LLVMBuildSelect(builder, w->w_reflection,
|
det = LLVMBuildSelect(builder, w->w_reflection, LLVMBuildFNeg(builder, det, ""), det, "");
|
||||||
LLVMBuildFNeg(builder, det, ""),
|
|
||||||
det, "");
|
|
||||||
|
|
||||||
LLVMValueRef accepted = NULL;
|
LLVMValueRef accepted = NULL;
|
||||||
if (cull_front) {
|
if (cull_front) {
|
||||||
LLVMRealPredicate cond = cull_zero_area ? LLVMRealOGT : LLVMRealOGE;
|
LLVMRealPredicate cond = cull_zero_area ? LLVMRealOGT : LLVMRealOGE;
|
||||||
accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");
|
accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");
|
||||||
} else if (cull_back) {
|
} else if (cull_back) {
|
||||||
LLVMRealPredicate cond = cull_zero_area ? LLVMRealOLT : LLVMRealOLE;
|
LLVMRealPredicate cond = cull_zero_area ? LLVMRealOLT : LLVMRealOLE;
|
||||||
accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");
|
accepted = LLVMBuildFCmp(builder, cond, det, ctx->f32_0, "");
|
||||||
} else if (cull_zero_area) {
|
} else if (cull_zero_area) {
|
||||||
accepted = LLVMBuildFCmp(builder, LLVMRealONE, det, ctx->f32_0, "");
|
accepted = LLVMBuildFCmp(builder, LLVMRealONE, det, ctx->f32_0, "");
|
||||||
}
|
}
|
||||||
return accepted;
|
return accepted;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Perform view culling and small primitive elimination and return true
|
/* Perform view culling and small primitive elimination and return true
|
||||||
* if the primitive is accepted and initially_accepted == true. */
|
* if the primitive is accepted and initially_accepted == true. */
|
||||||
static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx,
|
static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
|
||||||
LLVMValueRef pos[3][4],
|
LLVMValueRef initially_accepted, struct ac_position_w_info *w,
|
||||||
LLVMValueRef initially_accepted,
|
LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2],
|
||||||
struct ac_position_w_info *w,
|
LLVMValueRef small_prim_precision, bool cull_view_xy,
|
||||||
LLVMValueRef vp_scale[2],
|
bool cull_view_near_z, bool cull_view_far_z, bool cull_small_prims,
|
||||||
LLVMValueRef vp_translate[2],
|
bool use_halfz_clip_space)
|
||||||
LLVMValueRef small_prim_precision,
|
|
||||||
bool cull_view_xy,
|
|
||||||
bool cull_view_near_z,
|
|
||||||
bool cull_view_far_z,
|
|
||||||
bool cull_small_prims,
|
|
||||||
bool use_halfz_clip_space)
|
|
||||||
{
|
{
|
||||||
LLVMBuilderRef builder = ctx->builder;
|
LLVMBuilderRef builder = ctx->builder;
|
||||||
|
|
||||||
if (!cull_view_xy && !cull_view_near_z && !cull_view_far_z && !cull_small_prims)
|
if (!cull_view_xy && !cull_view_near_z && !cull_view_far_z && !cull_small_prims)
|
||||||
return initially_accepted;
|
return initially_accepted;
|
||||||
|
|
||||||
/* Skip the culling if the primitive has already been rejected or
|
/* Skip the culling if the primitive has already been rejected or
|
||||||
* if any W is negative. The bounding box culling doesn't work when
|
* if any W is negative. The bounding box culling doesn't work when
|
||||||
* W is negative.
|
* W is negative.
|
||||||
*/
|
*/
|
||||||
LLVMValueRef cond = LLVMBuildAnd(builder, initially_accepted,
|
LLVMValueRef cond = LLVMBuildAnd(builder, initially_accepted, w->all_w_positive, "");
|
||||||
w->all_w_positive, "");
|
LLVMValueRef accepted_var = ac_build_alloca_undef(ctx, ctx->i1, "");
|
||||||
LLVMValueRef accepted_var = ac_build_alloca_undef(ctx, ctx->i1, "");
|
LLVMBuildStore(builder, initially_accepted, accepted_var);
|
||||||
LLVMBuildStore(builder, initially_accepted, accepted_var);
|
|
||||||
|
|
||||||
ac_build_ifcc(ctx, cond, 10000000 /* does this matter? */);
|
ac_build_ifcc(ctx, cond, 10000000 /* does this matter? */);
|
||||||
{
|
{
|
||||||
LLVMValueRef bbox_min[3], bbox_max[3];
|
LLVMValueRef bbox_min[3], bbox_max[3];
|
||||||
LLVMValueRef accepted = initially_accepted;
|
LLVMValueRef accepted = initially_accepted;
|
||||||
|
|
||||||
/* Compute the primitive bounding box for easy culling. */
|
/* Compute the primitive bounding box for easy culling. */
|
||||||
for (unsigned chan = 0; chan < (cull_view_near_z || cull_view_far_z ? 3 : 2); chan++) {
|
for (unsigned chan = 0; chan < (cull_view_near_z || cull_view_far_z ? 3 : 2); chan++) {
|
||||||
bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]);
|
bbox_min[chan] = ac_build_fmin(ctx, pos[0][chan], pos[1][chan]);
|
||||||
bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);
|
bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);
|
||||||
|
|
||||||
bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]);
|
bbox_max[chan] = ac_build_fmax(ctx, pos[0][chan], pos[1][chan]);
|
||||||
bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);
|
bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* View culling. */
|
/* View culling. */
|
||||||
if (cull_view_xy || cull_view_near_z || cull_view_far_z) {
|
if (cull_view_xy || cull_view_near_z || cull_view_far_z) {
|
||||||
for (unsigned chan = 0; chan < 3; chan++) {
|
for (unsigned chan = 0; chan < 3; chan++) {
|
||||||
LLVMValueRef visible;
|
LLVMValueRef visible;
|
||||||
|
|
||||||
if ((cull_view_xy && chan <= 1) ||
|
if ((cull_view_xy && chan <= 1) || (cull_view_near_z && chan == 2)) {
|
||||||
(cull_view_near_z && chan == 2)) {
|
float t = chan == 2 && use_halfz_clip_space ? 0 : -1;
|
||||||
float t = chan == 2 && use_halfz_clip_space ? 0 : -1;
|
visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan],
|
||||||
visible = LLVMBuildFCmp(builder, LLVMRealOGE, bbox_max[chan],
|
LLVMConstReal(ctx->f32, t), "");
|
||||||
LLVMConstReal(ctx->f32, t), "");
|
accepted = LLVMBuildAnd(builder, accepted, visible, "");
|
||||||
accepted = LLVMBuildAnd(builder, accepted, visible, "");
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if ((cull_view_xy && chan <= 1) ||
|
if ((cull_view_xy && chan <= 1) || (cull_view_far_z && chan == 2)) {
|
||||||
(cull_view_far_z && chan == 2)) {
|
visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan], ctx->f32_1, "");
|
||||||
visible = LLVMBuildFCmp(builder, LLVMRealOLE, bbox_min[chan],
|
accepted = LLVMBuildAnd(builder, accepted, visible, "");
|
||||||
ctx->f32_1, "");
|
}
|
||||||
accepted = LLVMBuildAnd(builder, accepted, visible, "");
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Small primitive elimination. */
|
/* Small primitive elimination. */
|
||||||
if (cull_small_prims) {
|
if (cull_small_prims) {
|
||||||
/* Assuming a sample position at (0.5, 0.5), if we round
|
/* Assuming a sample position at (0.5, 0.5), if we round
|
||||||
* the bounding box min/max extents and the results of
|
* the bounding box min/max extents and the results of
|
||||||
* the rounding are equal in either the X or Y direction,
|
* the rounding are equal in either the X or Y direction,
|
||||||
* the bounding box does not intersect the sample.
|
* the bounding box does not intersect the sample.
|
||||||
*
|
*
|
||||||
* See these GDC slides for pictures:
|
* See these GDC slides for pictures:
|
||||||
* https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf
|
* https://frostbite-wp-prd.s3.amazonaws.com/wp-content/uploads/2016/03/29204330/GDC_2016_Compute.pdf
|
||||||
*/
|
*/
|
||||||
LLVMValueRef min, max, not_equal[2], visible;
|
LLVMValueRef min, max, not_equal[2], visible;
|
||||||
|
|
||||||
for (unsigned chan = 0; chan < 2; chan++) {
|
for (unsigned chan = 0; chan < 2; chan++) {
|
||||||
/* Convert the position to screen-space coordinates. */
|
/* Convert the position to screen-space coordinates. */
|
||||||
min = ac_build_fmad(ctx, bbox_min[chan],
|
min = ac_build_fmad(ctx, bbox_min[chan], vp_scale[chan], vp_translate[chan]);
|
||||||
vp_scale[chan], vp_translate[chan]);
|
max = ac_build_fmad(ctx, bbox_max[chan], vp_scale[chan], vp_translate[chan]);
|
||||||
max = ac_build_fmad(ctx, bbox_max[chan],
|
/* Scale the bounding box according to the precision of
|
||||||
vp_scale[chan], vp_translate[chan]);
|
* the rasterizer and the number of MSAA samples. */
|
||||||
/* Scale the bounding box according to the precision of
|
min = LLVMBuildFSub(builder, min, small_prim_precision, "");
|
||||||
* the rasterizer and the number of MSAA samples. */
|
max = LLVMBuildFAdd(builder, max, small_prim_precision, "");
|
||||||
min = LLVMBuildFSub(builder, min, small_prim_precision, "");
|
|
||||||
max = LLVMBuildFAdd(builder, max, small_prim_precision, "");
|
|
||||||
|
|
||||||
/* Determine if the bbox intersects the sample point.
|
/* Determine if the bbox intersects the sample point.
|
||||||
* It also works for MSAA, but vp_scale, vp_translate,
|
* It also works for MSAA, but vp_scale, vp_translate,
|
||||||
* and small_prim_precision are computed differently.
|
* and small_prim_precision are computed differently.
|
||||||
*/
|
*/
|
||||||
min = ac_build_round(ctx, min);
|
min = ac_build_round(ctx, min);
|
||||||
max = ac_build_round(ctx, max);
|
max = ac_build_round(ctx, max);
|
||||||
not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, "");
|
not_equal[chan] = LLVMBuildFCmp(builder, LLVMRealONE, min, max, "");
|
||||||
}
|
}
|
||||||
visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], "");
|
visible = LLVMBuildAnd(builder, not_equal[0], not_equal[1], "");
|
||||||
accepted = LLVMBuildAnd(builder, accepted, visible, "");
|
accepted = LLVMBuildAnd(builder, accepted, visible, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
LLVMBuildStore(builder, accepted, accepted_var);
|
LLVMBuildStore(builder, accepted, accepted_var);
|
||||||
}
|
}
|
||||||
ac_build_endif(ctx, 10000000);
|
ac_build_endif(ctx, 10000000);
|
||||||
|
|
||||||
return LLVMBuildLoad(builder, accepted_var, "");
|
return LLVMBuildLoad(builder, accepted_var, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -241,35 +224,27 @@ static LLVMValueRef cull_bbox(struct ac_llvm_context *ctx,
|
||||||
* subpixel_bits are defined by the quantization mode.
|
* subpixel_bits are defined by the quantization mode.
|
||||||
* \param options See ac_cull_options.
|
* \param options See ac_cull_options.
|
||||||
*/
|
*/
|
||||||
LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx,
|
LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
|
||||||
LLVMValueRef pos[3][4],
|
LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2],
|
||||||
LLVMValueRef initially_accepted,
|
LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision,
|
||||||
LLVMValueRef vp_scale[2],
|
struct ac_cull_options *options)
|
||||||
LLVMValueRef vp_translate[2],
|
|
||||||
LLVMValueRef small_prim_precision,
|
|
||||||
struct ac_cull_options *options)
|
|
||||||
{
|
{
|
||||||
struct ac_position_w_info w;
|
struct ac_position_w_info w;
|
||||||
ac_analyze_position_w(ctx, pos, &w);
|
ac_analyze_position_w(ctx, pos, &w);
|
||||||
|
|
||||||
/* W culling. */
|
/* W culling. */
|
||||||
LLVMValueRef accepted = options->cull_w ? w.w_accepted : ctx->i1true;
|
LLVMValueRef accepted = options->cull_w ? w.w_accepted : ctx->i1true;
|
||||||
accepted = LLVMBuildAnd(ctx->builder, accepted, initially_accepted, "");
|
accepted = LLVMBuildAnd(ctx->builder, accepted, initially_accepted, "");
|
||||||
|
|
||||||
/* Face culling. */
|
/* Face culling. */
|
||||||
accepted = LLVMBuildAnd(ctx->builder, accepted,
|
accepted = LLVMBuildAnd(
|
||||||
ac_cull_face(ctx, pos, &w,
|
ctx->builder, accepted,
|
||||||
options->cull_front,
|
ac_cull_face(ctx, pos, &w, options->cull_front, options->cull_back, options->cull_zero_area),
|
||||||
options->cull_back,
|
"");
|
||||||
options->cull_zero_area), "");
|
|
||||||
|
|
||||||
/* View culling and small primitive elimination. */
|
/* View culling and small primitive elimination. */
|
||||||
accepted = cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate,
|
accepted = cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate, small_prim_precision,
|
||||||
small_prim_precision,
|
options->cull_view_xy, options->cull_view_near_z, options->cull_view_far_z,
|
||||||
options->cull_view_xy,
|
options->cull_small_prims, options->use_halfz_clip_space);
|
||||||
options->cull_view_near_z,
|
return accepted;
|
||||||
options->cull_view_far_z,
|
|
||||||
options->cull_small_prims,
|
|
||||||
options->use_halfz_clip_space);
|
|
||||||
return accepted;
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -29,31 +29,28 @@
|
||||||
#include "ac_llvm_build.h"
|
#include "ac_llvm_build.h"
|
||||||
|
|
||||||
struct ac_cull_options {
|
struct ac_cull_options {
|
||||||
/* In general, I recommend setting all to true except view Z culling,
|
/* In general, I recommend setting all to true except view Z culling,
|
||||||
* which isn't so effective because W culling is cheaper and partially
|
* which isn't so effective because W culling is cheaper and partially
|
||||||
* replaces near Z culling, and you don't need to set Position.z
|
* replaces near Z culling, and you don't need to set Position.z
|
||||||
* if Z culling is disabled.
|
* if Z culling is disabled.
|
||||||
*
|
*
|
||||||
* If something doesn't work, turn some of these off to find out what.
|
* If something doesn't work, turn some of these off to find out what.
|
||||||
*/
|
*/
|
||||||
bool cull_front;
|
bool cull_front;
|
||||||
bool cull_back;
|
bool cull_back;
|
||||||
bool cull_view_xy;
|
bool cull_view_xy;
|
||||||
bool cull_view_near_z;
|
bool cull_view_near_z;
|
||||||
bool cull_view_far_z;
|
bool cull_view_far_z;
|
||||||
bool cull_small_prims;
|
bool cull_small_prims;
|
||||||
bool cull_zero_area;
|
bool cull_zero_area;
|
||||||
bool cull_w; /* cull primitives with all W < 0 */
|
bool cull_w; /* cull primitives with all W < 0 */
|
||||||
|
|
||||||
bool use_halfz_clip_space;
|
bool use_halfz_clip_space;
|
||||||
};
|
};
|
||||||
|
|
||||||
LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx,
|
LLVMValueRef ac_cull_triangle(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
|
||||||
LLVMValueRef pos[3][4],
|
LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2],
|
||||||
LLVMValueRef initially_accepted,
|
LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision,
|
||||||
LLVMValueRef vp_scale[2],
|
struct ac_cull_options *options);
|
||||||
LLVMValueRef vp_translate[2],
|
|
||||||
LLVMValueRef small_prim_precision,
|
|
||||||
struct ac_cull_options *options);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -23,15 +23,14 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <cstring>
|
|
||||||
|
|
||||||
#include <llvm-c/Core.h>
|
#include <llvm-c/Core.h>
|
||||||
#include <llvm/Target/TargetMachine.h>
|
|
||||||
#include <llvm/IR/IRBuilder.h>
|
|
||||||
#include <llvm/Analysis/TargetLibraryInfo.h>
|
#include <llvm/Analysis/TargetLibraryInfo.h>
|
||||||
|
#include <llvm/IR/IRBuilder.h>
|
||||||
|
#include <llvm/IR/LegacyPassManager.h>
|
||||||
|
#include <llvm/Target/TargetMachine.h>
|
||||||
#include <llvm/Transforms/IPO.h>
|
#include <llvm/Transforms/IPO.h>
|
||||||
|
|
||||||
#include <llvm/IR/LegacyPassManager.h>
|
#include <cstring>
|
||||||
|
|
||||||
/* DO NOT REORDER THE HEADERS
|
/* DO NOT REORDER THE HEADERS
|
||||||
* The LLVM headers need to all be included before any Mesa header,
|
* The LLVM headers need to all be included before any Mesa header,
|
||||||
|
|
@ -42,7 +41,6 @@
|
||||||
#include "ac_binary.h"
|
#include "ac_binary.h"
|
||||||
#include "ac_llvm_util.h"
|
#include "ac_llvm_util.h"
|
||||||
#include "ac_llvm_build.h"
|
#include "ac_llvm_build.h"
|
||||||
|
|
||||||
#include "util/macros.h"
|
#include "util/macros.h"
|
||||||
|
|
||||||
void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
|
void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
|
||||||
|
|
@ -54,36 +52,36 @@ void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
|
||||||
void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
|
void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
|
||||||
{
|
{
|
||||||
#if LLVM_VERSION_MAJOR >= 10
|
#if LLVM_VERSION_MAJOR >= 10
|
||||||
llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
|
llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
|
||||||
A->addAttr(llvm::Attribute::getWithAlignment(A->getContext(), llvm::Align(bytes)));
|
A->addAttr(llvm::Attribute::getWithAlignment(A->getContext(), llvm::Align(bytes)));
|
||||||
#else
|
#else
|
||||||
/* Avoid unused parameter warnings. */
|
/* Avoid unused parameter warnings. */
|
||||||
(void)val;
|
(void)val;
|
||||||
(void)bytes;
|
(void)bytes;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ac_is_sgpr_param(LLVMValueRef arg)
|
bool ac_is_sgpr_param(LLVMValueRef arg)
|
||||||
{
|
{
|
||||||
llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
|
llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
|
||||||
llvm::AttributeList AS = A->getParent()->getAttributes();
|
llvm::AttributeList AS = A->getParent()->getAttributes();
|
||||||
unsigned ArgNo = A->getArgNo();
|
unsigned ArgNo = A->getArgNo();
|
||||||
return AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
|
return AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
|
||||||
}
|
}
|
||||||
|
|
||||||
LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
|
LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
|
||||||
{
|
{
|
||||||
return LLVMGetCalledValue(call);
|
return LLVMGetCalledValue(call);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ac_llvm_is_function(LLVMValueRef v)
|
bool ac_llvm_is_function(LLVMValueRef v)
|
||||||
{
|
{
|
||||||
return LLVMGetValueKind(v) == LLVMFunctionValueKind;
|
return LLVMGetValueKind(v) == LLVMFunctionValueKind;
|
||||||
}
|
}
|
||||||
|
|
||||||
LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
|
LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
|
||||||
{
|
{
|
||||||
llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
|
llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
|
||||||
LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
|
LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
|
||||||
|
|
||||||
llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
|
llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
|
||||||
|
|
@ -91,246 +89,243 @@ LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
|
||||||
return module;
|
return module;
|
||||||
}
|
}
|
||||||
|
|
||||||
LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
|
LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode)
|
||||||
enum ac_float_mode float_mode)
|
|
||||||
{
|
{
|
||||||
LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
|
LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
|
||||||
|
|
||||||
llvm::FastMathFlags flags;
|
llvm::FastMathFlags flags;
|
||||||
|
|
||||||
switch (float_mode) {
|
switch (float_mode) {
|
||||||
case AC_FLOAT_MODE_DEFAULT:
|
case AC_FLOAT_MODE_DEFAULT:
|
||||||
case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
|
case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case AC_FLOAT_MODE_DEFAULT_OPENGL:
|
case AC_FLOAT_MODE_DEFAULT_OPENGL:
|
||||||
/* Allow optimizations to treat the sign of a zero argument or
|
/* Allow optimizations to treat the sign of a zero argument or
|
||||||
* result as insignificant.
|
* result as insignificant.
|
||||||
*/
|
*/
|
||||||
flags.setNoSignedZeros(); /* nsz */
|
flags.setNoSignedZeros(); /* nsz */
|
||||||
|
|
||||||
/* Allow optimizations to use the reciprocal of an argument
|
/* Allow optimizations to use the reciprocal of an argument
|
||||||
* rather than perform division.
|
* rather than perform division.
|
||||||
*/
|
*/
|
||||||
flags.setAllowReciprocal(); /* arcp */
|
flags.setAllowReciprocal(); /* arcp */
|
||||||
|
|
||||||
llvm::unwrap(builder)->setFastMathFlags(flags);
|
llvm::unwrap(builder)->setFastMathFlags(flags);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return builder;
|
return builder;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
|
void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
|
||||||
{
|
{
|
||||||
if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
|
if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
|
||||||
auto *b = llvm::unwrap(ctx->builder);
|
auto *b = llvm::unwrap(ctx->builder);
|
||||||
llvm::FastMathFlags flags = b->getFastMathFlags();
|
llvm::FastMathFlags flags = b->getFastMathFlags();
|
||||||
|
|
||||||
/* This disables the optimization of (x + 0), which is used
|
/* This disables the optimization of (x + 0), which is used
|
||||||
* to convert negative zero to positive zero.
|
* to convert negative zero to positive zero.
|
||||||
*/
|
*/
|
||||||
flags.setNoSignedZeros(false);
|
flags.setNoSignedZeros(false);
|
||||||
b->setFastMathFlags(flags);
|
b->setFastMathFlags(flags);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
|
void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
|
||||||
{
|
{
|
||||||
if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
|
if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
|
||||||
auto *b = llvm::unwrap(ctx->builder);
|
auto *b = llvm::unwrap(ctx->builder);
|
||||||
llvm::FastMathFlags flags = b->getFastMathFlags();
|
llvm::FastMathFlags flags = b->getFastMathFlags();
|
||||||
|
|
||||||
flags.setNoSignedZeros();
|
flags.setNoSignedZeros();
|
||||||
b->setFastMathFlags(flags);
|
b->setFastMathFlags(flags);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
LLVMTargetLibraryInfoRef
|
LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple)
|
||||||
ac_create_target_library_info(const char *triple)
|
|
||||||
{
|
{
|
||||||
return reinterpret_cast<LLVMTargetLibraryInfoRef>(new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
|
return reinterpret_cast<LLVMTargetLibraryInfoRef>(
|
||||||
|
new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
|
||||||
ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
|
|
||||||
{
|
{
|
||||||
delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
|
delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Implementation of raw_pwrite_stream that works on malloc()ed memory for
|
/* Implementation of raw_pwrite_stream that works on malloc()ed memory for
|
||||||
* better compatibility with C code. */
|
* better compatibility with C code. */
|
||||||
struct raw_memory_ostream : public llvm::raw_pwrite_stream {
|
struct raw_memory_ostream : public llvm::raw_pwrite_stream {
|
||||||
char *buffer;
|
char *buffer;
|
||||||
size_t written;
|
size_t written;
|
||||||
size_t bufsize;
|
size_t bufsize;
|
||||||
|
|
||||||
raw_memory_ostream()
|
raw_memory_ostream()
|
||||||
{
|
{
|
||||||
buffer = NULL;
|
buffer = NULL;
|
||||||
written = 0;
|
written = 0;
|
||||||
bufsize = 0;
|
bufsize = 0;
|
||||||
SetUnbuffered();
|
SetUnbuffered();
|
||||||
}
|
}
|
||||||
|
|
||||||
~raw_memory_ostream()
|
~raw_memory_ostream()
|
||||||
{
|
{
|
||||||
free(buffer);
|
free(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
void clear()
|
void clear()
|
||||||
{
|
{
|
||||||
written = 0;
|
written = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void take(char *&out_buffer, size_t &out_size)
|
void take(char *&out_buffer, size_t &out_size)
|
||||||
{
|
{
|
||||||
out_buffer = buffer;
|
out_buffer = buffer;
|
||||||
out_size = written;
|
out_size = written;
|
||||||
buffer = NULL;
|
buffer = NULL;
|
||||||
written = 0;
|
written = 0;
|
||||||
bufsize = 0;
|
bufsize = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void flush() = delete;
|
void flush() = delete;
|
||||||
|
|
||||||
void write_impl(const char *ptr, size_t size) override
|
void write_impl(const char *ptr, size_t size) override
|
||||||
{
|
{
|
||||||
if (unlikely(written + size < written))
|
if (unlikely(written + size < written))
|
||||||
abort();
|
abort();
|
||||||
if (written + size > bufsize) {
|
if (written + size > bufsize) {
|
||||||
bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
|
bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
|
||||||
buffer = (char *)realloc(buffer, bufsize);
|
buffer = (char *)realloc(buffer, bufsize);
|
||||||
if (!buffer) {
|
if (!buffer) {
|
||||||
fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
|
fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
memcpy(buffer + written, ptr, size);
|
memcpy(buffer + written, ptr, size);
|
||||||
written += size;
|
written += size;
|
||||||
}
|
}
|
||||||
|
|
||||||
void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
|
void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
|
||||||
{
|
{
|
||||||
assert(offset == (size_t)offset &&
|
assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written);
|
||||||
offset + size >= offset && offset + size <= written);
|
memcpy(buffer + offset, ptr, size);
|
||||||
memcpy(buffer + offset, ptr, size);
|
}
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t current_pos() const override
|
uint64_t current_pos() const override
|
||||||
{
|
{
|
||||||
return written;
|
return written;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/* The LLVM compiler is represented as a pass manager containing passes for
|
/* The LLVM compiler is represented as a pass manager containing passes for
|
||||||
* optimizations, instruction selection, and code generation.
|
* optimizations, instruction selection, and code generation.
|
||||||
*/
|
*/
|
||||||
struct ac_compiler_passes {
|
struct ac_compiler_passes {
|
||||||
raw_memory_ostream ostream; /* ELF shader binary stream */
|
raw_memory_ostream ostream; /* ELF shader binary stream */
|
||||||
llvm::legacy::PassManager passmgr; /* list of passes */
|
llvm::legacy::PassManager passmgr; /* list of passes */
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
|
struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
|
||||||
{
|
{
|
||||||
struct ac_compiler_passes *p = new ac_compiler_passes();
|
struct ac_compiler_passes *p = new ac_compiler_passes();
|
||||||
if (!p)
|
if (!p)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);
|
llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
|
||||||
|
|
||||||
if (TM->addPassesToEmitFile(p->passmgr, p->ostream,
|
if (TM->addPassesToEmitFile(p->passmgr, p->ostream, nullptr,
|
||||||
nullptr,
|
|
||||||
#if LLVM_VERSION_MAJOR >= 10
|
#if LLVM_VERSION_MAJOR >= 10
|
||||||
llvm::CGFT_ObjectFile)) {
|
llvm::CGFT_ObjectFile)) {
|
||||||
#else
|
#else
|
||||||
llvm::TargetMachine::CGFT_ObjectFile)) {
|
llvm::TargetMachine::CGFT_ObjectFile)) {
|
||||||
#endif
|
#endif
|
||||||
fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
|
fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
|
||||||
delete p;
|
delete p;
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
|
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
|
||||||
{
|
{
|
||||||
delete p;
|
delete p;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This returns false on failure. */
|
/* This returns false on failure. */
|
||||||
bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
|
bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
|
||||||
char **pelf_buffer, size_t *pelf_size)
|
char **pelf_buffer, size_t *pelf_size)
|
||||||
{
|
{
|
||||||
p->passmgr.run(*llvm::unwrap(module));
|
p->passmgr.run(*llvm::unwrap(module));
|
||||||
p->ostream.take(*pelf_buffer, *pelf_size);
|
p->ostream.take(*pelf_buffer, *pelf_size);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
|
void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
|
||||||
{
|
{
|
||||||
llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
|
llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
|
||||||
}
|
}
|
||||||
|
|
||||||
void ac_enable_global_isel(LLVMTargetMachineRef tm)
|
void ac_enable_global_isel(LLVMTargetMachineRef tm)
|
||||||
{
|
{
|
||||||
reinterpret_cast<llvm::TargetMachine*>(tm)->setGlobalISel(true);
|
reinterpret_cast<llvm::TargetMachine *>(tm)->setGlobalISel(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
|
LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
|
||||||
LLVMValueRef ptr, LLVMValueRef val,
|
LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope)
|
||||||
const char *sync_scope) {
|
{
|
||||||
llvm::AtomicRMWInst::BinOp binop;
|
llvm::AtomicRMWInst::BinOp binop;
|
||||||
switch (op) {
|
switch (op) {
|
||||||
case LLVMAtomicRMWBinOpXchg:
|
case LLVMAtomicRMWBinOpXchg:
|
||||||
binop = llvm::AtomicRMWInst::Xchg;
|
binop = llvm::AtomicRMWInst::Xchg;
|
||||||
break;
|
break;
|
||||||
case LLVMAtomicRMWBinOpAdd:
|
case LLVMAtomicRMWBinOpAdd:
|
||||||
binop = llvm::AtomicRMWInst::Add;
|
binop = llvm::AtomicRMWInst::Add;
|
||||||
break;
|
break;
|
||||||
case LLVMAtomicRMWBinOpSub:
|
case LLVMAtomicRMWBinOpSub:
|
||||||
binop = llvm::AtomicRMWInst::Sub;
|
binop = llvm::AtomicRMWInst::Sub;
|
||||||
break;
|
break;
|
||||||
case LLVMAtomicRMWBinOpAnd:
|
case LLVMAtomicRMWBinOpAnd:
|
||||||
binop = llvm::AtomicRMWInst::And;
|
binop = llvm::AtomicRMWInst::And;
|
||||||
break;
|
break;
|
||||||
case LLVMAtomicRMWBinOpNand:
|
case LLVMAtomicRMWBinOpNand:
|
||||||
binop = llvm::AtomicRMWInst::Nand;
|
binop = llvm::AtomicRMWInst::Nand;
|
||||||
break;
|
break;
|
||||||
case LLVMAtomicRMWBinOpOr:
|
case LLVMAtomicRMWBinOpOr:
|
||||||
binop = llvm::AtomicRMWInst::Or;
|
binop = llvm::AtomicRMWInst::Or;
|
||||||
break;
|
break;
|
||||||
case LLVMAtomicRMWBinOpXor:
|
case LLVMAtomicRMWBinOpXor:
|
||||||
binop = llvm::AtomicRMWInst::Xor;
|
binop = llvm::AtomicRMWInst::Xor;
|
||||||
break;
|
break;
|
||||||
case LLVMAtomicRMWBinOpMax:
|
case LLVMAtomicRMWBinOpMax:
|
||||||
binop = llvm::AtomicRMWInst::Max;
|
binop = llvm::AtomicRMWInst::Max;
|
||||||
break;
|
break;
|
||||||
case LLVMAtomicRMWBinOpMin:
|
case LLVMAtomicRMWBinOpMin:
|
||||||
binop = llvm::AtomicRMWInst::Min;
|
binop = llvm::AtomicRMWInst::Min;
|
||||||
break;
|
break;
|
||||||
case LLVMAtomicRMWBinOpUMax:
|
case LLVMAtomicRMWBinOpUMax:
|
||||||
binop = llvm::AtomicRMWInst::UMax;
|
binop = llvm::AtomicRMWInst::UMax;
|
||||||
break;
|
break;
|
||||||
case LLVMAtomicRMWBinOpUMin:
|
case LLVMAtomicRMWBinOpUMin:
|
||||||
binop = llvm::AtomicRMWInst::UMin;
|
binop = llvm::AtomicRMWInst::UMin;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
unreachable(!"invalid LLVMAtomicRMWBinOp");
|
unreachable(!"invalid LLVMAtomicRMWBinOp");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
|
unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
|
||||||
return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicRMW(
|
return llvm::wrap(llvm::unwrap(ctx->builder)
|
||||||
binop, llvm::unwrap(ptr), llvm::unwrap(val),
|
->CreateAtomicRMW(binop, llvm::unwrap(ptr), llvm::unwrap(val),
|
||||||
llvm::AtomicOrdering::SequentiallyConsistent, SSID));
|
llvm::AtomicOrdering::SequentiallyConsistent, SSID));
|
||||||
}
|
}
|
||||||
|
|
||||||
LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
|
LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
|
||||||
LLVMValueRef cmp, LLVMValueRef val,
|
LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope)
|
||||||
const char *sync_scope) {
|
{
|
||||||
unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
|
unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
|
||||||
return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicCmpXchg(
|
return llvm::wrap(llvm::unwrap(ctx->builder)
|
||||||
llvm::unwrap(ptr), llvm::unwrap(cmp), llvm::unwrap(val),
|
->CreateAtomicCmpXchg(llvm::unwrap(ptr), llvm::unwrap(cmp),
|
||||||
llvm::AtomicOrdering::SequentiallyConsistent,
|
llvm::unwrap(val),
|
||||||
llvm::AtomicOrdering::SequentiallyConsistent, SSID));
|
llvm::AtomicOrdering::SequentiallyConsistent,
|
||||||
|
llvm::AtomicOrdering::SequentiallyConsistent, SSID));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -24,16 +24,17 @@
|
||||||
*/
|
*/
|
||||||
/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
|
/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
|
||||||
#include "ac_llvm_util.h"
|
#include "ac_llvm_util.h"
|
||||||
|
|
||||||
#include "ac_llvm_build.h"
|
#include "ac_llvm_build.h"
|
||||||
|
#include "c11/threads.h"
|
||||||
|
#include "gallivm/lp_bld_misc.h"
|
||||||
#include "util/bitscan.h"
|
#include "util/bitscan.h"
|
||||||
|
#include "util/u_math.h"
|
||||||
#include <llvm-c/Core.h>
|
#include <llvm-c/Core.h>
|
||||||
#include <llvm-c/Support.h>
|
#include <llvm-c/Support.h>
|
||||||
#include <llvm-c/Transforms/IPO.h>
|
#include <llvm-c/Transforms/IPO.h>
|
||||||
#include <llvm-c/Transforms/Scalar.h>
|
#include <llvm-c/Transforms/Scalar.h>
|
||||||
#include <llvm-c/Transforms/Utils.h>
|
#include <llvm-c/Transforms/Utils.h>
|
||||||
#include "c11/threads.h"
|
|
||||||
#include "gallivm/lp_bld_misc.h"
|
|
||||||
#include "util/u_math.h"
|
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
@ -41,239 +42,240 @@
|
||||||
|
|
||||||
static void ac_init_llvm_target()
|
static void ac_init_llvm_target()
|
||||||
{
|
{
|
||||||
LLVMInitializeAMDGPUTargetInfo();
|
LLVMInitializeAMDGPUTargetInfo();
|
||||||
LLVMInitializeAMDGPUTarget();
|
LLVMInitializeAMDGPUTarget();
|
||||||
LLVMInitializeAMDGPUTargetMC();
|
LLVMInitializeAMDGPUTargetMC();
|
||||||
LLVMInitializeAMDGPUAsmPrinter();
|
LLVMInitializeAMDGPUAsmPrinter();
|
||||||
|
|
||||||
/* For inline assembly. */
|
/* For inline assembly. */
|
||||||
LLVMInitializeAMDGPUAsmParser();
|
LLVMInitializeAMDGPUAsmParser();
|
||||||
|
|
||||||
/* For ACO disassembly. */
|
/* For ACO disassembly. */
|
||||||
LLVMInitializeAMDGPUDisassembler();
|
LLVMInitializeAMDGPUDisassembler();
|
||||||
|
|
||||||
/* Workaround for bug in llvm 4.0 that causes image intrinsics
|
/* Workaround for bug in llvm 4.0 that causes image intrinsics
|
||||||
* to disappear.
|
* to disappear.
|
||||||
* https://reviews.llvm.org/D26348
|
* https://reviews.llvm.org/D26348
|
||||||
*
|
*
|
||||||
* "mesa" is the prefix for error messages.
|
* "mesa" is the prefix for error messages.
|
||||||
*
|
*
|
||||||
* -global-isel-abort=2 is a no-op unless global isel has been enabled.
|
* -global-isel-abort=2 is a no-op unless global isel has been enabled.
|
||||||
* This option tells the backend to fall-back to SelectionDAG and print
|
* This option tells the backend to fall-back to SelectionDAG and print
|
||||||
* a diagnostic message if global isel fails.
|
* a diagnostic message if global isel fails.
|
||||||
*/
|
*/
|
||||||
const char *argv[] = {
|
const char *argv[] = {
|
||||||
"mesa",
|
"mesa",
|
||||||
"-simplifycfg-sink-common=false",
|
"-simplifycfg-sink-common=false",
|
||||||
"-global-isel-abort=2",
|
"-global-isel-abort=2",
|
||||||
#if LLVM_VERSION_MAJOR >= 10
|
#if LLVM_VERSION_MAJOR >= 10
|
||||||
/* Atomic optimizations require LLVM 10.0 for gfx10 support. */
|
/* Atomic optimizations require LLVM 10.0 for gfx10 support. */
|
||||||
"-amdgpu-atomic-optimizations=true",
|
"-amdgpu-atomic-optimizations=true",
|
||||||
#endif
|
#endif
|
||||||
#if LLVM_VERSION_MAJOR >= 11
|
#if LLVM_VERSION_MAJOR >= 11
|
||||||
/* This was disabled by default in: https://reviews.llvm.org/D77228 */
|
/* This was disabled by default in: https://reviews.llvm.org/D77228 */
|
||||||
"-structurizecfg-skip-uniform-regions",
|
"-structurizecfg-skip-uniform-regions",
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
LLVMParseCommandLineOptions(ARRAY_SIZE(argv), argv, NULL);
|
LLVMParseCommandLineOptions(ARRAY_SIZE(argv), argv, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
PUBLIC void ac_init_shared_llvm_once(void)
|
PUBLIC void ac_init_shared_llvm_once(void)
|
||||||
{
|
{
|
||||||
static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT;
|
static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT;
|
||||||
call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);
|
call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !LLVM_IS_SHARED
|
#if !LLVM_IS_SHARED
|
||||||
static once_flag ac_init_static_llvm_target_once_flag = ONCE_FLAG_INIT;
|
static once_flag ac_init_static_llvm_target_once_flag = ONCE_FLAG_INIT;
|
||||||
static void ac_init_static_llvm_once(void)
|
static void ac_init_static_llvm_once(void)
|
||||||
{
|
{
|
||||||
call_once(&ac_init_static_llvm_target_once_flag, ac_init_llvm_target);
|
call_once(&ac_init_static_llvm_target_once_flag, ac_init_llvm_target);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void ac_init_llvm_once(void)
|
void ac_init_llvm_once(void)
|
||||||
{
|
{
|
||||||
#if LLVM_IS_SHARED
|
#if LLVM_IS_SHARED
|
||||||
ac_init_shared_llvm_once();
|
ac_init_shared_llvm_once();
|
||||||
#else
|
#else
|
||||||
ac_init_static_llvm_once();
|
ac_init_static_llvm_once();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static LLVMTargetRef ac_get_llvm_target(const char *triple)
|
static LLVMTargetRef ac_get_llvm_target(const char *triple)
|
||||||
{
|
{
|
||||||
LLVMTargetRef target = NULL;
|
LLVMTargetRef target = NULL;
|
||||||
char *err_message = NULL;
|
char *err_message = NULL;
|
||||||
|
|
||||||
if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
|
if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
|
||||||
fprintf(stderr, "Cannot find target for triple %s ", triple);
|
fprintf(stderr, "Cannot find target for triple %s ", triple);
|
||||||
if (err_message) {
|
if (err_message) {
|
||||||
fprintf(stderr, "%s\n", err_message);
|
fprintf(stderr, "%s\n", err_message);
|
||||||
}
|
}
|
||||||
LLVMDisposeMessage(err_message);
|
LLVMDisposeMessage(err_message);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
return target;
|
return target;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char *ac_get_llvm_processor_name(enum radeon_family family)
|
const char *ac_get_llvm_processor_name(enum radeon_family family)
|
||||||
{
|
{
|
||||||
switch (family) {
|
switch (family) {
|
||||||
case CHIP_TAHITI:
|
case CHIP_TAHITI:
|
||||||
return "tahiti";
|
return "tahiti";
|
||||||
case CHIP_PITCAIRN:
|
case CHIP_PITCAIRN:
|
||||||
return "pitcairn";
|
return "pitcairn";
|
||||||
case CHIP_VERDE:
|
case CHIP_VERDE:
|
||||||
return "verde";
|
return "verde";
|
||||||
case CHIP_OLAND:
|
case CHIP_OLAND:
|
||||||
return "oland";
|
return "oland";
|
||||||
case CHIP_HAINAN:
|
case CHIP_HAINAN:
|
||||||
return "hainan";
|
return "hainan";
|
||||||
case CHIP_BONAIRE:
|
case CHIP_BONAIRE:
|
||||||
return "bonaire";
|
return "bonaire";
|
||||||
case CHIP_KABINI:
|
case CHIP_KABINI:
|
||||||
return "kabini";
|
return "kabini";
|
||||||
case CHIP_KAVERI:
|
case CHIP_KAVERI:
|
||||||
return "kaveri";
|
return "kaveri";
|
||||||
case CHIP_HAWAII:
|
case CHIP_HAWAII:
|
||||||
return "hawaii";
|
return "hawaii";
|
||||||
case CHIP_TONGA:
|
case CHIP_TONGA:
|
||||||
return "tonga";
|
return "tonga";
|
||||||
case CHIP_ICELAND:
|
case CHIP_ICELAND:
|
||||||
return "iceland";
|
return "iceland";
|
||||||
case CHIP_CARRIZO:
|
case CHIP_CARRIZO:
|
||||||
return "carrizo";
|
return "carrizo";
|
||||||
case CHIP_FIJI:
|
case CHIP_FIJI:
|
||||||
return "fiji";
|
return "fiji";
|
||||||
case CHIP_STONEY:
|
case CHIP_STONEY:
|
||||||
return "stoney";
|
return "stoney";
|
||||||
case CHIP_POLARIS10:
|
case CHIP_POLARIS10:
|
||||||
return "polaris10";
|
return "polaris10";
|
||||||
case CHIP_POLARIS11:
|
case CHIP_POLARIS11:
|
||||||
case CHIP_POLARIS12:
|
case CHIP_POLARIS12:
|
||||||
case CHIP_VEGAM:
|
case CHIP_VEGAM:
|
||||||
return "polaris11";
|
return "polaris11";
|
||||||
case CHIP_VEGA10:
|
case CHIP_VEGA10:
|
||||||
return "gfx900";
|
return "gfx900";
|
||||||
case CHIP_RAVEN:
|
case CHIP_RAVEN:
|
||||||
return "gfx902";
|
return "gfx902";
|
||||||
case CHIP_VEGA12:
|
case CHIP_VEGA12:
|
||||||
return "gfx904";
|
return "gfx904";
|
||||||
case CHIP_VEGA20:
|
case CHIP_VEGA20:
|
||||||
return "gfx906";
|
return "gfx906";
|
||||||
case CHIP_RAVEN2:
|
case CHIP_RAVEN2:
|
||||||
case CHIP_RENOIR:
|
case CHIP_RENOIR:
|
||||||
return "gfx909";
|
return "gfx909";
|
||||||
case CHIP_ARCTURUS:
|
case CHIP_ARCTURUS:
|
||||||
return "gfx908";
|
return "gfx908";
|
||||||
case CHIP_NAVI10:
|
case CHIP_NAVI10:
|
||||||
return "gfx1010";
|
return "gfx1010";
|
||||||
case CHIP_NAVI12:
|
case CHIP_NAVI12:
|
||||||
return "gfx1011";
|
return "gfx1011";
|
||||||
case CHIP_NAVI14:
|
case CHIP_NAVI14:
|
||||||
return "gfx1012";
|
return "gfx1012";
|
||||||
case CHIP_SIENNA_CICHLID:
|
case CHIP_SIENNA_CICHLID:
|
||||||
case CHIP_NAVY_FLOUNDER:
|
case CHIP_NAVY_FLOUNDER:
|
||||||
return "gfx1030";
|
return "gfx1030";
|
||||||
default:
|
default:
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
|
static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
|
||||||
enum ac_target_machine_options tm_options,
|
enum ac_target_machine_options tm_options,
|
||||||
LLVMCodeGenOptLevel level,
|
LLVMCodeGenOptLevel level,
|
||||||
const char **out_triple)
|
const char **out_triple)
|
||||||
{
|
{
|
||||||
assert(family >= CHIP_TAHITI);
|
assert(family >= CHIP_TAHITI);
|
||||||
char features[256];
|
char features[256];
|
||||||
const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
|
const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
|
||||||
LLVMTargetRef target = ac_get_llvm_target(triple);
|
LLVMTargetRef target = ac_get_llvm_target(triple);
|
||||||
|
|
||||||
snprintf(features, sizeof(features),
|
snprintf(features, sizeof(features), "+DumpCode%s%s%s%s%s",
|
||||||
"+DumpCode%s%s%s%s%s",
|
LLVM_VERSION_MAJOR >= 11 ? "" : ",-fp32-denormals,+fp64-denormals",
|
||||||
LLVM_VERSION_MAJOR >= 11 ? "" : ",-fp32-denormals,+fp64-denormals",
|
family >= CHIP_NAVI10 && !(tm_options & AC_TM_WAVE32)
|
||||||
family >= CHIP_NAVI10 && !(tm_options & AC_TM_WAVE32) ?
|
? ",+wavefrontsize64,-wavefrontsize32"
|
||||||
",+wavefrontsize64,-wavefrontsize32" : "",
|
: "",
|
||||||
family <= CHIP_NAVI14 && tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
|
family <= CHIP_NAVI14 && tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
|
||||||
family <= CHIP_NAVI14 && tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
|
family <= CHIP_NAVI14 && tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
|
||||||
tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "");
|
tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "");
|
||||||
|
|
||||||
LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
|
LLVMTargetMachineRef tm =
|
||||||
target,
|
LLVMCreateTargetMachine(target, triple, ac_get_llvm_processor_name(family), features, level,
|
||||||
triple,
|
LLVMRelocDefault, LLVMCodeModelDefault);
|
||||||
ac_get_llvm_processor_name(family),
|
|
||||||
features,
|
|
||||||
level,
|
|
||||||
LLVMRelocDefault,
|
|
||||||
LLVMCodeModelDefault);
|
|
||||||
|
|
||||||
if (out_triple)
|
if (out_triple)
|
||||||
*out_triple = triple;
|
*out_triple = triple;
|
||||||
if (tm_options & AC_TM_ENABLE_GLOBAL_ISEL)
|
if (tm_options & AC_TM_ENABLE_GLOBAL_ISEL)
|
||||||
ac_enable_global_isel(tm);
|
ac_enable_global_isel(tm);
|
||||||
return tm;
|
return tm;
|
||||||
}
|
}
|
||||||
|
|
||||||
static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
|
static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
|
||||||
bool check_ir)
|
bool check_ir)
|
||||||
{
|
{
|
||||||
LLVMPassManagerRef passmgr = LLVMCreatePassManager();
|
LLVMPassManagerRef passmgr = LLVMCreatePassManager();
|
||||||
if (!passmgr)
|
if (!passmgr)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
if (target_library_info)
|
if (target_library_info)
|
||||||
LLVMAddTargetLibraryInfo(target_library_info,
|
LLVMAddTargetLibraryInfo(target_library_info, passmgr);
|
||||||
passmgr);
|
|
||||||
|
|
||||||
if (check_ir)
|
if (check_ir)
|
||||||
LLVMAddVerifierPass(passmgr);
|
LLVMAddVerifierPass(passmgr);
|
||||||
LLVMAddAlwaysInlinerPass(passmgr);
|
LLVMAddAlwaysInlinerPass(passmgr);
|
||||||
/* Normally, the pass manager runs all passes on one function before
|
/* Normally, the pass manager runs all passes on one function before
|
||||||
* moving onto another. Adding a barrier no-op pass forces the pass
|
* moving onto another. Adding a barrier no-op pass forces the pass
|
||||||
* manager to run the inliner on all functions first, which makes sure
|
* manager to run the inliner on all functions first, which makes sure
|
||||||
* that the following passes are only run on the remaining non-inline
|
* that the following passes are only run on the remaining non-inline
|
||||||
* function, so it removes useless work done on dead inline functions.
|
* function, so it removes useless work done on dead inline functions.
|
||||||
*/
|
*/
|
||||||
ac_llvm_add_barrier_noop_pass(passmgr);
|
ac_llvm_add_barrier_noop_pass(passmgr);
|
||||||
/* This pass should eliminate all the load and store instructions. */
|
/* This pass should eliminate all the load and store instructions. */
|
||||||
LLVMAddPromoteMemoryToRegisterPass(passmgr);
|
LLVMAddPromoteMemoryToRegisterPass(passmgr);
|
||||||
LLVMAddScalarReplAggregatesPass(passmgr);
|
LLVMAddScalarReplAggregatesPass(passmgr);
|
||||||
LLVMAddLICMPass(passmgr);
|
LLVMAddLICMPass(passmgr);
|
||||||
LLVMAddAggressiveDCEPass(passmgr);
|
LLVMAddAggressiveDCEPass(passmgr);
|
||||||
LLVMAddCFGSimplificationPass(passmgr);
|
LLVMAddCFGSimplificationPass(passmgr);
|
||||||
/* This is recommended by the instruction combining pass. */
|
/* This is recommended by the instruction combining pass. */
|
||||||
LLVMAddEarlyCSEMemSSAPass(passmgr);
|
LLVMAddEarlyCSEMemSSAPass(passmgr);
|
||||||
LLVMAddInstructionCombiningPass(passmgr);
|
LLVMAddInstructionCombiningPass(passmgr);
|
||||||
return passmgr;
|
return passmgr;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char *attr_to_str(enum ac_func_attr attr)
|
static const char *attr_to_str(enum ac_func_attr attr)
|
||||||
{
|
{
|
||||||
switch (attr) {
|
switch (attr) {
|
||||||
case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
|
case AC_FUNC_ATTR_ALWAYSINLINE:
|
||||||
case AC_FUNC_ATTR_INREG: return "inreg";
|
return "alwaysinline";
|
||||||
case AC_FUNC_ATTR_NOALIAS: return "noalias";
|
case AC_FUNC_ATTR_INREG:
|
||||||
case AC_FUNC_ATTR_NOUNWIND: return "nounwind";
|
return "inreg";
|
||||||
case AC_FUNC_ATTR_READNONE: return "readnone";
|
case AC_FUNC_ATTR_NOALIAS:
|
||||||
case AC_FUNC_ATTR_READONLY: return "readonly";
|
return "noalias";
|
||||||
case AC_FUNC_ATTR_WRITEONLY: return "writeonly";
|
case AC_FUNC_ATTR_NOUNWIND:
|
||||||
case AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY: return "inaccessiblememonly";
|
return "nounwind";
|
||||||
case AC_FUNC_ATTR_CONVERGENT: return "convergent";
|
case AC_FUNC_ATTR_READNONE:
|
||||||
|
return "readnone";
|
||||||
|
case AC_FUNC_ATTR_READONLY:
|
||||||
|
return "readonly";
|
||||||
|
case AC_FUNC_ATTR_WRITEONLY:
|
||||||
|
return "writeonly";
|
||||||
|
case AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY:
|
||||||
|
return "inaccessiblememonly";
|
||||||
|
case AC_FUNC_ATTR_CONVERGENT:
|
||||||
|
return "convergent";
|
||||||
default:
|
default:
|
||||||
fprintf(stderr, "Unhandled function attribute: %x\n", attr);
|
fprintf(stderr, "Unhandled function attribute: %x\n", attr);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, int attr_idx,
|
||||||
ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
|
enum ac_func_attr attr)
|
||||||
int attr_idx, enum ac_func_attr attr)
|
|
||||||
{
|
{
|
||||||
const char *attr_name = attr_to_str(attr);
|
const char *attr_name = attr_to_str(attr);
|
||||||
unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
|
unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, strlen(attr_name));
|
||||||
strlen(attr_name));
|
|
||||||
LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
|
LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
|
||||||
|
|
||||||
if (LLVMIsAFunction(function))
|
if (LLVMIsAFunction(function))
|
||||||
|
|
@ -282,138 +284,124 @@ ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
|
||||||
LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
|
LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
|
void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, unsigned attrib_mask)
|
||||||
unsigned attrib_mask)
|
|
||||||
{
|
{
|
||||||
attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
|
attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
|
||||||
attrib_mask &= ~AC_FUNC_ATTR_LEGACY;
|
attrib_mask &= ~AC_FUNC_ATTR_LEGACY;
|
||||||
|
|
||||||
while (attrib_mask) {
|
while (attrib_mask) {
|
||||||
enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
|
enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
|
||||||
ac_add_function_attr(ctx, function, -1, attr);
|
ac_add_function_attr(ctx, function, -1, attr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void ac_dump_module(LLVMModuleRef module)
|
||||||
ac_dump_module(LLVMModuleRef module)
|
|
||||||
{
|
{
|
||||||
char *str = LLVMPrintModuleToString(module);
|
char *str = LLVMPrintModuleToString(module);
|
||||||
fprintf(stderr, "%s", str);
|
fprintf(stderr, "%s", str);
|
||||||
LLVMDisposeMessage(str);
|
LLVMDisposeMessage(str);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void ac_llvm_add_target_dep_function_attr(LLVMValueRef F, const char *name, unsigned value)
|
||||||
ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
|
|
||||||
const char *name, unsigned value)
|
|
||||||
{
|
{
|
||||||
char str[16];
|
char str[16];
|
||||||
|
|
||||||
snprintf(str, sizeof(str), "0x%x", value);
|
snprintf(str, sizeof(str), "0x%x", value);
|
||||||
LLVMAddTargetDependentFunctionAttr(F, name, str);
|
LLVMAddTargetDependentFunctionAttr(F, name, str);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size)
|
void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size)
|
||||||
{
|
{
|
||||||
if (!size)
|
if (!size)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
char str[32];
|
char str[32];
|
||||||
snprintf(str, sizeof(str), "%u,%u", size, size);
|
snprintf(str, sizeof(str), "%u,%u", size, size);
|
||||||
LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str);
|
LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned
|
unsigned ac_count_scratch_private_memory(LLVMValueRef function)
|
||||||
ac_count_scratch_private_memory(LLVMValueRef function)
|
|
||||||
{
|
{
|
||||||
unsigned private_mem_vgprs = 0;
|
unsigned private_mem_vgprs = 0;
|
||||||
|
|
||||||
/* Process all LLVM instructions. */
|
/* Process all LLVM instructions. */
|
||||||
LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function);
|
LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function);
|
||||||
while (bb) {
|
while (bb) {
|
||||||
LLVMValueRef next = LLVMGetFirstInstruction(bb);
|
LLVMValueRef next = LLVMGetFirstInstruction(bb);
|
||||||
|
|
||||||
while (next) {
|
while (next) {
|
||||||
LLVMValueRef inst = next;
|
LLVMValueRef inst = next;
|
||||||
next = LLVMGetNextInstruction(next);
|
next = LLVMGetNextInstruction(next);
|
||||||
|
|
||||||
if (LLVMGetInstructionOpcode(inst) != LLVMAlloca)
|
if (LLVMGetInstructionOpcode(inst) != LLVMAlloca)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst));
|
LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst));
|
||||||
/* No idea why LLVM aligns allocas to 4 elements. */
|
/* No idea why LLVM aligns allocas to 4 elements. */
|
||||||
unsigned alignment = LLVMGetAlignment(inst);
|
unsigned alignment = LLVMGetAlignment(inst);
|
||||||
unsigned dw_size = align(ac_get_type_size(type) / 4, alignment);
|
unsigned dw_size = align(ac_get_type_size(type) / 4, alignment);
|
||||||
private_mem_vgprs += dw_size;
|
private_mem_vgprs += dw_size;
|
||||||
}
|
}
|
||||||
bb = LLVMGetNextBasicBlock(bb);
|
bb = LLVMGetNextBasicBlock(bb);
|
||||||
}
|
}
|
||||||
|
|
||||||
return private_mem_vgprs;
|
return private_mem_vgprs;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family family,
|
||||||
ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
|
enum ac_target_machine_options tm_options)
|
||||||
enum radeon_family family,
|
|
||||||
enum ac_target_machine_options tm_options)
|
|
||||||
{
|
{
|
||||||
const char *triple;
|
const char *triple;
|
||||||
memset(compiler, 0, sizeof(*compiler));
|
memset(compiler, 0, sizeof(*compiler));
|
||||||
|
|
||||||
compiler->tm = ac_create_target_machine(family, tm_options,
|
compiler->tm = ac_create_target_machine(family, tm_options, LLVMCodeGenLevelDefault, &triple);
|
||||||
LLVMCodeGenLevelDefault,
|
if (!compiler->tm)
|
||||||
&triple);
|
return false;
|
||||||
if (!compiler->tm)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (tm_options & AC_TM_CREATE_LOW_OPT) {
|
if (tm_options & AC_TM_CREATE_LOW_OPT) {
|
||||||
compiler->low_opt_tm =
|
compiler->low_opt_tm =
|
||||||
ac_create_target_machine(family, tm_options,
|
ac_create_target_machine(family, tm_options, LLVMCodeGenLevelLess, NULL);
|
||||||
LLVMCodeGenLevelLess, NULL);
|
if (!compiler->low_opt_tm)
|
||||||
if (!compiler->low_opt_tm)
|
goto fail;
|
||||||
goto fail;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (family >= CHIP_NAVI10) {
|
if (family >= CHIP_NAVI10) {
|
||||||
assert(!(tm_options & AC_TM_CREATE_LOW_OPT));
|
assert(!(tm_options & AC_TM_CREATE_LOW_OPT));
|
||||||
compiler->tm_wave32 = ac_create_target_machine(family,
|
compiler->tm_wave32 =
|
||||||
tm_options | AC_TM_WAVE32,
|
ac_create_target_machine(family, tm_options | AC_TM_WAVE32, LLVMCodeGenLevelDefault, NULL);
|
||||||
LLVMCodeGenLevelDefault,
|
if (!compiler->tm_wave32)
|
||||||
NULL);
|
goto fail;
|
||||||
if (!compiler->tm_wave32)
|
}
|
||||||
goto fail;
|
|
||||||
}
|
|
||||||
|
|
||||||
compiler->target_library_info =
|
compiler->target_library_info = ac_create_target_library_info(triple);
|
||||||
ac_create_target_library_info(triple);
|
if (!compiler->target_library_info)
|
||||||
if (!compiler->target_library_info)
|
goto fail;
|
||||||
goto fail;
|
|
||||||
|
|
||||||
compiler->passmgr = ac_create_passmgr(compiler->target_library_info,
|
compiler->passmgr =
|
||||||
tm_options & AC_TM_CHECK_IR);
|
ac_create_passmgr(compiler->target_library_info, tm_options & AC_TM_CHECK_IR);
|
||||||
if (!compiler->passmgr)
|
if (!compiler->passmgr)
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
fail:
|
fail:
|
||||||
ac_destroy_llvm_compiler(compiler);
|
ac_destroy_llvm_compiler(compiler);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
|
||||||
ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
|
|
||||||
{
|
{
|
||||||
ac_destroy_llvm_passes(compiler->passes);
|
ac_destroy_llvm_passes(compiler->passes);
|
||||||
ac_destroy_llvm_passes(compiler->passes_wave32);
|
ac_destroy_llvm_passes(compiler->passes_wave32);
|
||||||
ac_destroy_llvm_passes(compiler->low_opt_passes);
|
ac_destroy_llvm_passes(compiler->low_opt_passes);
|
||||||
|
|
||||||
if (compiler->passmgr)
|
if (compiler->passmgr)
|
||||||
LLVMDisposePassManager(compiler->passmgr);
|
LLVMDisposePassManager(compiler->passmgr);
|
||||||
if (compiler->target_library_info)
|
if (compiler->target_library_info)
|
||||||
ac_dispose_target_library_info(compiler->target_library_info);
|
ac_dispose_target_library_info(compiler->target_library_info);
|
||||||
if (compiler->low_opt_tm)
|
if (compiler->low_opt_tm)
|
||||||
LLVMDisposeTargetMachine(compiler->low_opt_tm);
|
LLVMDisposeTargetMachine(compiler->low_opt_tm);
|
||||||
if (compiler->tm)
|
if (compiler->tm)
|
||||||
LLVMDisposeTargetMachine(compiler->tm);
|
LLVMDisposeTargetMachine(compiler->tm);
|
||||||
if (compiler->tm_wave32)
|
if (compiler->tm_wave32)
|
||||||
LLVMDisposeTargetMachine(compiler->tm_wave32);
|
LLVMDisposeTargetMachine(compiler->tm_wave32);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,11 +26,11 @@
|
||||||
#ifndef AC_LLVM_UTIL_H
|
#ifndef AC_LLVM_UTIL_H
|
||||||
#define AC_LLVM_UTIL_H
|
#define AC_LLVM_UTIL_H
|
||||||
|
|
||||||
#include <stdbool.h>
|
#include "amd_family.h"
|
||||||
#include <llvm-c/TargetMachine.h>
|
#include <llvm-c/TargetMachine.h>
|
||||||
#include <llvm/Config/llvm-config.h>
|
#include <llvm/Config/llvm-config.h>
|
||||||
|
|
||||||
#include "amd_family.h"
|
#include <stdbool.h>
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
|
@ -39,124 +39,117 @@ extern "C" {
|
||||||
struct ac_compiler_passes;
|
struct ac_compiler_passes;
|
||||||
struct ac_llvm_context;
|
struct ac_llvm_context;
|
||||||
|
|
||||||
enum ac_func_attr {
|
enum ac_func_attr
|
||||||
AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
|
{
|
||||||
AC_FUNC_ATTR_INREG = (1 << 2),
|
AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
|
||||||
AC_FUNC_ATTR_NOALIAS = (1 << 3),
|
AC_FUNC_ATTR_INREG = (1 << 2),
|
||||||
AC_FUNC_ATTR_NOUNWIND = (1 << 4),
|
AC_FUNC_ATTR_NOALIAS = (1 << 3),
|
||||||
AC_FUNC_ATTR_READNONE = (1 << 5),
|
AC_FUNC_ATTR_NOUNWIND = (1 << 4),
|
||||||
AC_FUNC_ATTR_READONLY = (1 << 6),
|
AC_FUNC_ATTR_READNONE = (1 << 5),
|
||||||
AC_FUNC_ATTR_WRITEONLY = (1 << 7),
|
AC_FUNC_ATTR_READONLY = (1 << 6),
|
||||||
AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY = (1 << 8),
|
AC_FUNC_ATTR_WRITEONLY = (1 << 7),
|
||||||
AC_FUNC_ATTR_CONVERGENT = (1 << 9),
|
AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY = (1 << 8),
|
||||||
|
AC_FUNC_ATTR_CONVERGENT = (1 << 9),
|
||||||
|
|
||||||
/* Legacy intrinsic that needs attributes on function declarations
|
/* Legacy intrinsic that needs attributes on function declarations
|
||||||
* and they must match the internal LLVM definition exactly, otherwise
|
* and they must match the internal LLVM definition exactly, otherwise
|
||||||
* intrinsic selection fails.
|
* intrinsic selection fails.
|
||||||
*/
|
*/
|
||||||
AC_FUNC_ATTR_LEGACY = (1u << 31),
|
AC_FUNC_ATTR_LEGACY = (1u << 31),
|
||||||
};
|
};
|
||||||
|
|
||||||
enum ac_target_machine_options {
|
enum ac_target_machine_options
|
||||||
AC_TM_SUPPORTS_SPILL = (1 << 0),
|
{
|
||||||
AC_TM_FORCE_ENABLE_XNACK = (1 << 1),
|
AC_TM_SUPPORTS_SPILL = (1 << 0),
|
||||||
AC_TM_FORCE_DISABLE_XNACK = (1 << 2),
|
AC_TM_FORCE_ENABLE_XNACK = (1 << 1),
|
||||||
AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 3),
|
AC_TM_FORCE_DISABLE_XNACK = (1 << 2),
|
||||||
AC_TM_CHECK_IR = (1 << 4),
|
AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 3),
|
||||||
AC_TM_ENABLE_GLOBAL_ISEL = (1 << 5),
|
AC_TM_CHECK_IR = (1 << 4),
|
||||||
AC_TM_CREATE_LOW_OPT = (1 << 6),
|
AC_TM_ENABLE_GLOBAL_ISEL = (1 << 5),
|
||||||
AC_TM_WAVE32 = (1 << 7),
|
AC_TM_CREATE_LOW_OPT = (1 << 6),
|
||||||
|
AC_TM_WAVE32 = (1 << 7),
|
||||||
};
|
};
|
||||||
|
|
||||||
enum ac_float_mode {
|
enum ac_float_mode
|
||||||
AC_FLOAT_MODE_DEFAULT,
|
{
|
||||||
AC_FLOAT_MODE_DEFAULT_OPENGL,
|
AC_FLOAT_MODE_DEFAULT,
|
||||||
AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO,
|
AC_FLOAT_MODE_DEFAULT_OPENGL,
|
||||||
|
AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO,
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Per-thread persistent LLVM objects. */
|
/* Per-thread persistent LLVM objects. */
|
||||||
struct ac_llvm_compiler {
|
struct ac_llvm_compiler {
|
||||||
LLVMTargetLibraryInfoRef target_library_info;
|
LLVMTargetLibraryInfoRef target_library_info;
|
||||||
LLVMPassManagerRef passmgr;
|
LLVMPassManagerRef passmgr;
|
||||||
|
|
||||||
/* Default compiler. */
|
/* Default compiler. */
|
||||||
LLVMTargetMachineRef tm;
|
LLVMTargetMachineRef tm;
|
||||||
struct ac_compiler_passes *passes;
|
struct ac_compiler_passes *passes;
|
||||||
|
|
||||||
/* Wave32 compiler for GFX10. */
|
/* Wave32 compiler for GFX10. */
|
||||||
LLVMTargetMachineRef tm_wave32;
|
LLVMTargetMachineRef tm_wave32;
|
||||||
struct ac_compiler_passes *passes_wave32;
|
struct ac_compiler_passes *passes_wave32;
|
||||||
|
|
||||||
/* Optional compiler for faster compilation with fewer optimizations.
|
/* Optional compiler for faster compilation with fewer optimizations.
|
||||||
* LLVM modules can be created with "tm" too. There is no difference.
|
* LLVM modules can be created with "tm" too. There is no difference.
|
||||||
*/
|
*/
|
||||||
LLVMTargetMachineRef low_opt_tm; /* uses -O1 instead of -O2 */
|
LLVMTargetMachineRef low_opt_tm; /* uses -O1 instead of -O2 */
|
||||||
struct ac_compiler_passes *low_opt_passes;
|
struct ac_compiler_passes *low_opt_passes;
|
||||||
};
|
};
|
||||||
|
|
||||||
const char *ac_get_llvm_processor_name(enum radeon_family family);
|
const char *ac_get_llvm_processor_name(enum radeon_family family);
|
||||||
void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
|
void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
|
||||||
void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes);
|
void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes);
|
||||||
bool ac_is_sgpr_param(LLVMValueRef param);
|
bool ac_is_sgpr_param(LLVMValueRef param);
|
||||||
void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
|
void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, int attr_idx,
|
||||||
int attr_idx, enum ac_func_attr attr);
|
enum ac_func_attr attr);
|
||||||
void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
|
void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, unsigned attrib_mask);
|
||||||
unsigned attrib_mask);
|
|
||||||
void ac_dump_module(LLVMModuleRef module);
|
void ac_dump_module(LLVMModuleRef module);
|
||||||
|
|
||||||
LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call);
|
LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call);
|
||||||
bool ac_llvm_is_function(LLVMValueRef v);
|
bool ac_llvm_is_function(LLVMValueRef v);
|
||||||
LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx);
|
LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx);
|
||||||
|
|
||||||
LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
|
LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode);
|
||||||
enum ac_float_mode float_mode);
|
|
||||||
void ac_enable_signed_zeros(struct ac_llvm_context *ctx);
|
void ac_enable_signed_zeros(struct ac_llvm_context *ctx);
|
||||||
void ac_disable_signed_zeros(struct ac_llvm_context *ctx);
|
void ac_disable_signed_zeros(struct ac_llvm_context *ctx);
|
||||||
|
|
||||||
void
|
void ac_llvm_add_target_dep_function_attr(LLVMValueRef F, const char *name, unsigned value);
|
||||||
ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
|
|
||||||
const char *name, unsigned value);
|
|
||||||
void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size);
|
void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size);
|
||||||
|
|
||||||
static inline unsigned
|
static inline unsigned ac_get_load_intr_attribs(bool can_speculate)
|
||||||
ac_get_load_intr_attribs(bool can_speculate)
|
|
||||||
{
|
{
|
||||||
/* READNONE means writes can't affect it, while READONLY means that
|
/* READNONE means writes can't affect it, while READONLY means that
|
||||||
* writes can affect it. */
|
* writes can affect it. */
|
||||||
return can_speculate ? AC_FUNC_ATTR_READNONE :
|
return can_speculate ? AC_FUNC_ATTR_READNONE : AC_FUNC_ATTR_READONLY;
|
||||||
AC_FUNC_ATTR_READONLY;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned
|
unsigned ac_count_scratch_private_memory(LLVMValueRef function);
|
||||||
ac_count_scratch_private_memory(LLVMValueRef function);
|
|
||||||
|
|
||||||
LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple);
|
LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple);
|
||||||
void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info);
|
void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info);
|
||||||
void ac_init_shared_llvm_once(void); /* Do not use directly, use ac_init_llvm_once */
|
void ac_init_shared_llvm_once(void); /* Do not use directly, use ac_init_llvm_once */
|
||||||
void ac_init_llvm_once(void);
|
void ac_init_llvm_once(void);
|
||||||
|
|
||||||
|
bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family family,
|
||||||
bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
|
enum ac_target_machine_options tm_options);
|
||||||
enum radeon_family family,
|
|
||||||
enum ac_target_machine_options tm_options);
|
|
||||||
void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler);
|
void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler);
|
||||||
|
|
||||||
struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm);
|
struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm);
|
||||||
void ac_destroy_llvm_passes(struct ac_compiler_passes *p);
|
void ac_destroy_llvm_passes(struct ac_compiler_passes *p);
|
||||||
bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
|
bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
|
||||||
char **pelf_buffer, size_t *pelf_size);
|
char **pelf_buffer, size_t *pelf_size);
|
||||||
void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr);
|
void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr);
|
||||||
void ac_enable_global_isel(LLVMTargetMachineRef tm);
|
void ac_enable_global_isel(LLVMTargetMachineRef tm);
|
||||||
|
|
||||||
static inline bool
|
static inline bool ac_has_vec3_support(enum chip_class chip, bool use_format)
|
||||||
ac_has_vec3_support(enum chip_class chip, bool use_format)
|
|
||||||
{
|
{
|
||||||
if (chip == GFX6 && !use_format) {
|
if (chip == GFX6 && !use_format) {
|
||||||
/* GFX6 only supports vec3 with load/store format. */
|
/* GFX6 only supports vec3 with load/store format. */
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return LLVM_VERSION_MAJOR >= 9;
|
return LLVM_VERSION_MAJOR >= 9;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -24,11 +24,12 @@
|
||||||
#ifndef AC_NIR_TO_LLVM_H
|
#ifndef AC_NIR_TO_LLVM_H
|
||||||
#define AC_NIR_TO_LLVM_H
|
#define AC_NIR_TO_LLVM_H
|
||||||
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include "llvm-c/Core.h"
|
|
||||||
#include "llvm-c/TargetMachine.h"
|
|
||||||
#include "amd_family.h"
|
#include "amd_family.h"
|
||||||
#include "compiler/shader_enums.h"
|
#include "compiler/shader_enums.h"
|
||||||
|
#include "llvm-c/Core.h"
|
||||||
|
#include "llvm-c/TargetMachine.h"
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
|
|
||||||
struct nir_shader;
|
struct nir_shader;
|
||||||
struct nir_variable;
|
struct nir_variable;
|
||||||
|
|
@ -37,13 +38,13 @@ struct ac_shader_abi;
|
||||||
struct ac_shader_args;
|
struct ac_shader_args;
|
||||||
|
|
||||||
/* Interpolation locations */
|
/* Interpolation locations */
|
||||||
#define INTERP_CENTER 0
|
#define INTERP_CENTER 0
|
||||||
#define INTERP_CENTROID 1
|
#define INTERP_CENTROID 1
|
||||||
#define INTERP_SAMPLE 2
|
#define INTERP_SAMPLE 2
|
||||||
|
|
||||||
static inline unsigned ac_llvm_reg_index_soa(unsigned index, unsigned chan)
|
static inline unsigned ac_llvm_reg_index_soa(unsigned index, unsigned chan)
|
||||||
{
|
{
|
||||||
return (index * 4) + chan;
|
return (index * 4) + chan;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class);
|
bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class);
|
||||||
|
|
@ -51,14 +52,11 @@ bool ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class);
|
||||||
bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir);
|
bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir);
|
||||||
|
|
||||||
void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
|
void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
|
||||||
const struct ac_shader_args *args, struct nir_shader *nir);
|
const struct ac_shader_args *args, struct nir_shader *nir);
|
||||||
|
|
||||||
void
|
void ac_handle_shader_output_decl(struct ac_llvm_context *ctx, struct ac_shader_abi *abi,
|
||||||
ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
|
struct nir_shader *nir, struct nir_variable *variable,
|
||||||
struct ac_shader_abi *abi,
|
gl_shader_stage stage);
|
||||||
struct nir_shader *nir,
|
|
||||||
struct nir_variable *variable,
|
|
||||||
gl_shader_stage stage);
|
|
||||||
|
|
||||||
void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage);
|
void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -24,11 +24,11 @@
|
||||||
#ifndef AC_SHADER_ABI_H
|
#ifndef AC_SHADER_ABI_H
|
||||||
#define AC_SHADER_ABI_H
|
#define AC_SHADER_ABI_H
|
||||||
|
|
||||||
#include <llvm-c/Core.h>
|
|
||||||
#include <assert.h>
|
|
||||||
#include "ac_shader_args.h"
|
#include "ac_shader_args.h"
|
||||||
|
|
||||||
#include "compiler/shader_enums.h"
|
#include "compiler/shader_enums.h"
|
||||||
|
#include <llvm-c/Core.h>
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
struct nir_variable;
|
struct nir_variable;
|
||||||
|
|
||||||
|
|
@ -36,167 +36,136 @@ struct nir_variable;
|
||||||
|
|
||||||
#define AC_MAX_INLINE_PUSH_CONSTS 8
|
#define AC_MAX_INLINE_PUSH_CONSTS 8
|
||||||
|
|
||||||
enum ac_descriptor_type {
|
enum ac_descriptor_type
|
||||||
AC_DESC_IMAGE,
|
{
|
||||||
AC_DESC_FMASK,
|
AC_DESC_IMAGE,
|
||||||
AC_DESC_SAMPLER,
|
AC_DESC_FMASK,
|
||||||
AC_DESC_BUFFER,
|
AC_DESC_SAMPLER,
|
||||||
AC_DESC_PLANE_0,
|
AC_DESC_BUFFER,
|
||||||
AC_DESC_PLANE_1,
|
AC_DESC_PLANE_0,
|
||||||
AC_DESC_PLANE_2,
|
AC_DESC_PLANE_1,
|
||||||
|
AC_DESC_PLANE_2,
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Document the shader ABI during compilation. This is what allows radeonsi and
|
/* Document the shader ABI during compilation. This is what allows radeonsi and
|
||||||
* radv to share a compiler backend.
|
* radv to share a compiler backend.
|
||||||
*/
|
*/
|
||||||
struct ac_shader_abi {
|
struct ac_shader_abi {
|
||||||
LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4];
|
LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4];
|
||||||
|
|
||||||
/* These input registers sometimes need to be fixed up. */
|
/* These input registers sometimes need to be fixed up. */
|
||||||
LLVMValueRef vertex_id;
|
LLVMValueRef vertex_id;
|
||||||
LLVMValueRef instance_id;
|
LLVMValueRef instance_id;
|
||||||
LLVMValueRef persp_centroid, linear_centroid;
|
LLVMValueRef persp_centroid, linear_centroid;
|
||||||
LLVMValueRef color0, color1;
|
LLVMValueRef color0, color1;
|
||||||
LLVMValueRef user_data;
|
LLVMValueRef user_data;
|
||||||
|
|
||||||
/* For VS and PS: pre-loaded shader inputs.
|
/* For VS and PS: pre-loaded shader inputs.
|
||||||
*
|
*
|
||||||
* Currently only used for NIR shaders; indexed by variables'
|
* Currently only used for NIR shaders; indexed by variables'
|
||||||
* driver_location.
|
* driver_location.
|
||||||
*/
|
*/
|
||||||
LLVMValueRef *inputs;
|
LLVMValueRef *inputs;
|
||||||
|
|
||||||
/* Varying -> attribute number mapping. Also NIR-only */
|
/* Varying -> attribute number mapping. Also NIR-only */
|
||||||
unsigned fs_input_attr_indices[MAX_VARYING];
|
unsigned fs_input_attr_indices[MAX_VARYING];
|
||||||
|
|
||||||
void (*emit_outputs)(struct ac_shader_abi *abi,
|
void (*emit_outputs)(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs);
|
||||||
unsigned max_outputs,
|
|
||||||
LLVMValueRef *addrs);
|
|
||||||
|
|
||||||
void (*emit_vertex)(struct ac_shader_abi *abi,
|
void (*emit_vertex)(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs);
|
||||||
unsigned stream,
|
|
||||||
LLVMValueRef *addrs);
|
|
||||||
|
|
||||||
void (*emit_primitive)(struct ac_shader_abi *abi,
|
void (*emit_primitive)(struct ac_shader_abi *abi, unsigned stream);
|
||||||
unsigned stream);
|
|
||||||
|
|
||||||
void (*emit_vertex_with_counter)(struct ac_shader_abi *abi,
|
void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, unsigned stream,
|
||||||
unsigned stream,
|
LLVMValueRef vertexidx, LLVMValueRef *addrs);
|
||||||
LLVMValueRef vertexidx,
|
|
||||||
LLVMValueRef *addrs);
|
|
||||||
|
|
||||||
LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi,
|
LLVMValueRef (*load_inputs)(struct ac_shader_abi *abi, unsigned location,
|
||||||
unsigned location,
|
unsigned driver_location, unsigned component,
|
||||||
unsigned driver_location,
|
unsigned num_components, unsigned vertex_index, unsigned const_index,
|
||||||
unsigned component,
|
LLVMTypeRef type);
|
||||||
unsigned num_components,
|
|
||||||
unsigned vertex_index,
|
|
||||||
unsigned const_index,
|
|
||||||
LLVMTypeRef type);
|
|
||||||
|
|
||||||
LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi,
|
LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, LLVMTypeRef type,
|
||||||
LLVMTypeRef type,
|
LLVMValueRef vertex_index, LLVMValueRef param_index,
|
||||||
LLVMValueRef vertex_index,
|
unsigned const_index, unsigned location,
|
||||||
LLVMValueRef param_index,
|
unsigned driver_location, unsigned component,
|
||||||
unsigned const_index,
|
unsigned num_components, bool is_patch, bool is_compact,
|
||||||
unsigned location,
|
bool load_inputs);
|
||||||
unsigned driver_location,
|
|
||||||
unsigned component,
|
|
||||||
unsigned num_components,
|
|
||||||
bool is_patch,
|
|
||||||
bool is_compact,
|
|
||||||
bool load_inputs);
|
|
||||||
|
|
||||||
void (*store_tcs_outputs)(struct ac_shader_abi *abi,
|
void (*store_tcs_outputs)(struct ac_shader_abi *abi, const struct nir_variable *var,
|
||||||
const struct nir_variable *var,
|
LLVMValueRef vertex_index, LLVMValueRef param_index,
|
||||||
LLVMValueRef vertex_index,
|
unsigned const_index, LLVMValueRef src, unsigned writemask,
|
||||||
LLVMValueRef param_index,
|
unsigned component, unsigned driver_location);
|
||||||
unsigned const_index,
|
|
||||||
LLVMValueRef src,
|
|
||||||
unsigned writemask,
|
|
||||||
unsigned component,
|
|
||||||
unsigned driver_location);
|
|
||||||
|
|
||||||
LLVMValueRef (*load_tess_coord)(struct ac_shader_abi *abi);
|
LLVMValueRef (*load_tess_coord)(struct ac_shader_abi *abi);
|
||||||
|
|
||||||
LLVMValueRef (*load_patch_vertices_in)(struct ac_shader_abi *abi);
|
LLVMValueRef (*load_patch_vertices_in)(struct ac_shader_abi *abi);
|
||||||
|
|
||||||
LLVMValueRef (*load_tess_level)(struct ac_shader_abi *abi,
|
LLVMValueRef (*load_tess_level)(struct ac_shader_abi *abi, unsigned varying_id,
|
||||||
unsigned varying_id,
|
bool load_default_state);
|
||||||
bool load_default_state);
|
|
||||||
|
|
||||||
|
LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
|
||||||
|
|
||||||
LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
|
/**
|
||||||
|
* Load the descriptor for the given buffer.
|
||||||
|
*
|
||||||
|
* \param buffer the buffer as presented in NIR: this is the descriptor
|
||||||
|
* in Vulkan, and the buffer index in OpenGL/Gallium
|
||||||
|
* \param write whether buffer contents will be written
|
||||||
|
*/
|
||||||
|
LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi, LLVMValueRef buffer, bool write);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Load the descriptor for the given buffer.
|
* Load a descriptor associated to a sampler.
|
||||||
*
|
*
|
||||||
* \param buffer the buffer as presented in NIR: this is the descriptor
|
* \param descriptor_set the descriptor set index (only for Vulkan)
|
||||||
* in Vulkan, and the buffer index in OpenGL/Gallium
|
* \param base_index the base index of the sampler variable
|
||||||
* \param write whether buffer contents will be written
|
* \param constant_index constant part of an array index (or 0, if the
|
||||||
*/
|
* sampler variable is not an array)
|
||||||
LLVMValueRef (*load_ssbo)(struct ac_shader_abi *abi,
|
* \param index non-constant part of an array index (may be NULL)
|
||||||
LLVMValueRef buffer, bool write);
|
* \param desc_type the type of descriptor to load
|
||||||
|
* \param image whether the descriptor is loaded for an image operation
|
||||||
|
*/
|
||||||
|
LLVMValueRef (*load_sampler_desc)(struct ac_shader_abi *abi, unsigned descriptor_set,
|
||||||
|
unsigned base_index, unsigned constant_index,
|
||||||
|
LLVMValueRef index, enum ac_descriptor_type desc_type,
|
||||||
|
bool image, bool write, bool bindless);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Load a descriptor associated to a sampler.
|
* Load a Vulkan-specific resource.
|
||||||
*
|
*
|
||||||
* \param descriptor_set the descriptor set index (only for Vulkan)
|
* \param index resource index
|
||||||
* \param base_index the base index of the sampler variable
|
* \param desc_set descriptor set
|
||||||
* \param constant_index constant part of an array index (or 0, if the
|
* \param binding descriptor set binding
|
||||||
* sampler variable is not an array)
|
*/
|
||||||
* \param index non-constant part of an array index (may be NULL)
|
LLVMValueRef (*load_resource)(struct ac_shader_abi *abi, LLVMValueRef index, unsigned desc_set,
|
||||||
* \param desc_type the type of descriptor to load
|
unsigned binding);
|
||||||
* \param image whether the descriptor is loaded for an image operation
|
|
||||||
*/
|
|
||||||
LLVMValueRef (*load_sampler_desc)(struct ac_shader_abi *abi,
|
|
||||||
unsigned descriptor_set,
|
|
||||||
unsigned base_index,
|
|
||||||
unsigned constant_index,
|
|
||||||
LLVMValueRef index,
|
|
||||||
enum ac_descriptor_type desc_type,
|
|
||||||
bool image, bool write,
|
|
||||||
bool bindless);
|
|
||||||
|
|
||||||
/**
|
LLVMValueRef (*load_sample_position)(struct ac_shader_abi *abi, LLVMValueRef sample_id);
|
||||||
* Load a Vulkan-specific resource.
|
|
||||||
*
|
|
||||||
* \param index resource index
|
|
||||||
* \param desc_set descriptor set
|
|
||||||
* \param binding descriptor set binding
|
|
||||||
*/
|
|
||||||
LLVMValueRef (*load_resource)(struct ac_shader_abi *abi,
|
|
||||||
LLVMValueRef index,
|
|
||||||
unsigned desc_set,
|
|
||||||
unsigned binding);
|
|
||||||
|
|
||||||
LLVMValueRef (*load_sample_position)(struct ac_shader_abi *abi,
|
LLVMValueRef (*load_local_group_size)(struct ac_shader_abi *abi);
|
||||||
LLVMValueRef sample_id);
|
|
||||||
|
|
||||||
LLVMValueRef (*load_local_group_size)(struct ac_shader_abi *abi);
|
LLVMValueRef (*load_sample_mask_in)(struct ac_shader_abi *abi);
|
||||||
|
|
||||||
LLVMValueRef (*load_sample_mask_in)(struct ac_shader_abi *abi);
|
LLVMValueRef (*load_base_vertex)(struct ac_shader_abi *abi);
|
||||||
|
|
||||||
LLVMValueRef (*load_base_vertex)(struct ac_shader_abi *abi);
|
LLVMValueRef (*emit_fbfetch)(struct ac_shader_abi *abi);
|
||||||
|
|
||||||
LLVMValueRef (*emit_fbfetch)(struct ac_shader_abi *abi);
|
/* Whether to clamp the shadow reference value to [0,1]on GFX8. Radeonsi currently
|
||||||
|
* uses it due to promoting D16 to D32, but radv needs it off. */
|
||||||
|
bool clamp_shadow_reference;
|
||||||
|
bool interp_at_sample_force_center;
|
||||||
|
|
||||||
/* Whether to clamp the shadow reference value to [0,1]on GFX8. Radeonsi currently
|
/* Whether bounds checks are required */
|
||||||
* uses it due to promoting D16 to D32, but radv needs it off. */
|
bool robust_buffer_access;
|
||||||
bool clamp_shadow_reference;
|
|
||||||
bool interp_at_sample_force_center;
|
|
||||||
|
|
||||||
/* Whether bounds checks are required */
|
/* Check for Inf interpolation coeff */
|
||||||
bool robust_buffer_access;
|
bool kill_ps_if_inf_interp;
|
||||||
|
|
||||||
/* Check for Inf interpolation coeff */
|
/* Whether undef values must be converted to zero */
|
||||||
bool kill_ps_if_inf_interp;
|
bool convert_undef_to_zero;
|
||||||
|
|
||||||
/* Whether undef values must be converted to zero */
|
/* Clamp div by 0 (so it won't produce NaN) */
|
||||||
bool convert_undef_to_zero;
|
bool clamp_div_by_zero;
|
||||||
|
|
||||||
/* Clamp div by 0 (so it won't produce NaN) */
|
|
||||||
bool clamp_div_by_zero;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif /* AC_SHADER_ABI_H */
|
#endif /* AC_SHADER_ABI_H */
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue