Merge branch 'r600-remove-tgsi-shaders' into 'main'

r600: replace TGSI shaders with nir

See merge request mesa/mesa!41328
This commit is contained in:
Gert Wollny 2026-05-08 02:14:11 +02:00
commit 7fe68feb77
6 changed files with 371 additions and 203 deletions

View file

@ -10,7 +10,7 @@
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "util/os_time.h"
#include "tgsi/tgsi_text.h"
#include "nir/nir_builder.h"
#define R600_MAX_STREAMS 4
@ -1353,6 +1353,253 @@ bool r600_query_hw_get_result(struct r600_common_context *rctx,
return true;
}
/* Bit flags for the "config" word consumed by the query-result compute
 * shader; each flag enables one branch of the shader built below. */
enum r600_query_result_shader_config {
R600_QUERY_READ_PREV = 1, /* seed accumulator from the previous chain summary (SSBO 1) */
R600_QUERY_WRITE_CHAIN = 2, /* store accum + availability for the next grid invocation */
R600_QUERY_WRITE_AVAIL = 4, /* write the availability flag instead of the result */
R600_QUERY_BOOL = 8, /* collapse the result to 0/1 */
R600_QUERY_ONE_DWORD = 16, /* single fence + result path (r600_query_result_load_one_dword) */
R600_QUERY_TIMESTAMP = 32, /* scale raw counter by 1000000 / clock_crystal_freq */
R600_QUERY_RESULT64 = 64, /* destination is 64 bits wide */
R600_QUERY_SIGNED32 = 128, /* clamp a 32-bit result to INT_MAX */
R600_QUERY_SO_OVERFLOW = 256, /* subtract the second half-pair difference */
};
/* Local NIR variables shared by the shader-building helpers:
 *   accum            - ivec2 holding the accumulated 64-bit result (lo, hi)
 *   result_available - "result NOT available" mask (0 == available) */
struct r600_query_result_accum_vars {
nir_variable *accum;
nir_variable *result_available;
};
/* SSBO binding indices used by the result shader. */
#define QUERY_RESULT_BUF nir_imm_int(b, 0) /* raw query results + fences */
#define QUERY_PREV_SUMMARY_BUF nir_imm_int(b, 1) /* summary chained from the previous grid */
#define NEXT_SUMMARY_OR_USER_BUF nir_imm_int(b, 2) /* next summary or the user's destination */
/* Read back a single-result query (the R600_QUERY_ONE_DWORD path).
 *
 * The 32-bit fence word lives at fence_offset + result_offset in the query
 * result buffer (SSBO 0); bit 31 set means the GPU has written the result.
 * vars->result_available receives ~(fence >> 31) (arithmetic shift), i.e.
 * 0 when the result IS available and all-ones when it is NOT -- a
 * "result not available" mask, mirroring the old TGSI TEMP[0].z.
 * When the fence is set, the two result dwords at result_offset are copied
 * into vars->accum.
 */
static void r600_query_result_load_one_dword(nir_builder *b, nir_def *fence_offset,
nir_def *result_offset,
const struct r600_query_result_accum_vars *vars)
{
nir_def *fence_value = nir_load_ssbo(b, 1, 32, QUERY_RESULT_BUF,
nir_iadd(b, fence_offset, result_offset));
/* Arithmetic shift replicates the fence bit across the word; the
 * inversion makes 0 mean "available". */
nir_def *fence_mask = nir_inot(b, nir_ishr_imm(b, fence_value, 31));
nir_store_var(b, vars->result_available, fence_mask, 0x1);
/* fence_value < 0  <=>  bit 31 set  <=>  result has been written. */
nir_if *if_fence_set = nir_push_if(b, nir_ilt_imm(b, fence_value, 0)); {
nir_def *result = nir_load_ssbo(b, 2, 32, QUERY_RESULT_BUF, result_offset);
nir_store_var(b, vars->accum, nir_channels(b, result, 0x3), 0x3);
}
nir_pop_if(b, if_fence_set);
}
/* Emit the accumulation loops for pair-based queries (the !ONE_DWORD path).
 *
 * vars->accum is seeded with {0, 0}, or (R600_QUERY_READ_PREV) with the
 * summary a previous grid invocation stored in SSBO 1, whose channel 2 also
 * carries that invocation's "not available" mask.  The loops then walk up
 * to result_count result buffers and pair_count start/end pairs per buffer,
 * accumulating the 64-bit (end - start) differences.  In
 * R600_QUERY_SO_OVERFLOW mode the difference of the second half-pair
 * (at offset +8) is subtracted as well.
 *
 * vars->result_available keeps "not available" semantics throughout:
 * 0 means every fence inspected so far was signalled.
 */
static void r600_query_result_accumulate_pairs(nir_builder *b, nir_function_impl *impl,
nir_def *config_bitfield,
nir_def *result_count,
nir_def *result_stride,
nir_def *fence_offset,
nir_def *pair_stride,
nir_def *pair_count,
nir_def *end_offset,
nir_def *result_offset,
const struct r600_query_result_accum_vars *vars)
{
nir_def *_0i = nir_imm_int(b, 0);
/* accum = {0, 0} */
nir_store_var(b, vars->accum, nir_vec2(b, _0i, _0i), 0x3);
/* Optionally seed accum/availability from the chained previous summary. */
nir_def *cfg_prev_acc_result = nir_test_mask(b, config_bitfield, R600_QUERY_READ_PREV);
nir_if *if_prev_acc_result = nir_push_if(b, cfg_prev_acc_result); {
nir_def *result = nir_load_ssbo(b, 3, 32, QUERY_PREV_SUMMARY_BUF, _0i);
nir_store_var(b, vars->accum, nir_channels(b, result, 0x3), 0x3);
/* Channel 2 of the summary carries the previous "not available" mask. */
nir_store_var(b, vars->result_available, nir_channel(b, result, 2), 0x1);
}
nir_pop_if(b, if_prev_acc_result);
nir_def *availability_mask_value = nir_load_var(b, vars->result_available);
/* Only accumulate if the chained result so far was available (mask == 0). */
nir_if *if_not_z = nir_push_if(b, nir_ieq(b, availability_mask_value, _0i)); {
/* Accumulate query result pairs across result buffers:
 * for (result_index = 0; result_index < result_count; ++result_index) {
 * fence = load_fence(result_index)
 * if (!fence.valid) break // Stop when we hit an invalid fence
 * for (pair_index = 0; pair_index < pair_count; ++pair_index) {
 * pair_start = load64(result_offset + pair_index * pair_stride)
 * pair_end = load64(result_offset + pair_index * pair_stride + end_offset)
 * accumulator += (pair_end - pair_start)
 * if (SO_OVERFLOW_mode) {
 * fence_start = load64(...)
 * fence_end = load64(...)
 * accumulator -= (fence_end - fence_start)
 * }
 * }
 * }
 */
nir_variable *outer_loop_iter =
nir_local_variable_create(impl, glsl_uint_type(), "outer_loop_iter");
nir_store_var(b, outer_loop_iter, _0i, 0x1);
nir_loop *loop_outer = nir_push_loop(b); {
nir_def *result_index = nir_load_var(b, outer_loop_iter);
nir_break_if(b, nir_uge(b, result_index, result_count));
nir_def *pitch_outer_loop = nir_imul(b, result_index, result_stride);
nir_def *address = nir_iadd3(b, pitch_outer_loop,
fence_offset,
result_offset);
/* Load this buffer's fence; ~(fence >> 31) is 0 when it is signalled. */
nir_def *value = nir_load_ssbo(b, 1, 32, QUERY_RESULT_BUF, address);
nir_def *bitmask = nir_inot(b, nir_ishr_imm(b, value, 31));
nir_store_var(b, vars->result_available, bitmask, 0x1);
/* Stop at the first unsignalled fence (nonzero mask == not available). */
nir_break_if(b, nir_i2b(b, bitmask));
nir_variable *inner_loop_iter =
nir_local_variable_create(impl, glsl_uint_type(), "inner_loop_iter");
nir_store_var(b, inner_loop_iter, _0i, 0x1);
nir_loop *loop_inner = nir_push_loop(b); {
nir_def *i = nir_load_var(b, inner_loop_iter);
nir_def *pitch_inner_loop =
nir_iadd(b,
nir_imul(b, i, pair_stride),
pitch_outer_loop);
nir_def *base =
nir_iadd(b, pitch_inner_loop, result_offset);
/* 64-bit start/end of the pair; the end value is end_offset further on. */
nir_def *first = nir_load_ssbo(b, 1, 64, QUERY_RESULT_BUF, base);
nir_def *new_pitch =
nir_iadd(b, pitch_inner_loop, end_offset);
nir_def *second = nir_load_ssbo(b, 1, 64, QUERY_RESULT_BUF,
nir_iadd(b, new_pitch, result_offset));
nir_def *start_half_pair = nir_isub(b, second, first);
nir_def *difference;
nir_def *cfg_so_overflow_mode = nir_test_mask(b, config_bitfield, R600_QUERY_SO_OVERFLOW);
nir_if *if_so_overflow_mode = nir_push_if(b, cfg_so_overflow_mode); {
/* SO overflow: also read the second half-pair at offset +8 and
 * subtract its (end - start) difference. */
first = nir_load_ssbo(b, 1, 64, QUERY_RESULT_BUF,
nir_iadd_imm(b, base, 8));
second = nir_load_ssbo(b, 1, 64, QUERY_RESULT_BUF,
nir_iadd_imm(b,
nir_iadd(b, new_pitch, result_offset),
8));
nir_def *end_half_pair = nir_isub(b, second, first);
difference = nir_isub(b, start_half_pair, end_half_pair);
}
nir_pop_if(b, if_so_overflow_mode);
/* "difference" only exists on the SO-overflow path; nir_if_phi falls
 * back to the plain start_half_pair otherwise. */
nir_def *sum = nir_iadd(b, nir_pack_64_2x32(b, nir_load_var(b, vars->accum)),
nir_if_phi(b, difference, start_half_pair));
sum = nir_unpack_64_2x32(b, sum);
i = nir_iadd_imm(b, i, 1);
nir_store_var(b, inner_loop_iter, i, 0x1);
nir_store_var(b, vars->accum, sum, 0x3);
nir_def *is_pair_count_exceeded = nir_uge(b, i, pair_count);
nir_break_if(b, is_pair_count_exceeded);
}
nir_pop_loop(b, loop_inner);
nir_store_var(b, outer_loop_iter, nir_iadd_imm(b, result_index, 1), 0x1);
}
nir_pop_loop(b, loop_outer);
}
nir_pop_if(b, if_not_z);
}
/* Write the final value to the destination buffer (SSBO 2) according to
 * config_bitfield.  availability_value carries the "not available" mask
 * produced by the accumulation helpers (0 == result available); see the
 * pseudo code below for the exact branch structure.
 */
static void r600_query_result_store_output(nir_builder *b,
struct r600_common_context *rctx,
nir_def *config_bitfield,
nir_def *result_offset,
const struct r600_query_result_accum_vars *vars)
{
nir_def *_0i = nir_imm_int(b, 0);
nir_def *accum_value = nir_load_var(b, vars->accum);
nir_def *availability_value = nir_load_var(b, vars->result_available);
/* Store query results to output buffer based on configuration:
 * if (write_chain) {
 * // Store accumulated values for chaining to next grid invocation
 * output = (accum_lo, accum_hi, availability_mask)
 * } else if (write_available) {
 * // Store query availability flag (!fence_valid) and optional 64-bit padding
 * output = (fence_valid ? 0 : 1)
 * if (is_64bit) output += padding_word
 * } else {
 * // Store actual query result with optional conversions
 * if (apply_timestamp)
 * result = convert_to_timestamp(x, y)
 * if (convert_to_bool)
 * result = (result != 0) ? 1 : 0
 * if (is_64bit) {
 * output = (result_low, result_high)
 * } else {
 * output = result_low (clamped for signed mode)
 * }
 * }
 */
nir_def *is_acc_chaining = nir_test_mask(b, config_bitfield, R600_QUERY_WRITE_CHAIN);
nir_if *if_acc_chaining = nir_push_if(b, is_acc_chaining); {
/* Chain: persist {accum.lo, accum.hi, not-available mask}. */
nir_store_ssbo(b, nir_vec3(b, nir_channel(b, accum_value, 0), nir_channel(b, accum_value, 1), availability_value), NEXT_SUMMARY_OR_USER_BUF,
result_offset);
} nir_push_else(b, if_acc_chaining); {
nir_def *cfg_write_result_available = nir_test_mask(b, config_bitfield, R600_QUERY_WRITE_AVAIL);
nir_if *if_write_result_available = nir_push_if(b, cfg_write_result_available); {
/* User-visible availability flag: 1 when available (mask == 0). */
nir_store_ssbo(b,
nir_iand_imm(b, nir_inot(b, availability_value), 1),
NEXT_SUMMARY_OR_USER_BUF, result_offset);
nir_def *is_result_64_bits = nir_test_mask(b, config_bitfield, R600_QUERY_RESULT64);
nir_if *if_result_64_bits = nir_push_if(b, is_result_64_bits); {
/* Zero the upper dword of a 64-bit availability result. */
nir_store_ssbo(b, _0i, NEXT_SUMMARY_OR_USER_BUF, nir_iadd_imm(b, result_offset, 4));
}
nir_pop_if(b, if_result_64_bits);
} nir_push_else(b, if_write_result_available); {
/* inot(mask) != 0  <=>  the result IS available (despite the name;
 * mask == 0 means available -- see the accumulation helpers). */
nir_def *result_not_written = nir_i2b(b, nir_inot(b, availability_value));
nir_if *if_result_not_written = nir_push_if(b, result_not_written); {
nir_def *ts;
nir_def *cfg_apply_timestamp = nir_test_mask(b, config_bitfield, R600_QUERY_TIMESTAMP);
nir_if *if_apply_timestamp = nir_push_if(b, cfg_apply_timestamp); {
/* Scale the 64-bit raw counter by 1000000 / clock_crystal_freq. */
nir_def *clock_crystal_frequency =
nir_imm_int64(b, rctx->screen->info.clock_crystal_freq);
nir_def *xy_million = nir_imul(b,
nir_pack_64_2x32(b, accum_value),
nir_imm_int64(b, 1000000));
nir_def *ts_converted = nir_udiv(b, xy_million,
clock_crystal_frequency);
ts = nir_unpack_64_2x32(b, ts_converted);
}
nir_pop_if(b, if_apply_timestamp);
/* "ts" only exists on the timestamp path; the phi falls back to the
 * raw accumulator otherwise. */
nir_def *n = nir_if_phi(b, ts, accum_value);
nir_def *nx = nir_channel(b,n, 0);
nir_def *ny = nir_channel(b,n, 1);
nir_def *cfg_convert_to_bool = nir_test_mask(b, config_bitfield, R600_QUERY_BOOL);
nir_def *is_nonzero = nir_b2i32(b, nir_ine(b, nir_ior(b, nx, ny), _0i));
nx = nir_bcsel(b, cfg_convert_to_bool, is_nonzero, nx);
ny = nir_bcsel(b, cfg_convert_to_bool, _0i, ny);
nir_def *cfg_result_64_bits = nir_test_mask(b, config_bitfield, R600_QUERY_RESULT64);
nir_if *if_result_64_bits = nir_push_if(b, cfg_result_64_bits); {
nir_store_ssbo(b, nir_vec2(b, nx, ny), NEXT_SUMMARY_OR_USER_BUF, result_offset);
} nir_push_else(b, if_result_64_bits); {
/* 32-bit destination: saturate to UINT32_MAX when the high dword is
 * nonzero; SIGNED32 additionally clamps to INT_MAX. */
nir_def *is_y = nir_ine(b, ny, _0i);
nx = nir_bcsel(b, is_y, nir_imm_int(b, UINT32_MAX), nx);
nir_def *cfg_signed_32bit_result = nir_test_mask(b, config_bitfield, R600_QUERY_SIGNED32);
nir_def *min = nir_umin(b, nx, nir_imm_int(b, INT_MAX));
nx = nir_bcsel(b, cfg_signed_32bit_result, min, nx);
nir_store_ssbo(b, nx, NEXT_SUMMARY_OR_USER_BUF, result_offset);
}
nir_pop_if(b, if_result_64_bits);
}
nir_pop_if(b, if_result_not_written);
}
nir_pop_if(b, if_write_result_available);
}
nir_pop_if(b, if_acc_chaining);
}
/* Create the compute shader that is used to collect the results.
*
* One compute grid with a single thread is launched for every query result
@ -1389,188 +1636,68 @@ bool r600_query_hw_get_result(struct r600_common_context *rctx,
*/
/* NOTE(review): this span appears to be a unified diff rendered with its
 * '+'/'-' markers stripped: the REMOVED TGSI implementation (text_tmpl,
 * tgsi_text_translate, PIPE_SHADER_IR_TGSI) and the ADDED NIR implementation
 * are interleaved in one function body.  The result does not compile as-is:
 * 'state' is assigned both PIPE_SHADER_IR_NIR and PIPE_SHADER_IR_TGSI at the
 * end, the 'if (!tgsi_text_translate...)' block is never closed, and the
 * "Hard code the frequency..." comment has lost its closing marker (it
 * lexically swallows the workgroup_size/num_ssbos assignments until the next
 * comment terminator).  Recover the intended post-merge code from the
 * original commit before editing; comments below only annotate. */
static void r600_create_query_result_shader(struct r600_common_context *rctx)
{
/* TEMP[0].xy = accumulated result so far
* TEMP[0].z = result not available
*
* TEMP[1].x = current result index
* TEMP[1].y = current pair index
*/
/* NOTE(review): old (removed-side) TGSI shader template begins here. */
static const char text_tmpl[] =
"COMP\n"
"PROPERTY CS_FIXED_BLOCK_WIDTH 1\n"
"PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
"PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
"DCL BUFFER[0]\n"
"DCL BUFFER[1]\n"
"DCL BUFFER[2]\n"
"DCL CONST[0][0..2]\n"
"DCL TEMP[0..5]\n"
"IMM[0] UINT32 {0, 31, 2147483647, 4294967295}\n"
"IMM[1] UINT32 {1, 2, 4, 8}\n"
"IMM[2] UINT32 {16, 32, 64, 128}\n"
"IMM[3] UINT32 {1000000, 0, %u, 0}\n" /* for timestamp conversion */
"IMM[4] UINT32 {256, 0, 0, 0}\n"
"AND TEMP[5], CONST[0][0].wwww, IMM[2].xxxx\n"
"UIF TEMP[5]\n"
/* Check result availability. */
"UADD TEMP[1].x, CONST[0][1].xxxx, CONST[0][2].xxxx\n"
"LOAD TEMP[1].x, BUFFER[0], TEMP[1].xxxx\n"
"ISHR TEMP[0].z, TEMP[1].xxxx, IMM[0].yyyy\n"
"MOV TEMP[1], TEMP[0].zzzz\n"
"NOT TEMP[0].z, TEMP[0].zzzz\n"
/* Load result if available. */
"UIF TEMP[1]\n"
"UADD TEMP[0].x, IMM[0].xxxx, CONST[0][2].xxxx\n"
"LOAD TEMP[0].xy, BUFFER[0], TEMP[0].xxxx\n"
"ENDIF\n"
"ELSE\n"
/* Load previously accumulated result if requested. */
"MOV TEMP[0], IMM[0].xxxx\n"
"AND TEMP[4], CONST[0][0].wwww, IMM[1].xxxx\n"
"UIF TEMP[4]\n"
"LOAD TEMP[0].xyz, BUFFER[1], IMM[0].xxxx\n"
"ENDIF\n"
"MOV TEMP[1].x, IMM[0].xxxx\n"
"BGNLOOP\n"
/* Break if accumulated result so far is not available. */
"UIF TEMP[0].zzzz\n"
"BRK\n"
"ENDIF\n"
/* Break if result_index >= result_count. */
"USGE TEMP[5], TEMP[1].xxxx, CONST[0][0].zzzz\n"
"UIF TEMP[5]\n"
"BRK\n"
"ENDIF\n"
/* Load fence and check result availability */
"UMAD TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy, CONST[0][1].xxxx\n"
"UADD TEMP[5].x, TEMP[5].xxxx, CONST[0][2].xxxx\n"
"LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n"
"ISHR TEMP[0].z, TEMP[5].xxxx, IMM[0].yyyy\n"
"NOT TEMP[0].z, TEMP[0].zzzz\n"
"UIF TEMP[0].zzzz\n"
"BRK\n"
"ENDIF\n"
"MOV TEMP[1].y, IMM[0].xxxx\n"
"BGNLOOP\n"
/* Load start and end. */
"UMUL TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy\n"
"UMAD TEMP[5].x, TEMP[1].yyyy, CONST[0][1].yyyy, TEMP[5].xxxx\n"
"UADD TEMP[5].x, TEMP[5].xxxx, CONST[0][2].xxxx\n"
"LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"
"UADD TEMP[5].y, TEMP[5].xxxx, CONST[0][0].xxxx\n"
"LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n"
"U64ADD TEMP[4].xy, TEMP[3], -TEMP[2]\n"
"AND TEMP[5].z, CONST[0][0].wwww, IMM[4].xxxx\n"
"UIF TEMP[5].zzzz\n"
/* Load second start/end half-pair and
* take the difference
*/
"UADD TEMP[5].xy, TEMP[5], IMM[1].wwww\n"
"LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"
"LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n"
"U64ADD TEMP[3].xy, TEMP[3], -TEMP[2]\n"
"U64ADD TEMP[4].xy, TEMP[4], -TEMP[3]\n"
"ENDIF\n"
"U64ADD TEMP[0].xy, TEMP[0], TEMP[4]\n"
/* Increment pair index */
"UADD TEMP[1].y, TEMP[1].yyyy, IMM[1].xxxx\n"
"USGE TEMP[5], TEMP[1].yyyy, CONST[0][1].zzzz\n"
"UIF TEMP[5]\n"
"BRK\n"
"ENDIF\n"
"ENDLOOP\n"
/* Increment result index */
"UADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx\n"
"ENDLOOP\n"
"ENDIF\n"
"AND TEMP[4], CONST[0][0].wwww, IMM[1].yyyy\n"
"UIF TEMP[4]\n"
/* Store accumulated data for chaining. */
"STORE BUFFER[2].xyz, CONST[0][1].wwww, TEMP[0]\n"
"ELSE\n"
"AND TEMP[4], CONST[0][0].wwww, IMM[1].zzzz\n"
"UIF TEMP[4]\n"
/* Store result availability. */
"NOT TEMP[0].z, TEMP[0]\n"
"AND TEMP[0].z, TEMP[0].zzzz, IMM[1].xxxx\n"
"STORE BUFFER[2].x, CONST[0][1].wwww, TEMP[0].zzzz\n"
"AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n"
"UIF TEMP[4]\n"
"STORE BUFFER[2].y, CONST[0][1].wwww, IMM[0].xxxx\n"
"ENDIF\n"
"ELSE\n"
/* Store result if it is available. */
"NOT TEMP[4], TEMP[0].zzzz\n"
"UIF TEMP[4]\n"
/* Apply timestamp conversion */
"AND TEMP[4], CONST[0][0].wwww, IMM[2].yyyy\n"
"UIF TEMP[4]\n"
"U64MUL TEMP[0].xy, TEMP[0], IMM[3].xyxy\n"
"U64DIV TEMP[0].xy, TEMP[0], IMM[3].zwzw\n"
"ENDIF\n"
/* Convert to boolean */
"AND TEMP[4], CONST[0][0].wwww, IMM[1].wwww\n"
"UIF TEMP[4]\n"
"U64SNE TEMP[0].x, TEMP[0].xyxy, IMM[4].zwzw\n"
"AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx\n"
"MOV TEMP[0].y, IMM[0].xxxx\n"
"ENDIF\n"
"AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n"
"UIF TEMP[4]\n"
"STORE BUFFER[2].xy, CONST[0][1].wwww, TEMP[0].xyxy\n"
"ELSE\n"
/* Clamping */
"UIF TEMP[0].yyyy\n"
"MOV TEMP[0].x, IMM[0].wwww\n"
"ENDIF\n"
"AND TEMP[4], CONST[0][0].wwww, IMM[2].wwww\n"
"UIF TEMP[4]\n"
"UMIN TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz\n"
"ENDIF\n"
"STORE BUFFER[2].x, CONST[0][1].wwww, TEMP[0].xxxx\n"
"ENDIF\n"
"ENDIF\n"
"ENDIF\n"
"ENDIF\n"
"END\n";
char text[sizeof(text_tmpl) + 32];
struct tgsi_token tokens[1024];
/* NOTE(review): new (added-side) NIR implementation begins here. */
const struct nir_shader_compiler_options *options =
rctx->b.screen->nir_options[MESA_SHADER_COMPUTE];
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
"r600_create_query_result_cs");
struct pipe_compute_state state = {};
/* Hard code the frequency into the shader so that the backend can
* use the full range of optimizations for divide-by-constant.
b.shader->info.workgroup_size[0] = 1;
b.shader->info.workgroup_size[1] = 1;
b.shader->info.workgroup_size[2] = 1;
b.shader->info.num_ubos = 1;
b.shader->info.num_ssbos = 3;
b.shader->num_uniforms = 3;
/* uint32_t accum_lo, accum_hi, availability_mask = 0; */
nir_function_impl *impl = nir_shader_get_entrypoint(b.shader);
nir_variable *accum = nir_local_variable_create(impl, glsl_ivec2_type(), "accum");
nir_variable *availability_mask = nir_local_variable_create(impl, glsl_uint_type(), "availability_mask");
nir_store_var(&b, availability_mask, nir_imm_int(&b, 0), 0x1);
const struct r600_query_result_accum_vars accum_vars = {accum, availability_mask};
/* consts[0] = {end_offset, result_stride, result_count, config}
* consts[1] = {fence_offset, pair_stride, pair_count, buffer_offset}
* consts[2] = {buffer0_offset, 0, 0, 0}
*/
snprintf(text, sizeof(text), text_tmpl,
rctx->screen->info.clock_crystal_freq);
nir_def *default_ubo = nir_imm_int(&b, 0);
nir_def *config_0 = nir_load_ubo(&b, 4, 32, default_ubo, nir_imm_int(&b, 0), .range_base = 0, .range = 16);
nir_def *config_1 = nir_load_ubo(&b, 4, 32, default_ubo, nir_imm_int(&b, 16), .range_base = 16, .range = 16);
nir_def *config_2 = nir_load_ubo(&b, 4, 32, default_ubo, nir_imm_int(&b, 32), .range_base = 32, .range = 16);
/* NOTE(review): this removed-side 'if' has lost its closing brace in the
 * stripped diff. */
if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) {
assert(false);
return;
/* Decode UBO payload into named fields to make the shader logic readable. */
nir_def *end_offset = nir_channel(&b, config_0, 0);
nir_def *result_stride = nir_channel(&b, config_0, 1);
nir_def *result_count = nir_channel(&b, config_0, 2);
/* NOTE(review): "confifg_bitfield" is a typo for "config_bitfield"
 * (local name only; used consistently below). */
nir_def *confifg_bitfield = nir_channel(&b, config_0, 3);
nir_def *fence_offset = nir_channel(&b, config_1, 0);
nir_def *pair_stride = nir_channel(&b, config_1, 1);
nir_def *pair_count = nir_channel(&b, config_1, 2);
nir_def *result_offset = nir_channel(&b, config_1, 3);
nir_def *buffer0_offset = nir_channel(&b, config_2, 0);
/* Check result availability for timestamp queries. */
nir_def *is_one_dword_result =
nir_test_mask(&b, confifg_bitfield, R600_QUERY_ONE_DWORD);
nir_if *if_one_dword_result = nir_push_if(&b, is_one_dword_result); {
r600_query_result_load_one_dword(&b, fence_offset, buffer0_offset, &accum_vars);
} nir_push_else(&b, if_one_dword_result); {
r600_query_result_accumulate_pairs(&b, impl, confifg_bitfield, result_count,
result_stride, fence_offset,
pair_stride, pair_count,
end_offset, buffer0_offset,
&accum_vars);
}
nir_pop_if(&b, if_one_dword_result);
r600_query_result_store_output(&b, rctx, confifg_bitfield, result_offset, &accum_vars);
rctx->b.screen->finalize_nir(rctx->b.screen, b.shader, true);
/* NOTE(review): conflicting removed/added state setups interleaved below;
 * only the NIR pair belongs to the post-merge code. */
state.ir_type = PIPE_SHADER_IR_NIR;
state.prog = b.shader;
state.ir_type = PIPE_SHADER_IR_TGSI;
state.prog = tokens;
rctx->query_result_shader = rctx->b.create_compute_state(&rctx->b, &state);
}
@ -1613,6 +1740,7 @@ static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
uint32_t pair_count;
uint32_t buffer_offset;
uint32_t buffer0_offset;
uint32_t pad[3];
} consts;
if (!rctx->query_result_shader) {

View file

@ -19,9 +19,9 @@
#include "util/u_math.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_ureg.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir/nir_to_tgsi_info.h"
void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw)
@ -1680,32 +1680,25 @@ static void r600_update_clip_state(struct r600_context *rctx,
}
}
/* NOTE(review): this span interleaves the REMOVED TGSI implementation
 * (r600_generate_fixed_func_tcs and its ureg_* calls) with the ADDED NIR one
 * (r600_create_fixed_func_tcs_nir) -- the diff's '+'/'-' markers were
 * stripped, so two function definitions are fused here and this does not
 * compile as-is.  Recover the post-merge code from the original commit
 * before editing; comments below only annotate. */
static void r600_generate_fixed_func_tcs(struct r600_context *rctx)
/* The TCS passthrough shader only writes the tessellation levels,
* the IO doesn't need to be copied over, because TES gets handed the
* location of the VS outputs directly if this shader is used
* (see evergreen_setup_tess_constants)
*/
static struct r600_pipe_shader_selector *
r600_create_fixed_func_tcs_nir(struct r600_context *rctx)
{
struct ureg_src const0, const1;
struct ureg_dst tessouter, tessinner;
struct ureg_program *ureg = ureg_create(MESA_SHADER_TESS_CTRL);
const struct nir_shader_compiler_options *options =
rctx->screen->b.b.nir_options[MESA_SHADER_TESS_CTRL];
if (!ureg)
return; /* if we get here, we're screwed */
struct pipe_shader_state state = {
.type = PIPE_SHADER_IR_NIR,
.ir.nir = nir_create_passthrough_tcs_impl(options, NULL, 0, 0)
};
assert(!rctx->fixed_func_tcs_shader);
ureg_DECL_constant2D(ureg, 0, 1, R600_BUFFER_INFO_CONST_BUFFER);
const0 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 0),
R600_BUFFER_INFO_CONST_BUFFER);
const1 = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 1),
R600_BUFFER_INFO_CONST_BUFFER);
tessouter = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSOUTER, 0);
tessinner = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSINNER, 0);
ureg_MOV(ureg, tessouter, const0);
ureg_MOV(ureg, tessinner, const1);
ureg_END(ureg);
rctx->fixed_func_tcs_shader =
ureg_create_shader_and_destroy(ureg, &rctx->b.b);
return (struct r600_pipe_shader_selector *)
rctx->b.b.create_tcs_state(&rctx->b.b, &state);
}
void r600_update_compressed_resource_state(struct r600_context *rctx, bool compute_only)
@ -1920,7 +1913,7 @@ static bool r600_update_derived_state(struct r600_context *rctx)
UPDATE_SHADER(EG_HW_STAGE_HS, tcs);
} else if (rctx->tes_shader) {
if (!rctx->fixed_func_tcs_shader) {
r600_generate_fixed_func_tcs(rctx);
rctx->fixed_func_tcs_shader = r600_create_fixed_func_tcs_nir(rctx);
if (!rctx->fixed_func_tcs_shader)
return false;

View file

@ -22,6 +22,7 @@
#include "sfn_nir_lower_tex.h"
#include "sfn_optimizer.h"
#include "sfn_ra.h"
#include "sfn_shader_tess.h"
#include "sfn_scheduler.h"
#include "sfn_shader.h"
#include "sfn_split_address_loads.h"
@ -869,8 +870,11 @@ r600_lower_and_optimize_nir(nir_shader *sh,
NIR_PASS(_, sh, r600_lower_tess_io, static_cast<mesa_prim>(prim_type));
}
if (sh->info.stage == MESA_SHADER_TESS_CTRL)
if (sh->info.stage == MESA_SHADER_TESS_CTRL) {
NIR_PASS(_, sh, nir_lower_system_values);
NIR_PASS(_, sh, r600_lower_tess_level_default_to_ubo);
NIR_PASS(_, sh, r600_append_tcs_TF_emission, (mesa_prim)key->tcs.prim_mode);
}
if (sh->info.stage == MESA_SHADER_TESS_EVAL) {
NIR_PASS(_,

View file

@ -49,6 +49,8 @@ lower_alu = [
(('seq', ('fadd', 'a', 'b'), 0.0), ('seq', 'a', ('fneg', 'b'))),
(('sne', ('fadd', 'a', 'b'), 0.0), ('sne', 'a', ('fneg', 'b'))),
(('iadd3', 'a', 'b', 'c'), ('iadd', 'a', ('iadd', 'b', 'c'))),
]

View file

@ -8,6 +8,8 @@
#include "sfn_instr_export.h"
#include "sfn_shader_vs.h"
#include "sfn_nir.h"
#include "nir.h"
#include <sstream>
@ -245,4 +247,41 @@ TESShader::do_print_properties(std::ostream& os) const
(void)os;
}
/* Rewrites loads of the default inner/outer tessellation levels into UBO
 * reads from the R600 buffer-info constant buffer: the four outer levels
 * live at offset 0, the two inner levels at offset 16. */
class LowerTessLevelDefault : public NirLowerInstruction {
   /* Match only the two default-tess-level load intrinsics. */
   bool filter(const nir_instr *instr) const override
   {
      if (instr->type != nir_instr_type_intrinsic)
         return false;
      const auto op = nir_instr_as_intrinsic(instr)->intrinsic;
      return op == nir_intrinsic_load_tess_level_inner_default ||
             op == nir_intrinsic_load_tess_level_outer_default;
   }

   /* Replace the matched intrinsic with the corresponding UBO load. */
   nir_def *lower(nir_instr *instr) override
   {
      const auto op = nir_instr_as_intrinsic(instr)->intrinsic;
      auto info_buffer = nir_imm_int(b, R600_BUFFER_INFO_CONST_BUFFER);

      if (op == nir_intrinsic_load_tess_level_inner_default)
         return nir_load_ubo(b, 2, 32, info_buffer, nir_imm_int(b, 16),
                             .range_base = 16, .range = 8);
      if (op == nir_intrinsic_load_tess_level_outer_default)
         return nir_load_ubo(b, 4, 32, info_buffer, nir_imm_int(b, 0),
                             .range_base = 0, .range = 16);

      /* filter() guarantees we never get here. */
      assert(0);
      return nullptr;
   }
};
} // namespace r600
/* Pass entry point: lower default tess-level loads in `sh` to UBO reads.
 * Returns the value of NirLowerInstruction::run (progress indication). */
int r600_lower_tess_level_default_to_ubo(nir_shader *sh)
{
   r600::LowerTessLevelDefault pass;
   return pass.run(sh);
}

View file

@ -83,4 +83,6 @@ private:
} // namespace r600
int r600_lower_tess_level_default_to_ubo(nir_shader *sh);
#endif // TCS_H