mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 04:48:08 +02:00
r600: replace TGSI query shader with nir
This commit was created with the assistance of GitHub Copilot.
v2: - remove a few useless helpers
- rename some variables
- use some more nir with immediate codes (Emma)
Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
This commit is contained in:
parent
7bdc3d74d3
commit
6d10e79c26
1 changed file with 305 additions and 177 deletions
|
|
@ -10,7 +10,7 @@
|
|||
#include "util/u_memory.h"
|
||||
#include "util/u_upload_mgr.h"
|
||||
#include "util/os_time.h"
|
||||
#include "tgsi/tgsi_text.h"
|
||||
#include "nir/nir_builder.h"
|
||||
|
||||
#define R600_MAX_STREAMS 4
|
||||
|
||||
|
|
@ -1353,6 +1353,253 @@ bool r600_query_hw_get_result(struct r600_common_context *rctx,
|
|||
return true;
|
||||
}
|
||||
|
||||
/* Flags packed into the config dword handed to the query-result compute
 * shader; each bit selects a processing step the generated NIR performs.
 */
enum r600_query_result_shader_config {
	R600_QUERY_READ_PREV = 1,     /* seed accumulator from previous summary buffer */
	R600_QUERY_WRITE_CHAIN = 2,   /* store accumulator for the next grid invocation */
	R600_QUERY_WRITE_AVAIL = 4,   /* store only the availability flag */
	R600_QUERY_BOOL = 8,          /* collapse the result to 0/1 */
	R600_QUERY_ONE_DWORD = 16,    /* single-dword result (e.g. timestamp) path */
	R600_QUERY_TIMESTAMP = 32,    /* convert ticks to nanoseconds */
	R600_QUERY_RESULT64 = 64,     /* caller wants a 64-bit result */
	R600_QUERY_SIGNED32 = 128,    /* clamp 32-bit result to INT_MAX */
	R600_QUERY_SO_OVERFLOW = 256, /* streamout-overflow: subtract second half-pair */
};

/* Local NIR variables shared by the shader-building helpers below.
 * accum            - uvec2 holding the accumulated result as {lo, hi} dwords
 * result_available - non-zero while the result is NOT yet available
 *                    (mirrors the old TGSI "TEMP[0].z = result not available")
 */
struct r600_query_result_accum_vars {
	nir_variable *accum;
	nir_variable *result_available;
};

/* SSBO binding points used by the query-result shader. */
#define QUERY_RESULT_BUF nir_imm_int(b, 0)         /* raw query result pairs + fences */
#define QUERY_PREV_SUMMARY_BUF nir_imm_int(b, 1)   /* summary from the previous invocation */
#define NEXT_SUMMARY_OR_USER_BUF nir_imm_int(b, 2) /* next summary, or the user buffer */
|
||||
|
||||
/* Emit NIR that fetches a single-dword query result (timestamp-style query).
 *
 * The fence dword at fence_offset + result_offset has bit 31 set once the GPU
 * has written the result.  vars->result_available is set to ~0 ("not
 * available") while the fence is clear and to 0 once it is signaled; the two
 * result dwords are copied into vars->accum only in the signaled case.
 */
static void r600_query_result_load_one_dword(nir_builder *b, nir_def *fence_offset,
                                             nir_def *result_offset,
                                             const struct r600_query_result_accum_vars *vars)
{
	nir_def *fence_value = nir_load_ssbo(b, 1, 32, QUERY_RESULT_BUF,
	                                     nir_iadd(b, fence_offset, result_offset));
	/* Arithmetic shift replicates bit 31 across the dword; inverting yields
	 * 0 when the fence is signaled and ~0 ("result not available") otherwise. */
	nir_def *fence_mask = nir_inot(b, nir_ishr_imm(b, fence_value, 31));
	nir_store_var(b, vars->result_available, fence_mask, 0x1);

	/* fence_value < 0 <=> bit 31 set <=> result was written. */
	nir_if *if_fence_set = nir_push_if(b, nir_ilt_imm(b, fence_value, 0)); {
		/* Load the result as two dwords {lo, hi} into the accumulator. */
		nir_def *result = nir_load_ssbo(b, 2, 32, QUERY_RESULT_BUF, result_offset);
		nir_store_var(b, vars->accum, nir_channels(b, result, 0x3), 0x3);
	}
	nir_pop_if(b, if_fence_set);
}
|
||||
|
||||
/* Emit NIR that accumulates (end - start) 64-bit pairs across all result
 * buffers into vars->accum.
 *
 * When R600_QUERY_READ_PREV is set, the accumulator and availability flag are
 * seeded from the previous summary buffer.  Accumulation only runs while the
 * carried-over result is still marked available (result_available == 0); the
 * outer loop additionally stops at the first unsignaled fence, leaving
 * vars->result_available non-zero in that case.
 */
static void r600_query_result_accumulate_pairs(nir_builder *b, nir_function_impl *impl,
                                               nir_def *config_bitfield,
                                               nir_def *result_count,
                                               nir_def *result_stride,
                                               nir_def *fence_offset,
                                               nir_def *pair_stride,
                                               nir_def *pair_count,
                                               nir_def *end_offset,
                                               nir_def *result_offset,
                                               const struct r600_query_result_accum_vars *vars)
{
	nir_def *_0i = nir_imm_int(b, 0);

	nir_store_var(b, vars->accum, nir_vec2(b, _0i, _0i), 0x3);

	/* Load previously accumulated result if requested: {lo, hi, available}. */
	nir_def *cfg_prev_acc_result = nir_test_mask(b, config_bitfield, R600_QUERY_READ_PREV);
	nir_if *if_prev_acc_result = nir_push_if(b, cfg_prev_acc_result); {
		nir_def *result = nir_load_ssbo(b, 3, 32, QUERY_PREV_SUMMARY_BUF, _0i);
		nir_store_var(b, vars->accum, nir_channels(b, result, 0x3), 0x3);
		nir_store_var(b, vars->result_available, nir_channel(b, result, 2), 0x1);
	}
	nir_pop_if(b, if_prev_acc_result);

	nir_def *availability_mask_value = nir_load_var(b, vars->result_available);
	nir_if *if_not_z = nir_push_if(b, nir_ieq(b, availability_mask_value, _0i)); {

		/* Accumulate query result pairs across result buffers:
		 * for (result_index = 0; result_index < result_count; ++result_index) {
		 *    fence = load_fence(result_index)
		 *    if (!fence.valid) break // Stop when we hit an invalid fence
		 *    for (pair_index = 0; pair_index < pair_count; ++pair_index) {
		 *       pair_start = load64(result_offset + pair_index * pair_stride)
		 *       pair_end = load64(result_offset + pair_index * pair_stride + end_offset)
		 *       accumulator += (pair_end - pair_start)
		 *       if (SO_OVERFLOW_mode) {
		 *          fence_start = load64(...)
		 *          fence_end = load64(...)
		 *          accumulator -= (fence_end - fence_start)
		 *       }
		 *    }
		 * }
		 */

		nir_variable *outer_loop_iter =
			nir_local_variable_create(impl, glsl_uint_type(), "outer_loop_iter");
		nir_store_var(b, outer_loop_iter, _0i, 0x1);
		nir_loop *loop_outer = nir_push_loop(b); {
			nir_def *result_index = nir_load_var(b, outer_loop_iter);

			nir_break_if(b, nir_uge(b, result_index, result_count));

			/* Check this buffer's fence; bitmask is 0 when signaled, ~0 otherwise. */
			nir_def *pitch_outer_loop = nir_imul(b, result_index, result_stride);
			nir_def *address = nir_iadd3(b, pitch_outer_loop,
			                             fence_offset,
			                             result_offset);
			nir_def *value = nir_load_ssbo(b, 1, 32, QUERY_RESULT_BUF, address);
			nir_def *bitmask = nir_inot(b, nir_ishr_imm(b, value, 31));
			nir_store_var(b, vars->result_available, bitmask, 0x1);

			/* Stop at the first unsignaled fence. */
			nir_break_if(b, nir_i2b(b, bitmask));

			nir_variable *inner_loop_iter =
				nir_local_variable_create(impl, glsl_uint_type(), "inner_loop_iter");
			nir_store_var(b, inner_loop_iter, _0i, 0x1);

			nir_loop *loop_inner = nir_push_loop(b); {
				nir_def *i = nir_load_var(b, inner_loop_iter);
				nir_def *pitch_inner_loop =
					nir_iadd(b,
					         nir_imul(b, i, pair_stride),
					         pitch_outer_loop);

				/* Load the pair's start and end as 64-bit values. */
				nir_def *base =
					nir_iadd(b, pitch_inner_loop, result_offset);
				nir_def *first = nir_load_ssbo(b, 1, 64, QUERY_RESULT_BUF, base);
				nir_def *new_pitch =
					nir_iadd(b, pitch_inner_loop, end_offset);
				nir_def *second = nir_load_ssbo(b, 1, 64, QUERY_RESULT_BUF,
				                                nir_iadd(b, new_pitch, result_offset));
				nir_def *start_half_pair = nir_isub(b, second, first);

				nir_def *difference;
				nir_def *cfg_so_overflow_mode = nir_test_mask(b, config_bitfield, R600_QUERY_SO_OVERFLOW);
				nir_if *if_so_overflow_mode = nir_push_if(b, cfg_so_overflow_mode); {
					/* Load the second start/end half-pair (8 bytes further)
					 * and subtract its delta from the first one. */
					first = nir_load_ssbo(b, 1, 64, QUERY_RESULT_BUF,
					                      nir_iadd_imm(b, base, 8));
					second = nir_load_ssbo(b, 1, 64, QUERY_RESULT_BUF,
					                       nir_iadd_imm(b,
					                                    nir_iadd(b, new_pitch, result_offset),
					                                    8));
					nir_def *end_half_pair = nir_isub(b, second, first);
					difference = nir_isub(b, start_half_pair, end_half_pair);
				}
				nir_pop_if(b, if_so_overflow_mode);

				/* Accumulate in 64 bits, then split back into {lo, hi}. */
				nir_def *sum = nir_iadd(b, nir_pack_64_2x32(b, nir_load_var(b, vars->accum)),
				                        nir_if_phi(b, difference, start_half_pair));
				sum = nir_unpack_64_2x32(b, sum);

				i = nir_iadd_imm(b, i, 1);
				nir_store_var(b, inner_loop_iter, i, 0x1);

				nir_store_var(b, vars->accum, sum, 0x3);

				nir_def *is_pair_count_exceeded = nir_uge(b, i, pair_count);
				nir_break_if(b, is_pair_count_exceeded);
			}
			nir_pop_loop(b, loop_inner);

			nir_store_var(b, outer_loop_iter, nir_iadd_imm(b, result_index, 1), 0x1);
		}
		nir_pop_loop(b, loop_outer);
	}
	nir_pop_if(b, if_not_z);
}
|
||||
|
||||
/* Emit NIR that writes the accumulated query result to the output buffer.
 *
 * Which of the three store paths runs is selected by config_bitfield:
 * chaining (WRITE_CHAIN), availability-only (WRITE_AVAIL), or the actual
 * result with optional timestamp/boolean conversion and 32-bit clamping.
 */
static void r600_query_result_store_output(nir_builder *b,
                                           struct r600_common_context *rctx,
                                           nir_def *config_bitfield,
                                           nir_def *result_offset,
                                           const struct r600_query_result_accum_vars *vars)
{
	nir_def *_0i = nir_imm_int(b, 0);
	nir_def *accum_value = nir_load_var(b, vars->accum);
	nir_def *availability_value = nir_load_var(b, vars->result_available);

	/* Store query results to output buffer based on configuration:
	 * if (write_chain) {
	 *    // Store accumulated values for chaining to next grid invocation
	 *    output = (accum_lo, accum_hi, availability_mask)
	 * } else if (write_available) {
	 *    // Store query availability flag (!fence_valid) and optional 64-bit padding
	 *    output = (fence_valid ? 0 : 1)
	 *    if (is_64bit) output += padding_word
	 * } else {
	 *    // Store actual query result with optional conversions
	 *    if (apply_timestamp)
	 *       result = convert_to_timestamp(x, y)
	 *    if (convert_to_bool)
	 *       result = (result != 0) ? 1 : 0
	 *    if (is_64bit) {
	 *       output = (result_low, result_high)
	 *    } else {
	 *       output = result_low (clamped for signed mode)
	 *    }
	 * }
	 */
	nir_def *is_acc_chaining = nir_test_mask(b, config_bitfield, R600_QUERY_WRITE_CHAIN);
	nir_if *if_acc_chaining = nir_push_if(b, is_acc_chaining); {
		nir_store_ssbo(b, nir_vec3(b, nir_channel(b, accum_value, 0), nir_channel(b, accum_value, 1), availability_value), NEXT_SUMMARY_OR_USER_BUF,
		               result_offset);
	} nir_push_else(b, if_acc_chaining); {
		nir_def *cfg_write_result_available = nir_test_mask(b, config_bitfield, R600_QUERY_WRITE_AVAIL);
		nir_if *if_write_result_available = nir_push_if(b, cfg_write_result_available); {
			/* availability_value is 0 when available, so invert and mask to 0/1. */
			nir_store_ssbo(b,
			               nir_iand_imm(b, nir_inot(b, availability_value), 1),
			               NEXT_SUMMARY_OR_USER_BUF, result_offset);

			/* 64-bit availability result: zero the padding dword. */
			nir_def *is_result_64_bits = nir_test_mask(b, config_bitfield, R600_QUERY_RESULT64);
			nir_if *if_result_64_bits = nir_push_if(b, is_result_64_bits); {
				nir_store_ssbo(b, _0i, NEXT_SUMMARY_OR_USER_BUF, nir_iadd_imm(b, result_offset, 4));
			}
			nir_pop_if(b, if_result_64_bits);
		} nir_push_else(b, if_write_result_available); {
			/* Only store the result when it is actually available. */
			nir_def *result_not_written = nir_i2b(b, nir_inot(b, availability_value));
			nir_if *if_result_not_written = nir_push_if(b, result_not_written); {
				nir_def *ts;

				/* ticks -> ns: result * 1000000 / clock_crystal_freq (kHz). */
				nir_def *cfg_apply_timestamp = nir_test_mask(b, config_bitfield, R600_QUERY_TIMESTAMP);
				nir_if *if_apply_timestamp = nir_push_if(b, cfg_apply_timestamp); {
					nir_def *clock_crystal_frequency =
						nir_imm_int64(b, rctx->screen->info.clock_crystal_freq);
					nir_def *xy_million = nir_imul(b,
					                               nir_pack_64_2x32(b, accum_value),
					                               nir_imm_int64(b, 1000000));
					nir_def *ts_converted = nir_udiv(b, xy_million,
					                                 clock_crystal_frequency);
					ts = nir_unpack_64_2x32(b, ts_converted);
				}
				nir_pop_if(b, if_apply_timestamp);

				/* Select converted timestamp or raw accumulator. */
				nir_def *n = nir_if_phi(b, ts, accum_value);
				nir_def *nx = nir_channel(b,n, 0);
				nir_def *ny = nir_channel(b,n, 1);

				/* Optional boolean conversion: 1 if any bit set, else 0. */
				nir_def *cfg_convert_to_bool = nir_test_mask(b, config_bitfield, R600_QUERY_BOOL);
				nir_def *is_nonzero = nir_b2i32(b, nir_ine(b, nir_ior(b, nx, ny), _0i));
				nx = nir_bcsel(b, cfg_convert_to_bool, is_nonzero, nx);
				ny = nir_bcsel(b, cfg_convert_to_bool, _0i, ny);

				nir_def *cfg_result_64_bits = nir_test_mask(b, config_bitfield, R600_QUERY_RESULT64);
				nir_if *if_result_64_bits = nir_push_if(b, cfg_result_64_bits); {
					nir_store_ssbo(b, nir_vec2(b, nx, ny), NEXT_SUMMARY_OR_USER_BUF, result_offset);
				} nir_push_else(b, if_result_64_bits); {
					/* 32-bit output: saturate if the high dword is non-zero,
					 * and clamp to INT_MAX for signed-result mode. */
					nir_def *is_y = nir_ine(b, ny, _0i);
					nx = nir_bcsel(b, is_y, nir_imm_int(b, UINT32_MAX), nx);
					nir_def *cfg_signed_32bit_result = nir_test_mask(b, config_bitfield, R600_QUERY_SIGNED32);
					nir_def *min = nir_umin(b, nx, nir_imm_int(b, INT_MAX));
					nx = nir_bcsel(b, cfg_signed_32bit_result, min, nx);
					nir_store_ssbo(b, nx, NEXT_SUMMARY_OR_USER_BUF, result_offset);
				}
				nir_pop_if(b, if_result_64_bits);
			}
			nir_pop_if(b, if_result_not_written);
		}
		nir_pop_if(b, if_write_result_available);
	}
	nir_pop_if(b, if_acc_chaining);
}
|
||||
|
||||
/* Create the compute shader that is used to collect the results.
|
||||
*
|
||||
* One compute grid with a single thread is launched for every query result
|
||||
|
|
@ -1389,188 +1636,68 @@ bool r600_query_hw_get_result(struct r600_common_context *rctx,
|
|||
*/
|
||||
static void r600_create_query_result_shader(struct r600_common_context *rctx)
|
||||
{
|
||||
/* TEMP[0].xy = accumulated result so far
|
||||
* TEMP[0].z = result not available
|
||||
*
|
||||
* TEMP[1].x = current result index
|
||||
* TEMP[1].y = current pair index
|
||||
*/
|
||||
static const char text_tmpl[] =
|
||||
"COMP\n"
|
||||
"PROPERTY CS_FIXED_BLOCK_WIDTH 1\n"
|
||||
"PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
|
||||
"PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
|
||||
"DCL BUFFER[0]\n"
|
||||
"DCL BUFFER[1]\n"
|
||||
"DCL BUFFER[2]\n"
|
||||
"DCL CONST[0][0..2]\n"
|
||||
"DCL TEMP[0..5]\n"
|
||||
"IMM[0] UINT32 {0, 31, 2147483647, 4294967295}\n"
|
||||
"IMM[1] UINT32 {1, 2, 4, 8}\n"
|
||||
"IMM[2] UINT32 {16, 32, 64, 128}\n"
|
||||
"IMM[3] UINT32 {1000000, 0, %u, 0}\n" /* for timestamp conversion */
|
||||
"IMM[4] UINT32 {256, 0, 0, 0}\n"
|
||||
|
||||
"AND TEMP[5], CONST[0][0].wwww, IMM[2].xxxx\n"
|
||||
"UIF TEMP[5]\n"
|
||||
/* Check result availability. */
|
||||
"UADD TEMP[1].x, CONST[0][1].xxxx, CONST[0][2].xxxx\n"
|
||||
"LOAD TEMP[1].x, BUFFER[0], TEMP[1].xxxx\n"
|
||||
"ISHR TEMP[0].z, TEMP[1].xxxx, IMM[0].yyyy\n"
|
||||
"MOV TEMP[1], TEMP[0].zzzz\n"
|
||||
"NOT TEMP[0].z, TEMP[0].zzzz\n"
|
||||
|
||||
/* Load result if available. */
|
||||
"UIF TEMP[1]\n"
|
||||
"UADD TEMP[0].x, IMM[0].xxxx, CONST[0][2].xxxx\n"
|
||||
"LOAD TEMP[0].xy, BUFFER[0], TEMP[0].xxxx\n"
|
||||
"ENDIF\n"
|
||||
"ELSE\n"
|
||||
/* Load previously accumulated result if requested. */
|
||||
"MOV TEMP[0], IMM[0].xxxx\n"
|
||||
"AND TEMP[4], CONST[0][0].wwww, IMM[1].xxxx\n"
|
||||
"UIF TEMP[4]\n"
|
||||
"LOAD TEMP[0].xyz, BUFFER[1], IMM[0].xxxx\n"
|
||||
"ENDIF\n"
|
||||
|
||||
"MOV TEMP[1].x, IMM[0].xxxx\n"
|
||||
"BGNLOOP\n"
|
||||
/* Break if accumulated result so far is not available. */
|
||||
"UIF TEMP[0].zzzz\n"
|
||||
"BRK\n"
|
||||
"ENDIF\n"
|
||||
|
||||
/* Break if result_index >= result_count. */
|
||||
"USGE TEMP[5], TEMP[1].xxxx, CONST[0][0].zzzz\n"
|
||||
"UIF TEMP[5]\n"
|
||||
"BRK\n"
|
||||
"ENDIF\n"
|
||||
|
||||
/* Load fence and check result availability */
|
||||
"UMAD TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy, CONST[0][1].xxxx\n"
|
||||
"UADD TEMP[5].x, TEMP[5].xxxx, CONST[0][2].xxxx\n"
|
||||
"LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n"
|
||||
"ISHR TEMP[0].z, TEMP[5].xxxx, IMM[0].yyyy\n"
|
||||
"NOT TEMP[0].z, TEMP[0].zzzz\n"
|
||||
"UIF TEMP[0].zzzz\n"
|
||||
"BRK\n"
|
||||
"ENDIF\n"
|
||||
|
||||
"MOV TEMP[1].y, IMM[0].xxxx\n"
|
||||
"BGNLOOP\n"
|
||||
/* Load start and end. */
|
||||
"UMUL TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy\n"
|
||||
"UMAD TEMP[5].x, TEMP[1].yyyy, CONST[0][1].yyyy, TEMP[5].xxxx\n"
|
||||
"UADD TEMP[5].x, TEMP[5].xxxx, CONST[0][2].xxxx\n"
|
||||
"LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"
|
||||
|
||||
"UADD TEMP[5].y, TEMP[5].xxxx, CONST[0][0].xxxx\n"
|
||||
"LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n"
|
||||
|
||||
"U64ADD TEMP[4].xy, TEMP[3], -TEMP[2]\n"
|
||||
|
||||
"AND TEMP[5].z, CONST[0][0].wwww, IMM[4].xxxx\n"
|
||||
"UIF TEMP[5].zzzz\n"
|
||||
/* Load second start/end half-pair and
|
||||
* take the difference
|
||||
*/
|
||||
"UADD TEMP[5].xy, TEMP[5], IMM[1].wwww\n"
|
||||
"LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"
|
||||
"LOAD TEMP[3].xy, BUFFER[0], TEMP[5].yyyy\n"
|
||||
|
||||
"U64ADD TEMP[3].xy, TEMP[3], -TEMP[2]\n"
|
||||
"U64ADD TEMP[4].xy, TEMP[4], -TEMP[3]\n"
|
||||
"ENDIF\n"
|
||||
|
||||
"U64ADD TEMP[0].xy, TEMP[0], TEMP[4]\n"
|
||||
|
||||
/* Increment pair index */
|
||||
"UADD TEMP[1].y, TEMP[1].yyyy, IMM[1].xxxx\n"
|
||||
"USGE TEMP[5], TEMP[1].yyyy, CONST[0][1].zzzz\n"
|
||||
"UIF TEMP[5]\n"
|
||||
"BRK\n"
|
||||
"ENDIF\n"
|
||||
"ENDLOOP\n"
|
||||
|
||||
/* Increment result index */
|
||||
"UADD TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx\n"
|
||||
"ENDLOOP\n"
|
||||
"ENDIF\n"
|
||||
|
||||
"AND TEMP[4], CONST[0][0].wwww, IMM[1].yyyy\n"
|
||||
"UIF TEMP[4]\n"
|
||||
/* Store accumulated data for chaining. */
|
||||
"STORE BUFFER[2].xyz, CONST[0][1].wwww, TEMP[0]\n"
|
||||
"ELSE\n"
|
||||
"AND TEMP[4], CONST[0][0].wwww, IMM[1].zzzz\n"
|
||||
"UIF TEMP[4]\n"
|
||||
/* Store result availability. */
|
||||
"NOT TEMP[0].z, TEMP[0]\n"
|
||||
"AND TEMP[0].z, TEMP[0].zzzz, IMM[1].xxxx\n"
|
||||
"STORE BUFFER[2].x, CONST[0][1].wwww, TEMP[0].zzzz\n"
|
||||
|
||||
"AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n"
|
||||
"UIF TEMP[4]\n"
|
||||
"STORE BUFFER[2].y, CONST[0][1].wwww, IMM[0].xxxx\n"
|
||||
"ENDIF\n"
|
||||
"ELSE\n"
|
||||
/* Store result if it is available. */
|
||||
"NOT TEMP[4], TEMP[0].zzzz\n"
|
||||
"UIF TEMP[4]\n"
|
||||
/* Apply timestamp conversion */
|
||||
"AND TEMP[4], CONST[0][0].wwww, IMM[2].yyyy\n"
|
||||
"UIF TEMP[4]\n"
|
||||
"U64MUL TEMP[0].xy, TEMP[0], IMM[3].xyxy\n"
|
||||
"U64DIV TEMP[0].xy, TEMP[0], IMM[3].zwzw\n"
|
||||
"ENDIF\n"
|
||||
|
||||
/* Convert to boolean */
|
||||
"AND TEMP[4], CONST[0][0].wwww, IMM[1].wwww\n"
|
||||
"UIF TEMP[4]\n"
|
||||
"U64SNE TEMP[0].x, TEMP[0].xyxy, IMM[4].zwzw\n"
|
||||
"AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx\n"
|
||||
"MOV TEMP[0].y, IMM[0].xxxx\n"
|
||||
"ENDIF\n"
|
||||
|
||||
"AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n"
|
||||
"UIF TEMP[4]\n"
|
||||
"STORE BUFFER[2].xy, CONST[0][1].wwww, TEMP[0].xyxy\n"
|
||||
"ELSE\n"
|
||||
/* Clamping */
|
||||
"UIF TEMP[0].yyyy\n"
|
||||
"MOV TEMP[0].x, IMM[0].wwww\n"
|
||||
"ENDIF\n"
|
||||
|
||||
"AND TEMP[4], CONST[0][0].wwww, IMM[2].wwww\n"
|
||||
"UIF TEMP[4]\n"
|
||||
"UMIN TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz\n"
|
||||
"ENDIF\n"
|
||||
|
||||
"STORE BUFFER[2].x, CONST[0][1].wwww, TEMP[0].xxxx\n"
|
||||
"ENDIF\n"
|
||||
"ENDIF\n"
|
||||
"ENDIF\n"
|
||||
"ENDIF\n"
|
||||
|
||||
"END\n";
|
||||
|
||||
char text[sizeof(text_tmpl) + 32];
|
||||
struct tgsi_token tokens[1024];
|
||||
const struct nir_shader_compiler_options *options =
|
||||
rctx->b.screen->nir_options[MESA_SHADER_COMPUTE];
|
||||
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options,
|
||||
"r600_create_query_result_cs");
|
||||
struct pipe_compute_state state = {};
|
||||
|
||||
/* Hard code the frequency into the shader so that the backend can
|
||||
* use the full range of optimizations for divide-by-constant.
|
||||
b.shader->info.workgroup_size[0] = 1;
|
||||
b.shader->info.workgroup_size[1] = 1;
|
||||
b.shader->info.workgroup_size[2] = 1;
|
||||
b.shader->info.num_ubos = 1;
|
||||
b.shader->info.num_ssbos = 3;
|
||||
b.shader->num_uniforms = 3;
|
||||
|
||||
/* uint32_t accum_lo, accum_hi, availability_mask = 0; */
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(b.shader);
|
||||
nir_variable *accum = nir_local_variable_create(impl, glsl_ivec2_type(), "accum");
|
||||
nir_variable *availability_mask = nir_local_variable_create(impl, glsl_uint_type(), "availability_mask");
|
||||
nir_store_var(&b, availability_mask, nir_imm_int(&b, 0), 0x1);
|
||||
const struct r600_query_result_accum_vars accum_vars = {accum, availability_mask};
|
||||
|
||||
/* consts[0] = {end_offset, result_stride, result_count, config}
|
||||
* consts[1] = {fence_offset, pair_stride, pair_count, buffer_offset}
|
||||
* consts[2] = {buffer0_offset, 0, 0, 0}
|
||||
*/
|
||||
snprintf(text, sizeof(text), text_tmpl,
|
||||
rctx->screen->info.clock_crystal_freq);
|
||||
nir_def *default_ubo = nir_imm_int(&b, 0);
|
||||
nir_def *config_0 = nir_load_ubo(&b, 4, 32, default_ubo, nir_imm_int(&b, 0), .range_base = 0, .range = 16);
|
||||
nir_def *config_1 = nir_load_ubo(&b, 4, 32, default_ubo, nir_imm_int(&b, 16), .range_base = 16, .range = 16);
|
||||
nir_def *config_2 = nir_load_ubo(&b, 4, 32, default_ubo, nir_imm_int(&b, 32), .range_base = 32, .range = 16);
|
||||
|
||||
if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) {
|
||||
assert(false);
|
||||
return;
|
||||
/* Decode UBO payload into named fields to make the shader logic readable. */
|
||||
nir_def *end_offset = nir_channel(&b, config_0, 0);
|
||||
nir_def *result_stride = nir_channel(&b, config_0, 1);
|
||||
nir_def *result_count = nir_channel(&b, config_0, 2);
|
||||
nir_def *confifg_bitfield = nir_channel(&b, config_0, 3);
|
||||
nir_def *fence_offset = nir_channel(&b, config_1, 0);
|
||||
nir_def *pair_stride = nir_channel(&b, config_1, 1);
|
||||
nir_def *pair_count = nir_channel(&b, config_1, 2);
|
||||
nir_def *result_offset = nir_channel(&b, config_1, 3);
|
||||
nir_def *buffer0_offset = nir_channel(&b, config_2, 0);
|
||||
|
||||
/* Check result availability for timestamp queries. */
|
||||
nir_def *is_one_dword_result =
|
||||
nir_test_mask(&b, confifg_bitfield, R600_QUERY_ONE_DWORD);
|
||||
nir_if *if_one_dword_result = nir_push_if(&b, is_one_dword_result); {
|
||||
r600_query_result_load_one_dword(&b, fence_offset, buffer0_offset, &accum_vars);
|
||||
} nir_push_else(&b, if_one_dword_result); {
|
||||
r600_query_result_accumulate_pairs(&b, impl, confifg_bitfield, result_count,
|
||||
result_stride, fence_offset,
|
||||
pair_stride, pair_count,
|
||||
end_offset, buffer0_offset,
|
||||
&accum_vars);
|
||||
}
|
||||
nir_pop_if(&b, if_one_dword_result);
|
||||
|
||||
r600_query_result_store_output(&b, rctx, confifg_bitfield, result_offset, &accum_vars);
|
||||
|
||||
rctx->b.screen->finalize_nir(rctx->b.screen, b.shader, true);
|
||||
|
||||
state.ir_type = PIPE_SHADER_IR_NIR;
|
||||
state.prog = b.shader;
|
||||
|
||||
|
||||
state.ir_type = PIPE_SHADER_IR_TGSI;
|
||||
state.prog = tokens;
|
||||
|
||||
rctx->query_result_shader = rctx->b.create_compute_state(&rctx->b, &state);
|
||||
}
|
||||
|
|
@ -1613,6 +1740,7 @@ static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
|
|||
uint32_t pair_count;
|
||||
uint32_t buffer_offset;
|
||||
uint32_t buffer0_offset;
|
||||
uint32_t pad[3];
|
||||
} consts;
|
||||
|
||||
if (!rctx->query_result_shader) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue