mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 18:18:06 +02:00
amd: add ac_cu_info::has_point_sample_accel flag and use in ACO
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38701>
This commit is contained in:
parent
cfb745592d
commit
7b7bdb76ab
8 changed files with 17 additions and 11 deletions
|
|
@ -304,6 +304,8 @@ ac_fill_cu_info(struct radeon_info *info, struct drm_amdgpu_info_device *device_
|
|||
cu_info->has_lds_bank_count_16 = info->family == CHIP_KABINI || info->family == CHIP_STONEY;
|
||||
cu_info->has_sram_ecc_enabled = info->family == CHIP_VEGA20 || info->family == CHIP_MI100 ||
|
||||
info->family == CHIP_MI200 || info->family == CHIP_GFX940;
|
||||
cu_info->has_point_sample_accel = info->family == CHIP_STRIX1 || info->family == CHIP_STRIX_HALO ||
|
||||
info->family == CHIP_KRACKAN1;
|
||||
cu_info->has_fast_fma32 = info->gfx_level >= GFX9 || info->family == CHIP_TAHITI ||
|
||||
info->family == CHIP_HAWAII || info->family == CHIP_CARRIZO;
|
||||
cu_info->has_fma_mix = info->gfx_level >= GFX10 ||
|
||||
|
|
|
|||
|
|
@ -47,6 +47,8 @@ struct ac_cu_info {
|
|||
/* Flags */
|
||||
bool has_lds_bank_count_16 : 1;
|
||||
bool has_sram_ecc_enabled : 1;
|
||||
/* Whether image_sample* instructions may be treated as either sampler or no-sampler accesses. */
|
||||
bool has_point_sample_accel : 1;
|
||||
bool has_fast_fma32 : 1;
|
||||
/* Whether chips support fused v_fma_mix* instructions.
|
||||
* Otherwise, unfused v_mad_mix* is available on GFX9.
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ get_type(Program* program, aco_ptr<Instruction>& instr)
|
|||
|
||||
if (program->gfx_level >= GFX11) {
|
||||
if (instr->isMIMG()) {
|
||||
uint8_t vmem_type = get_vmem_type(program->gfx_level, program->family, instr.get());
|
||||
uint8_t vmem_type = get_vmem_type(instr.get(), program->dev.has_point_sample_accel);
|
||||
switch (vmem_type) {
|
||||
case vmem_bvh: return clause_bvh;
|
||||
case vmem_sampler: return clause_mimg_sample;
|
||||
|
|
|
|||
|
|
@ -1687,7 +1687,7 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>&
|
|||
} else {
|
||||
uint8_t vmem_type =
|
||||
state.program->gfx_level >= GFX12
|
||||
? get_vmem_type(state.program->gfx_level, state.program->family, instr.get())
|
||||
? get_vmem_type(instr.get(), state.program->dev.has_point_sample_accel)
|
||||
: vmem_nosampler;
|
||||
std::bitset<256>* vgprs = &ctx.vgpr_used_by_vmem_load;
|
||||
if (vmem_type == vmem_sampler)
|
||||
|
|
|
|||
|
|
@ -408,7 +408,7 @@ check_instr(wait_ctx& ctx, wait_imm& wait, Instruction* instr)
|
|||
* We can do this for GFX12 and different types for GFX11 if we know that the two
|
||||
* VMEM loads do not write the same register half or the same lanes.
|
||||
*/
|
||||
uint8_t vmem_type = get_vmem_type(ctx.gfx_level, ctx.program->family, instr);
|
||||
uint8_t vmem_type = get_vmem_type(instr, ctx.program->dev.has_point_sample_accel);
|
||||
if (vmem_type) {
|
||||
wait_event event = get_vmem_event(ctx, instr, vmem_type);
|
||||
wait_type type = (wait_type)(ffs(ctx.info->get_counters_for_event(event)) - 1);
|
||||
|
|
@ -834,7 +834,7 @@ gen(Instruction* instr, wait_ctx& ctx)
|
|||
case Format::MIMG:
|
||||
case Format::GLOBAL:
|
||||
case Format::SCRATCH: {
|
||||
uint8_t type = get_vmem_type(ctx.gfx_level, ctx.program->family, instr);
|
||||
uint8_t type = get_vmem_type(instr, ctx.program->dev.has_point_sample_accel);
|
||||
wait_event ev = get_vmem_event(ctx, instr, type);
|
||||
uint32_t mask = ev == event_vmem ? get_vmem_mask(ctx, instr) : 0;
|
||||
|
||||
|
|
|
|||
|
|
@ -107,6 +107,7 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
|||
program->dev.xnack_enabled = false;
|
||||
|
||||
program->dev.sram_ecc_enabled = options->cu_info->has_sram_ecc_enabled;
|
||||
program->dev.has_point_sample_accel = options->cu_info->has_point_sample_accel;
|
||||
|
||||
program->dev.has_fast_fma32 = options->cu_info->has_fast_fma32;
|
||||
program->dev.has_mac_legacy32 = program->gfx_level <= GFX7 || program->gfx_level == GFX10;
|
||||
|
|
@ -1470,7 +1471,7 @@ get_tied_defs(Instruction* instr)
|
|||
}
|
||||
|
||||
uint8_t
|
||||
get_vmem_type(amd_gfx_level gfx_level, radeon_family family, Instruction* instr)
|
||||
get_vmem_type(Instruction* instr, bool has_point_sample_accel)
|
||||
{
|
||||
if (instr->opcode == aco_opcode::image_bvh_intersect_ray ||
|
||||
instr->opcode == aco_opcode::image_bvh64_intersect_ray ||
|
||||
|
|
@ -1481,10 +1482,10 @@ get_vmem_type(amd_gfx_level gfx_level, radeon_family family, Instruction* instr)
|
|||
return vmem_sampler;
|
||||
} else if (instr->isMIMG() && !instr->operands[1].isUndefined() &&
|
||||
instr->operands[1].regClass() == s4) {
|
||||
bool point_sample_accel = gfx_level == GFX11_5 && family != CHIP_GFX1153 &&
|
||||
(instr->opcode == aco_opcode::image_sample ||
|
||||
instr->opcode == aco_opcode::image_sample_l ||
|
||||
instr->opcode == aco_opcode::image_sample_lz);
|
||||
bool point_sample_accel =
|
||||
has_point_sample_accel && (instr->opcode == aco_opcode::image_sample ||
|
||||
instr->opcode == aco_opcode::image_sample_l ||
|
||||
instr->opcode == aco_opcode::image_sample_lz);
|
||||
return vmem_sampler | (point_sample_accel ? vmem_nosampler : 0);
|
||||
} else if (instr->isVMEM() || instr->isScratch() || instr->isGlobal()) {
|
||||
return vmem_nosampler;
|
||||
|
|
|
|||
|
|
@ -2054,7 +2054,7 @@ enum vmem_type : uint8_t {
|
|||
/* VMEM instructions of the same type return in-order. For GFX12+, this determines which counter
|
||||
* is used.
|
||||
*/
|
||||
uint8_t get_vmem_type(amd_gfx_level gfx_level, radeon_family family, Instruction* instr);
|
||||
uint8_t get_vmem_type(Instruction* instr, bool has_point_sample_accel);
|
||||
|
||||
/* For all of the counters, the maximum value means no wait.
|
||||
* Some of the counters are larger than their bit field,
|
||||
|
|
@ -2249,6 +2249,7 @@ struct DeviceInfo {
|
|||
bool fused_mad_mix = false;
|
||||
bool xnack_enabled = false;
|
||||
bool sram_ecc_enabled = false;
|
||||
bool has_point_sample_accel = false;
|
||||
|
||||
int32_t scratch_global_offset_min;
|
||||
int32_t scratch_global_offset_max;
|
||||
|
|
|
|||
|
|
@ -320,7 +320,7 @@ get_wait_counter_info(Program* program, aco_ptr<Instruction>& instr)
|
|||
} else if (instr->isVMEM() && instr->definitions.empty() && program->gfx_level >= GFX10) {
|
||||
info[wait_type_vs] = 320;
|
||||
} else if (instr->isVMEM()) {
|
||||
uint8_t vm_type = get_vmem_type(program->gfx_level, program->family, instr.get());
|
||||
uint8_t vm_type = get_vmem_type(instr.get(), program->dev.has_point_sample_accel);
|
||||
wait_type type = wait_type_vm;
|
||||
if (program->gfx_level >= GFX12 && vm_type == vmem_bvh)
|
||||
type = wait_type_bvh;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue