mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 18:18:06 +02:00
radeonsi: enable denorms for 64-bit and 16-bit floats
This fixes FP16 conversion instructions on VI, which has 16-bit floats. It does not fix them on SI & CI, which can't disable denorms for those instructions. Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
parent
17fe3fa312
commit
329181ae33
3 changed files with 29 additions and 6 deletions
|
|
@ -4119,6 +4119,20 @@ int si_compile_llvm(struct si_screen *sscreen,
|
|||
|
||||
si_shader_binary_read_config(binary, conf, 0);
|
||||
|
||||
/* Enable 64-bit and 16-bit denormals, because there is no performance
|
||||
* cost.
|
||||
*
|
||||
* If denormals are enabled, all floating-point output modifiers are
|
||||
* ignored.
|
||||
*
|
||||
* Don't enable denormals for 32-bit floats, because:
|
||||
* - Floating-point output modifiers would be ignored by the hw.
|
||||
* - Some opcodes don't support denormals, such as v_mad_f32. We would
|
||||
* have to stop using those.
|
||||
* - SI & CI would be very slow.
|
||||
*/
|
||||
conf->float_mode |= V_00B028_FP_64_DENORMS;
|
||||
|
||||
FREE(binary->config);
|
||||
FREE(binary->global_symbol_offsets);
|
||||
binary->config = NULL;
|
||||
|
|
|
|||
|
|
@ -124,7 +124,8 @@ static void si_shader_ls(struct si_shader *shader)
|
|||
shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
|
||||
S_00B528_SGPRS((num_sgprs - 1) / 8) |
|
||||
S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
|
||||
S_00B528_DX10_CLAMP(1);
|
||||
S_00B528_DX10_CLAMP(1) |
|
||||
S_00B528_FLOAT_MODE(shader->config.float_mode);
|
||||
shader->config.rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
|
||||
S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
|
||||
}
|
||||
|
|
@ -157,7 +158,8 @@ static void si_shader_hs(struct si_shader *shader)
|
|||
si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
|
||||
S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
|
||||
S_00B428_SGPRS((num_sgprs - 1) / 8) |
|
||||
S_00B428_DX10_CLAMP(1));
|
||||
S_00B428_DX10_CLAMP(1) |
|
||||
S_00B428_FLOAT_MODE(shader->config.float_mode));
|
||||
si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
|
||||
S_00B42C_USER_SGPR(num_user_sgprs) |
|
||||
S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
|
||||
|
|
@ -203,7 +205,8 @@ static void si_shader_es(struct si_shader *shader)
|
|||
S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) |
|
||||
S_00B328_SGPRS((num_sgprs - 1) / 8) |
|
||||
S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
|
||||
S_00B328_DX10_CLAMP(1));
|
||||
S_00B328_DX10_CLAMP(1) |
|
||||
S_00B328_FLOAT_MODE(shader->config.float_mode));
|
||||
si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
|
||||
S_00B32C_USER_SGPR(num_user_sgprs) |
|
||||
S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
|
||||
|
|
@ -292,7 +295,8 @@ static void si_shader_gs(struct si_shader *shader)
|
|||
si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
|
||||
S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) |
|
||||
S_00B228_SGPRS((num_sgprs - 1) / 8) |
|
||||
S_00B228_DX10_CLAMP(1));
|
||||
S_00B228_DX10_CLAMP(1) |
|
||||
S_00B228_FLOAT_MODE(shader->config.float_mode));
|
||||
si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
|
||||
S_00B22C_USER_SGPR(num_user_sgprs) |
|
||||
S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
|
||||
|
|
@ -381,7 +385,8 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
|
|||
S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
|
||||
S_00B128_SGPRS((num_sgprs - 1) / 8) |
|
||||
S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
|
||||
S_00B128_DX10_CLAMP(1));
|
||||
S_00B128_DX10_CLAMP(1) |
|
||||
S_00B128_FLOAT_MODE(shader->config.float_mode));
|
||||
si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS,
|
||||
S_00B12C_USER_SGPR(num_user_sgprs) |
|
||||
S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
|
||||
|
|
@ -567,7 +572,8 @@ static void si_shader_ps(struct si_shader *shader)
|
|||
si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
|
||||
S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) |
|
||||
S_00B028_SGPRS((num_sgprs - 1) / 8) |
|
||||
S_00B028_DX10_CLAMP(1));
|
||||
S_00B028_DX10_CLAMP(1) |
|
||||
S_00B028_FLOAT_MODE(shader->config.float_mode));
|
||||
si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
|
||||
S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
|
||||
S_00B02C_USER_SGPR(num_user_sgprs) |
|
||||
|
|
|
|||
|
|
@ -2845,6 +2845,9 @@
|
|||
#define S_00B028_FLOAT_MODE(x) (((x) & 0xFF) << 12)
|
||||
#define G_00B028_FLOAT_MODE(x) (((x) >> 12) & 0xFF)
|
||||
#define C_00B028_FLOAT_MODE 0xFFF00FFF
|
||||
#define V_00B028_FP_32_DENORMS 0x30
|
||||
#define V_00B028_FP_64_DENORMS 0xc0
|
||||
#define V_00B028_FP_ALL_DENORMS 0xf0
|
||||
#define S_00B028_PRIV(x) (((x) & 0x1) << 20)
|
||||
#define G_00B028_PRIV(x) (((x) >> 20) & 0x1)
|
||||
#define C_00B028_PRIV 0xFFEFFFFF
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue