diff --git a/docs/envvars.rst b/docs/envvars.rst
index f032c1ba2c7..a8dcc8c465c 100644
--- a/docs/envvars.rst
+++ b/docs/envvars.rst
@@ -1869,6 +1869,13 @@ r300 driver environment variables
       Disable AA compression and fast AA clear
    ``notcl``
       Disable hardware accelerated Transform/Clip/Lighting
+   ``ieeemath``
+      Force IEEE versions of VS math opcodes where applicable
+      and also IEEE handling of multiply by zero (R5xx only)
+   ``ffmath``
+      Force FF versions of VS math opcodes where applicable
+      and 0 * anything = 0 rules in FS
+
 
 Asahi driver environment variables
 ----------------------------------
diff --git a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
index e522fcae614..ff954b4e815 100644
--- a/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
+++ b/src/gallium/drivers/r300/compiler/r3xx_vertprog.c
@@ -197,6 +197,26 @@ ei_math1(struct r300_vertex_program_code *vp, unsigned int hw_opcode,
    inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
 }
 
+static void /* Wrapper around ei_math1(): picks one of three candidate opcodes from math_mode. */
+ei_math1_select(struct r300_vertex_program_code *vp,
+                unsigned math_mode, /* one of RC_MATH_IEEE, RC_MATH_DX, RC_MATH_FF */
+                unsigned hw_opcode_ieee, /* used when math_mode == RC_MATH_IEEE */
+                unsigned hw_opcode_dx, /* used when math_mode == RC_MATH_DX */
+                unsigned hw_opcode_ff, /* used when math_mode == RC_MATH_FF */
+                struct rc_sub_instruction *vpi,
+                unsigned int *inst)
+{
+   unsigned hw_opcode; /* assigned by every case below before ei_math1() reads it */
+   switch (math_mode) {
+   case RC_MATH_IEEE: hw_opcode = hw_opcode_ieee; break;
+   case RC_MATH_DX: hw_opcode = hw_opcode_dx; break;
+   case RC_MATH_FF: hw_opcode = hw_opcode_ff; break;
+   default: /* math_rules is a 2-bit field, so 3 is representable; the visible setter never assigns it */
+      unreachable();
+   }
+   ei_math1(vp, hw_opcode, vpi, inst); /* vp, vpi and inst are forwarded unchanged */
+}
+
 static void
 ei_cmp(struct r300_vertex_program_code *vp, struct rc_sub_instruction *vpi, unsigned int *inst)
 {
@@ -407,7 +427,8 @@ translate_vertex_program(struct radeon_compiler *c, void *user)
          ei_vector1(compiler->code, VE_FRACTION, vpi, inst);
          break;
       case RC_OPCODE_LG2:
-         ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst);
+         ei_math1_select(compiler->code, compiler->Base.math_rules, ME_LOG_BASE2_IEEE,
+                         ME_LOG_BASE2_FULL_DX, ME_LOG_BASE2_FULL_DX, vpi, inst); /* FF mode reuses the FULL_DX opcode for LG2 */
          break;
       case RC_OPCODE_LIT:
          ei_lit(compiler->code, vpi, inst);
@@ -434,10 +455,12 @@ translate_vertex_program(struct radeon_compiler *c, void *user)
          ei_pow(compiler->code, vpi, inst);
          break;
       case RC_OPCODE_RCP:
-         ei_math1(compiler->code, ME_RECIP_DX, vpi, inst);
+         ei_math1_select(compiler->code, compiler->Base.math_rules, ME_RECIP_IEEE,
+                         ME_RECIP_DX, ME_RECIP_FF, vpi, inst); /* argument order: IEEE, DX, FF */
          break;
       case RC_OPCODE_RSQ:
-         ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst);
+         ei_math1_select(compiler->code, compiler->Base.math_rules, ME_RECIP_SQRT_IEEE,
+                         ME_RECIP_SQRT_DX, ME_RECIP_SQRT_FF, vpi, inst); /* argument order: IEEE, DX, FF */
          break;
       case RC_OPCODE_SEQ:
          ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst);
diff --git a/src/gallium/drivers/r300/compiler/radeon_compiler.h b/src/gallium/drivers/r300/compiler/radeon_compiler.h
index 699500d1d35..f02b9551801 100644
--- a/src/gallium/drivers/r300/compiler/radeon_compiler.h
+++ b/src/gallium/drivers/r300/compiler/radeon_compiler.h
@@ -14,6 +14,10 @@
 
 #define RC_DBG_LOG (1 << 0)
 
+#define RC_MATH_DX 0x00 /* value of radeon_compiler::math_rules; 0, so it is what a zeroed field holds */
+#define RC_MATH_IEEE 0x01 /* value of radeon_compiler::math_rules; requested by the ieeemath option */
+#define RC_MATH_FF 0x02 /* value of radeon_compiler::math_rules; requested by the ffmath option */
+
 struct rc_swizzle_caps;
 
 enum rc_program_type { RC_VERTEX_PROGRAM, RC_FRAGMENT_PROGRAM, RC_NUM_PROGRAM_TYPES };
@@ -45,6 +49,9 @@ struct radeon_compiler {
    /* Whether to remove unused constants and empty holes in constant space. */
    unsigned remove_unused_constants : 1;
 
+   /* Math compatibility mode (one of RC_MATH_*), for some PVS opcodes and for multiply by zero rules on R5xx */
+   unsigned math_rules : 2;
+
    /**
     * Variables used internally, not be touched by callers
     * of the compiler
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index 8af57945716..1fe1eab0772 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -34,6 +34,8 @@ static const struct debug_named_value r300_debug_options[] = {
     { "nohiz", DBG_NO_HIZ, "Disable hierarchical zbuffer" },
     { "nocmask", DBG_NO_CMASK, "Disable AA compression and fast AA clear" },
     { "notcl", DBG_NO_TCL, "Disable hardware accelerated Transform/Clip/Lighting" },
+    { "ieeemath", DBG_IEEEMATH, "Force IEEE versions of VS math opcodes where applicable and also IEEE handling of multiply by zero (R5xx only)" },
+    { "ffmath", DBG_FFMATH, "Force FF versions of VS math opcodes where applicable and 0*anything=0 rules in FS" },
 
     /* must be last */
     DEBUG_NAMED_VALUE_END
diff --git a/src/gallium/drivers/r300/r300_debug_options.h b/src/gallium/drivers/r300/r300_debug_options.h
index 13f5aad8677..d43e7eea149 100644
--- a/src/gallium/drivers/r300/r300_debug_options.h
+++ b/src/gallium/drivers/r300/r300_debug_options.h
@@ -1,4 +1,6 @@
 OPT_BOOL(nohiz, false, "Disable hierarchical zbuffer")
 OPT_BOOL(nozmask, false, "Disable zbuffer compression")
+OPT_BOOL(ieeemath, false, "Force IEEE math rules and opcodes where applicable")
+OPT_BOOL(ffmath, false, "Force FF math rules and opcodes where applicable")
 
 #undef OPT_BOOL
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
index fd39caa7368..3933274c238 100644
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -255,7 +255,10 @@ static void r300_emit_fs_code_to_buffer(
         code->int_constant_count * 2;
 
         NEW_CB(shader->cb_code, shader->cb_code_size);
-        OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
+        if (r300->screen->options.ieeemath) /* only ieeemath is consulted; ffmath keeps the LEGACY value */
+            OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO_DEFAULT);
+        else
+            OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO_LEGACY); /* same value the removed line wrote */
         OUT_CB_REG(R500_US_PIXSIZE, code->max_temp_idx);
         OUT_CB_REG(R500_US_FC_CTRL, code->us_fc_ctrl);
         for(i = 0; i < code->int_constant_count; i++){
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 7b14a276912..25bbc69e232 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -3279,7 +3279,8 @@ enum {
 #   define R500_US_CODE_RANGE_ADDR(x) ((x) << 0)
 #   define R500_US_CODE_RANGE_SIZE(x) ((x) << 16)
 #define R500_US_CONFIG 0x4600
-#   define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1)
+#   define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO_DEFAULT (0 << 1) /* bit 1 clear */
+#   define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO_LEGACY (1 << 1) /* bit 1 set; same value as the removed define */
 #define R500_US_FC_ADDR_0 0xa000
 #   define R500_FC_BOOL_ADDR(x) ((x) << 0)
 #   define R500_FC_INT_ADDR(x) ((x) << 8)
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 0f04489b3b1..c136dce8ac8 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -840,6 +840,11 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws,
     if (SCREEN_DBG_ON(r300screen, DBG_NO_TCL))
         r300screen->caps.has_tcl = false;
 
+    if (SCREEN_DBG_ON(r300screen, DBG_IEEEMATH)) /* the debug flag can only switch the option on, never off */
+        r300screen->options.ieeemath = true;
+    if (SCREEN_DBG_ON(r300screen, DBG_FFMATH)) /* the debug flag can only switch the option on, never off */
+        r300screen->options.ffmath = true;
+
     r300screen->rws = rws;
     r300screen->screen.destroy = r300_destroy_screen;
     r300screen->screen.get_name = r300_get_name;
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index f05fdcab469..6879c465b90 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -92,6 +92,8 @@ radeon_winsys(struct pipe_screen *screen) {
 #define DBG_NO_HIZ (1 << 22)
 #define DBG_NO_CMASK (1 << 23)
 #define DBG_NO_TCL (1 << 25)
+#define DBG_IEEEMATH (1 << 26) /* matched by the "ieeemath" entry in r300_debug_options[] */
+#define DBG_FFMATH (1 << 27) /* matched by the "ffmath" entry in r300_debug_options[] */
 /*@}*/
 static inline bool SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags)
 {
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
index 71915bb28fa..44303e6776a 100644
--- a/src/gallium/drivers/r300/r300_vs.c
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -198,6 +198,12 @@ void r300_translate_vertex_shader(struct r300_context *r300,
     compiler.Base.debug = &r300->context.debug;
     compiler.Base.is_r500 = r300->screen->caps.is_r500;
     compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
+    /* Only R500 has a few IEEE math opcodes; on other chips ieeemath is ignored here. ieeemath wins over ffmath on R500. */
+    if (r300->screen->options.ieeemath && r300->screen->caps.is_r500) {
+        compiler.Base.math_rules = RC_MATH_IEEE;
+    } else if (r300->screen->options.ffmath) {
+        compiler.Base.math_rules = RC_MATH_FF;
+    } /* NOTE(review): no else branch; RC_MATH_DX (0) applies only if compiler was zero-initialized earlier - confirm */
     compiler.Base.has_half_swizzles = false;
     compiler.Base.has_presub = false;
     compiler.Base.has_omod = false;