mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 08:40:11 +01:00
ac: align num_vgprs for gfx10.3
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5383>
This commit is contained in:
parent
2cc4bfbe01
commit
789cdab3b6
6 changed files with 23 additions and 5 deletions
|
|
@ -21,6 +21,7 @@
|
|||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "ac_gpu_info.h"
|
||||
#include "ac_binary.h"
|
||||
|
||||
#include "util/u_math.h"
|
||||
|
|
@ -39,6 +40,7 @@
|
|||
void ac_parse_shader_binary_config(const char *data, size_t nbytes,
|
||||
unsigned wave_size,
|
||||
bool really_needs_scratch,
|
||||
const struct radeon_info *info,
|
||||
struct ac_shader_config *conf)
|
||||
{
|
||||
uint32_t scratch_size = 0;
|
||||
|
|
@ -127,6 +129,16 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes,
|
|||
conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(scratch_size) * 256 * 4;
|
||||
}
|
||||
|
||||
/* GFX 10.3 internally:
|
||||
* - aligns VGPRS to 16 for Wave32 and 8 for Wave64
|
||||
* - aligns LDS to 1024
|
||||
*
|
||||
* For shader-db stats, set num_vgprs that the hw actually uses.
|
||||
*/
|
||||
if (info->chip_class >= GFX10_3) {
|
||||
conf->num_vgprs = align(conf->num_vgprs, wave_size == 32 ? 16 : 8);
|
||||
}
|
||||
|
||||
/* Enable 64-bit and 16-bit denormals, because there is no performance
|
||||
* cost.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -32,6 +32,8 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct radeon_info;
|
||||
|
||||
struct ac_shader_config {
|
||||
unsigned num_sgprs;
|
||||
unsigned num_vgprs;
|
||||
|
|
@ -51,6 +53,7 @@ struct ac_shader_config {
|
|||
void ac_parse_shader_binary_config(const char *data, size_t nbytes,
|
||||
unsigned wave_size,
|
||||
bool really_needs_scratch,
|
||||
const struct radeon_info *info,
|
||||
struct ac_shader_config *conf);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
|||
|
|
@ -514,7 +514,8 @@ bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name
|
|||
return get_section_by_name(&binary->parts[0], name, data, nbytes);
|
||||
}
|
||||
|
||||
bool ac_rtld_read_config(struct ac_rtld_binary *binary,
|
||||
bool ac_rtld_read_config(const struct radeon_info *info,
|
||||
struct ac_rtld_binary *binary,
|
||||
struct ac_shader_config *config)
|
||||
{
|
||||
for (unsigned i = 0; i < binary->num_parts; ++i) {
|
||||
|
|
@ -529,7 +530,7 @@ bool ac_rtld_read_config(struct ac_rtld_binary *binary,
|
|||
/* TODO: be precise about scratch use? */
|
||||
struct ac_shader_config c = {};
|
||||
ac_parse_shader_binary_config(config_data, config_nbytes,
|
||||
binary->wave_size, true, &c);
|
||||
binary->wave_size, true, info, &c);
|
||||
|
||||
config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
|
||||
config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
|
||||
|
|
|
|||
|
|
@ -116,7 +116,8 @@ void ac_rtld_close(struct ac_rtld_binary *binary);
|
|||
bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name,
|
||||
const char **data, size_t *nbytes);
|
||||
|
||||
bool ac_rtld_read_config(struct ac_rtld_binary *binary,
|
||||
bool ac_rtld_read_config(const struct radeon_info *info,
|
||||
struct ac_rtld_binary *binary,
|
||||
struct ac_shader_config *config);
|
||||
|
||||
struct ac_rtld_upload_info {
|
||||
|
|
|
|||
|
|
@ -982,7 +982,8 @@ radv_shader_variant_create(struct radv_device *device,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (!ac_rtld_read_config(&rtld_binary, &config)) {
|
||||
if (!ac_rtld_read_config(&device->physical_device->rad_info,
|
||||
&rtld_binary, &config)) {
|
||||
ac_rtld_close(&rtld_binary);
|
||||
free(variant);
|
||||
return NULL;
|
||||
|
|
|
|||
|
|
@ -121,7 +121,7 @@ bool si_compile_llvm(struct si_screen *sscreen, struct si_shader_binary *binary,
|
|||
.elf_sizes = &binary->elf_size}))
|
||||
return false;
|
||||
|
||||
bool ok = ac_rtld_read_config(&rtld, conf);
|
||||
bool ok = ac_rtld_read_config(&sscreen->info, &rtld, conf);
|
||||
ac_rtld_close(&rtld);
|
||||
return ok;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue