ac: align num_vgprs for gfx10.3

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5383>
This commit is contained in:
Marek Olšák 2020-01-28 23:35:49 -05:00 committed by Marge Bot
parent 2cc4bfbe01
commit 789cdab3b6
6 changed files with 23 additions and 5 deletions

View file

@ -21,6 +21,7 @@
* SOFTWARE.
*/
#include "ac_gpu_info.h"
#include "ac_binary.h"
#include "util/u_math.h"
@ -39,6 +40,7 @@
void ac_parse_shader_binary_config(const char *data, size_t nbytes,
unsigned wave_size,
bool really_needs_scratch,
const struct radeon_info *info,
struct ac_shader_config *conf)
{
uint32_t scratch_size = 0;
@ -127,6 +129,16 @@ void ac_parse_shader_binary_config(const char *data, size_t nbytes,
conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(scratch_size) * 256 * 4;
}
/* GFX 10.3 internally:
* - aligns VGPRS to 16 for Wave32 and 8 for Wave64
* - aligns LDS to 1024
*
* For shader-db stats, set num_vgprs to the value the hw actually uses.
*/
if (info->chip_class >= GFX10_3) {
conf->num_vgprs = align(conf->num_vgprs, wave_size == 32 ? 16 : 8);
}
/* Enable 64-bit and 16-bit denormals, because there is no performance
* cost.
*

View file

@ -32,6 +32,8 @@
extern "C" {
#endif
struct radeon_info;
struct ac_shader_config {
unsigned num_sgprs;
unsigned num_vgprs;
@ -51,6 +53,7 @@ struct ac_shader_config {
/* Parse the shader config stored in data (nbytes bytes) into *conf.
 *
 * info is read for its chip_class: for chip_class >= GFX10_3 the definition
 * passes conf->num_vgprs through align() with 16 when wave_size == 32 and
 * 8 otherwise, so the reported count matches what the hw uses.
 *
 * NOTE(review): the exact effect of really_needs_scratch is not visible
 * from this declaration; confirm against the definition.
 */
void ac_parse_shader_binary_config(const char *data, size_t nbytes,
unsigned wave_size,
bool really_needs_scratch,
const struct radeon_info *info,
struct ac_shader_config *conf);
#ifdef __cplusplus

View file

@ -514,7 +514,8 @@ bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name
return get_section_by_name(&binary->parts[0], name, data, nbytes);
}
bool ac_rtld_read_config(struct ac_rtld_binary *binary,
bool ac_rtld_read_config(const struct radeon_info *info,
struct ac_rtld_binary *binary,
struct ac_shader_config *config)
{
for (unsigned i = 0; i < binary->num_parts; ++i) {
@ -529,7 +530,7 @@ bool ac_rtld_read_config(struct ac_rtld_binary *binary,
/* TODO: be precise about scratch use? */
struct ac_shader_config c = {};
ac_parse_shader_binary_config(config_data, config_nbytes,
binary->wave_size, true, &c);
binary->wave_size, true, info, &c);
config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);

View file

@ -116,7 +116,8 @@ void ac_rtld_close(struct ac_rtld_binary *binary);
bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name,
const char **data, size_t *nbytes);
bool ac_rtld_read_config(struct ac_rtld_binary *binary,
bool ac_rtld_read_config(const struct radeon_info *info,
struct ac_rtld_binary *binary,
struct ac_shader_config *config);
struct ac_rtld_upload_info {

View file

@ -982,7 +982,8 @@ radv_shader_variant_create(struct radv_device *device,
return NULL;
}
if (!ac_rtld_read_config(&rtld_binary, &config)) {
if (!ac_rtld_read_config(&device->physical_device->rad_info,
&rtld_binary, &config)) {
ac_rtld_close(&rtld_binary);
free(variant);
return NULL;

View file

@ -121,7 +121,7 @@ bool si_compile_llvm(struct si_screen *sscreen, struct si_shader_binary *binary,
.elf_sizes = &binary->elf_size}))
return false;
bool ok = ac_rtld_read_config(&rtld, conf);
bool ok = ac_rtld_read_config(&sscreen->info, &rtld, conf);
ac_rtld_close(&rtld);
return ok;
}