radv/gfx10: Implement bin size calculation.

Reviewed-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
Bas Nieuwenhuizen 2019-07-21 01:38:13 +02:00
parent 4b757697e9
commit 3268c806fb

View file

@ -2705,7 +2705,7 @@ struct radv_bin_size_entry {
};
static VkExtent2D
radv_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo)
radv_gfx9_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
static const struct radv_bin_size_entry color_size_table[][3][9] = {
{
@ -2975,6 +2975,110 @@ radv_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipelineCr
return extent;
}
/*
 * Compute the primitive-binning bin size for a GFX10 graphics pipeline.
 *
 * A byte budget is derived for the color, FMASK and depth/stencil tags from
 * the per-family SDP interface count and the number of render backends. For
 * each of the three, a power-of-two candidate extent is built from
 * log2(budget / bytes-per-pixel); the candidate with the smallest area is
 * kept. The result is finally clamped to at least 128 wide and 64 high.
 *
 * pipeline:    read for the chip family, the render backend count and the
 *              MSAA sample count encoded in graphics.ms.pa_sc_aa_config.
 * pCreateInfo: read for the render pass, the subpass index and the color
 *              blend state (which may be NULL).
 *
 * Returns the selected bin extent.
 */
static VkExtent2D
radv_gfx10_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
	RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
	struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
	VkExtent2D extent = {512, 512};

	unsigned sdp_interface_count;

	switch(pipeline->device->physical_device->rad_info.family) {
	case CHIP_NAVI10:
	case CHIP_NAVI12:
		sdp_interface_count = 16;
		break;
	case CHIP_NAVI14:
		sdp_interface_count = 8;
		break;
	default:
		unreachable("Unhandled GFX10 chip");
	}

	/* Fixed per-tag sizes and counts used to build the three budgets below. */
	const unsigned db_tag_size = 64;
	const unsigned db_tag_count = 312;
	const unsigned color_tag_size = 1024;
	const unsigned color_tag_count = 31;
	const unsigned fmask_tag_size = 256;
	const unsigned fmask_tag_count = 44;

	const unsigned rb_count = pipeline->device->physical_device->rad_info.num_render_backends;
	const unsigned pipe_count = MAX2(rb_count, sdp_interface_count);

	/* The integer division happens before the multiplication on purpose:
	 * the per-pipe tag count is truncated first, then scaled back up.
	 */
	const unsigned db_tag_part = (db_tag_count * rb_count / pipe_count) * db_tag_size * pipe_count;
	const unsigned color_tag_part = (color_tag_count * rb_count / pipe_count) * color_tag_size * pipe_count;
	const unsigned fmask_tag_part = (fmask_tag_count * rb_count / pipe_count) * fmask_tag_size * pipe_count;

	const unsigned total_samples = 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_aa_config);
	const unsigned samples_log = util_logbase2_ceil(total_samples);

	unsigned color_bytes_per_pixel = 0;
	unsigned fmask_bytes_per_pixel = 0;

	const VkPipelineColorBlendStateCreateInfo *vkblend = pCreateInfo->pColorBlendState;
	if (vkblend) {
		for (unsigned i = 0; i < subpass->color_count; i++) {
			/* Attachments that are never written or are unused
			 * do not contribute to the per-pixel cost.
			 */
			if (!vkblend->pAttachments[i].colorWriteMask)
				continue;

			if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
				continue;

			VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format;
			color_bytes_per_pixel += vk_format_get_blocksize(format);

			if (total_samples > 1) {
				/* Indexed by log2(sample count); only
				 * entries for up to 8 samples exist.
				 */
				const unsigned fmask_array[] = {0, 1, 1, 4};
				fmask_bytes_per_pixel += fmask_array[samples_log];
			}
		}

		color_bytes_per_pixel *= total_samples;
	}
	/* Avoid dividing by zero when no color attachment is written. */
	color_bytes_per_pixel = MAX2(color_bytes_per_pixel, 1);

	/* Split the log2 pixel count between the two axes, giving the extra
	 * bit to the width when the count is odd.
	 */
	const unsigned color_pixel_count_log = util_logbase2(color_tag_part / color_bytes_per_pixel);
	extent.width = 1ull << ((color_pixel_count_log + 1) / 2);
	extent.height = 1ull << (color_pixel_count_log / 2);

	if (fmask_bytes_per_pixel) {
		const unsigned fmask_pixel_count_log = util_logbase2(fmask_tag_part / fmask_bytes_per_pixel);

		/* NOTE(review): the height is derived from
		 * color_pixel_count_log, not fmask_pixel_count_log. Kept
		 * as-is; confirm whether this is intentional.
		 */
		const VkExtent2D fmask_extent = (VkExtent2D){
			.width = 1ull << ((fmask_pixel_count_log + 1) / 2),
			.height = 1ull << (color_pixel_count_log / 2)
		};

		if (fmask_extent.width * fmask_extent.height < extent.width * extent.height)
			extent = fmask_extent;
	}

	if (subpass->depth_stencil_attachment) {
		struct radv_render_pass_attachment *attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;

		/* Coefficients taken from AMDVLK */
		unsigned depth_coeff = vk_format_is_depth(attachment->format) ? 5 : 0;
		unsigned stencil_coeff = vk_format_is_stencil(attachment->format) ? 1 : 0;
		unsigned db_bytes_per_pixel = (depth_coeff + stencil_coeff) * total_samples;

		const unsigned db_pixel_count_log = util_logbase2(db_tag_part / db_bytes_per_pixel);

		/* NOTE(review): the height is derived from
		 * color_pixel_count_log, not db_pixel_count_log. Kept as-is;
		 * confirm whether this is intentional.
		 */
		const VkExtent2D db_extent = (VkExtent2D){
			.width = 1ull << ((db_pixel_count_log + 1) / 2),
			.height = 1ull << (color_pixel_count_log / 2)
		};

		if (db_extent.width * db_extent.height < extent.width * extent.height)
			extent = db_extent;
	}

	/* Clamp each axis to its own minimum. The height clamp previously
	 * read extent.width, which forced the height to always equal the
	 * (already clamped) width and discarded the computed height.
	 */
	extent.width = MAX2(extent.width, 128);
	extent.height = MAX2(extent.height, 64);

	return extent;
}
static void
radv_pipeline_generate_disabled_binning_state(struct radeon_cmdbuf *ctx_cs,
struct radv_pipeline *pipeline,
@ -3027,7 +3131,13 @@ radv_pipeline_generate_binning_state(struct radeon_cmdbuf *ctx_cs,
if (pipeline->device->physical_device->rad_info.chip_class < GFX9)
return;
VkExtent2D bin_size = radv_compute_bin_size(pipeline, pCreateInfo);
VkExtent2D bin_size;
if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
bin_size = radv_gfx10_compute_bin_size(pipeline, pCreateInfo);
} else if (pipeline->device->physical_device->rad_info.chip_class == GFX9) {
bin_size = radv_gfx9_compute_bin_size(pipeline, pCreateInfo);
} else
unreachable("Unhandled generation for binning bin size calculation");
if (pipeline->device->pbb_allowed && bin_size.width && bin_size.height) {
unsigned context_states_per_bin; /* allowed range: [1, 6] */