mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 15:48:36 +02:00
radv: rename radeon_info variables to gpu_info everywhere
Sometimes we might have other info struct, so renaming to gpu_info removes the confusion. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28500>
This commit is contained in:
parent
52663ec80f
commit
2686cd59df
19 changed files with 185 additions and 184 deletions
|
|
@ -1178,12 +1178,12 @@ fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_meta_bl
|
|||
const struct radv_image *image = img_bsurf->image;
|
||||
const struct radeon_surf *surf = &image->planes[0].surface;
|
||||
struct radv_device *device = cmd_buffer->device;
|
||||
const struct radeon_info *rad_info = &device->physical_device->rad_info;
|
||||
const struct radeon_info *gpu_info = &device->physical_device->rad_info;
|
||||
struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws);
|
||||
struct ac_surf_info surf_info = radv_get_ac_surf_info(device, image);
|
||||
|
||||
/* GFX10 will use a different workaround unless this is not a 2D image */
|
||||
if (rad_info->gfx_level < GFX9 || (rad_info->gfx_level >= GFX10 && image->vk.image_type == VK_IMAGE_TYPE_2D) ||
|
||||
if (gpu_info->gfx_level < GFX9 || (gpu_info->gfx_level >= GFX10 && image->vk.image_type == VK_IMAGE_TYPE_2D) ||
|
||||
image->vk.mip_levels == 1 || !vk_format_is_block_compressed(image->vk.format))
|
||||
return;
|
||||
|
||||
|
|
@ -1223,7 +1223,7 @@ fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_meta_bl
|
|||
uint32_t x = (coordY < hw_mip_extent.height) ? hw_mip_extent.width : 0;
|
||||
for (; x < mip_extent.width; x++) {
|
||||
uint32_t coordX = x + mip_offset.x;
|
||||
uint64_t addr = ac_surface_addr_from_coord(addrlib, rad_info, surf, &surf_info, mip_level, coordX, coordY,
|
||||
uint64_t addr = ac_surface_addr_from_coord(addrlib, gpu_info, surf, &surf_info, mip_level, coordX, coordY,
|
||||
img_bsurf->layer, image->vk.image_type == VK_IMAGE_TYPE_3D);
|
||||
struct radeon_winsys_bo *img_bo = image->bindings[0].bo;
|
||||
struct radeon_winsys_bo *mem_bo = buf_bsurf->buffer->bo;
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ bool radv_nir_lower_ray_payload_derefs(nir_shader *shader, uint32_t offset);
|
|||
bool radv_nir_lower_ray_queries(nir_shader *shader, struct radv_device *device);
|
||||
|
||||
bool radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_shader_stage *vs_stage,
|
||||
const struct radv_graphics_state_key *gfx_state, const struct radeon_info *rad_info);
|
||||
const struct radv_graphics_state_key *gfx_state, const struct radeon_info *gpu_info);
|
||||
|
||||
bool radv_nir_lower_primitive_shading_rate(nir_shader *nir, enum amd_gfx_level gfx_level);
|
||||
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ typedef struct {
|
|||
const struct radv_shader_args *args;
|
||||
const struct radv_shader_info *info;
|
||||
const struct radv_graphics_state_key *gfx_state;
|
||||
const struct radeon_info *rad_info;
|
||||
const struct radeon_info *gpu_info;
|
||||
} lower_vs_inputs_state;
|
||||
|
||||
static nir_def *
|
||||
|
|
@ -239,12 +239,12 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs
|
|||
const enum pipe_format attrib_format = s->gfx_state->vi.vertex_attribute_formats[location];
|
||||
const struct util_format_description *f = util_format_description(attrib_format);
|
||||
const struct ac_vtx_format_info *vtx_info =
|
||||
ac_get_vtx_format_info(s->rad_info->gfx_level, s->rad_info->family, attrib_format);
|
||||
ac_get_vtx_format_info(s->gpu_info->gfx_level, s->gpu_info->family, attrib_format);
|
||||
const unsigned binding_index = s->info->vs.use_per_attribute_vb_descs ? location : attrib_binding;
|
||||
const unsigned desc_index = util_bitcount(s->info->vs.vb_desc_usage_mask & u_bit_consecutive(0, binding_index));
|
||||
|
||||
nir_def *vertex_buffers_arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.vertex_buffers);
|
||||
nir_def *vertex_buffers = nir_pack_64_2x32_split(b, vertex_buffers_arg, nir_imm_int(b, s->rad_info->address32_hi));
|
||||
nir_def *vertex_buffers = nir_pack_64_2x32_split(b, vertex_buffers_arg, nir_imm_int(b, s->gpu_info->address32_hi));
|
||||
nir_def *descriptor = nir_load_smem_amd(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16));
|
||||
nir_def *base_index = calc_vs_input_index(b, location, s);
|
||||
nir_def *zero = nir_imm_int(b, 0);
|
||||
|
|
@ -411,7 +411,7 @@ lower_vs_input_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
|
|||
|
||||
bool
|
||||
radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_shader_stage *vs_stage,
|
||||
const struct radv_graphics_state_key *gfx_state, const struct radeon_info *rad_info)
|
||||
const struct radv_graphics_state_key *gfx_state, const struct radeon_info *gpu_info)
|
||||
{
|
||||
assert(shader->info.stage == MESA_SHADER_VERTEX);
|
||||
|
||||
|
|
@ -419,7 +419,7 @@ radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_shader_stage *vs_
|
|||
.info = &vs_stage->info,
|
||||
.args = &vs_stage->args,
|
||||
.gfx_state = gfx_state,
|
||||
.rad_info = rad_info,
|
||||
.gpu_info = gpu_info,
|
||||
};
|
||||
|
||||
return nir_shader_intrinsics_pass(shader, lower_vs_input_instr, nir_metadata_dominance | nir_metadata_block_index,
|
||||
|
|
|
|||
|
|
@ -534,13 +534,13 @@ radv_cmd_buffer_upload_alloc_aligned(struct radv_cmd_buffer *cmd_buffer, unsigne
|
|||
{
|
||||
assert(size % 4 == 0);
|
||||
|
||||
const struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info;
|
||||
const struct radeon_info *gpu_info = &cmd_buffer->device->physical_device->rad_info;
|
||||
|
||||
/* Align to the scalar cache line size if it results in this allocation
|
||||
* being placed in less of them.
|
||||
*/
|
||||
unsigned offset = cmd_buffer->upload.offset;
|
||||
unsigned line_size = rad_info->gfx_level >= GFX10 ? 64 : 32;
|
||||
unsigned line_size = gpu_info->gfx_level >= GFX10 ? 64 : 32;
|
||||
unsigned gap = align(offset, line_size) - offset;
|
||||
if ((size & (line_size - 1)) > gap)
|
||||
offset = align(offset, line_size);
|
||||
|
|
@ -5260,7 +5260,7 @@ static void
|
|||
radv_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw, bool indirect_draw,
|
||||
bool count_from_stream_output, uint32_t draw_vertex_count)
|
||||
{
|
||||
const struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
|
||||
const struct radeon_info *gpu_info = &cmd_buffer->device->physical_device->rad_info;
|
||||
struct radv_cmd_state *state = &cmd_buffer->state;
|
||||
const unsigned patch_control_points = state->dynamic.vk.ts.patch_control_points;
|
||||
const unsigned topology = state->dynamic.vk.ia.primitive_topology;
|
||||
|
|
@ -5273,10 +5273,10 @@ radv_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_
|
|||
patch_control_points, state->tess_num_patches);
|
||||
|
||||
if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) {
|
||||
if (info->gfx_level == GFX9) {
|
||||
if (gpu_info->gfx_level == GFX9) {
|
||||
radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs, R_030960_IA_MULTI_VGT_PARAM, 4,
|
||||
ia_multi_vgt_param);
|
||||
} else if (info->gfx_level >= GFX7) {
|
||||
} else if (gpu_info->gfx_level >= GFX7) {
|
||||
radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
|
||||
} else {
|
||||
radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
|
||||
|
|
@ -5327,14 +5327,14 @@ gfx10_emit_ge_cntl(struct radv_cmd_buffer *cmd_buffer)
|
|||
static void
|
||||
radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *draw_info)
|
||||
{
|
||||
const struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
|
||||
const struct radeon_info *gpu_info = &cmd_buffer->device->physical_device->rad_info;
|
||||
struct radv_cmd_state *state = &cmd_buffer->state;
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
uint32_t topology = state->dynamic.vk.ia.primitive_topology;
|
||||
bool disable_instance_packing = false;
|
||||
|
||||
/* Draw state. */
|
||||
if (info->gfx_level >= GFX10) {
|
||||
if (gpu_info->gfx_level >= GFX10) {
|
||||
gfx10_emit_ge_cntl(cmd_buffer);
|
||||
} else {
|
||||
radv_emit_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1, draw_info->indirect,
|
||||
|
|
@ -5345,7 +5345,7 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_d
|
|||
* topologies and instance_count > 1, pipeline stats generated by GE are incorrect. It needs to
|
||||
* be applied for indexed and non-indexed draws.
|
||||
*/
|
||||
if (info->gfx_level == GFX10_3 && state->active_pipeline_queries > 0 &&
|
||||
if (gpu_info->gfx_level == GFX10_3 && state->active_pipeline_queries > 0 &&
|
||||
(draw_info->instance_count > 1 || draw_info->indirect) &&
|
||||
(topology == V_008958_DI_PT_LINELIST_ADJ || topology == V_008958_DI_PT_LINESTRIP_ADJ ||
|
||||
topology == V_008958_DI_PT_TRILIST_ADJ || topology == V_008958_DI_PT_TRISTRIP_ADJ)) {
|
||||
|
|
@ -5353,7 +5353,7 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_d
|
|||
}
|
||||
|
||||
if ((draw_info->indexed && state->index_type != state->last_index_type) ||
|
||||
(info->gfx_level == GFX10_3 &&
|
||||
(gpu_info->gfx_level == GFX10_3 &&
|
||||
(state->last_index_type == -1 ||
|
||||
disable_instance_packing != G_028A7C_DISABLE_INSTANCE_PACKING(state->last_index_type)))) {
|
||||
uint32_t index_type = state->index_type | S_028A7C_DISABLE_INSTANCE_PACKING(disable_instance_packing);
|
||||
|
|
@ -8990,7 +8990,7 @@ radv_emit_fs_state(struct radv_cmd_buffer *cmd_buffer)
|
|||
static void
|
||||
radv_emit_db_shader_control(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
const struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info;
|
||||
const struct radeon_info *gpu_info = &cmd_buffer->device->physical_device->rad_info;
|
||||
const struct radv_shader *ps = cmd_buffer->state.shaders[MESA_SHADER_FRAGMENT];
|
||||
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
|
||||
const bool uses_ds_feedback_loop =
|
||||
|
|
@ -9004,7 +9004,7 @@ radv_emit_db_shader_control(struct radv_cmd_buffer *cmd_buffer)
|
|||
} else {
|
||||
db_shader_control = S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_ANY_Z) |
|
||||
S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) |
|
||||
S_02880C_DUAL_QUAD_DISABLE(rad_info->has_rbplus && !rad_info->rbplus_allowed);
|
||||
S_02880C_DUAL_QUAD_DISABLE(gpu_info->has_rbplus && !gpu_info->rbplus_allowed);
|
||||
}
|
||||
|
||||
/* When a depth/stencil attachment is used inside feedback loops, use LATE_Z to make sure shader invocations read the
|
||||
|
|
@ -9012,7 +9012,7 @@ radv_emit_db_shader_control(struct radv_cmd_buffer *cmd_buffer)
|
|||
* Also apply the bug workaround for smoothing (overrasterization) on GFX6.
|
||||
*/
|
||||
if (uses_ds_feedback_loop ||
|
||||
(rad_info->gfx_level == GFX6 && d->vk.rs.line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR))
|
||||
(gpu_info->gfx_level == GFX6 && d->vk.rs.line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR))
|
||||
db_shader_control = (db_shader_control & C_02880C_Z_ORDER) | S_02880C_Z_ORDER(V_02880C_LATE_Z);
|
||||
|
||||
if (ps && ps->info.ps.pops) {
|
||||
|
|
@ -9021,7 +9021,7 @@ radv_emit_db_shader_control(struct radv_cmd_buffer *cmd_buffer)
|
|||
* PixelInterlock: 1x.
|
||||
* SampleInterlock: MSAA_EXPOSED_SAMPLES (much faster at common edges of adjacent primitives with MSAA).
|
||||
*/
|
||||
if (rad_info->gfx_level >= GFX11) {
|
||||
if (gpu_info->gfx_level >= GFX11) {
|
||||
db_shader_control |= S_02880C_OVERRIDE_INTRINSIC_RATE_ENABLE(1);
|
||||
if (ps->info.ps.pops_is_per_sample)
|
||||
db_shader_control |= S_02880C_OVERRIDE_INTRINSIC_RATE(util_logbase2(rasterization_samples));
|
||||
|
|
@ -9029,13 +9029,13 @@ radv_emit_db_shader_control(struct radv_cmd_buffer *cmd_buffer)
|
|||
if (ps->info.ps.pops_is_per_sample)
|
||||
db_shader_control |= S_02880C_POPS_OVERLAP_NUM_SAMPLES(util_logbase2(rasterization_samples));
|
||||
|
||||
if (rad_info->has_pops_missed_overlap_bug) {
|
||||
if (gpu_info->has_pops_missed_overlap_bug) {
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_028060_DB_DFSM_CONTROL,
|
||||
S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) |
|
||||
S_028060_POPS_DRAIN_PS_ON_OVERLAP(rasterization_samples >= 8));
|
||||
}
|
||||
}
|
||||
} else if (rad_info->has_export_conflict_bug && rasterization_samples == 1) {
|
||||
} else if (gpu_info->has_export_conflict_bug && rasterization_samples == 1) {
|
||||
for (uint32_t i = 0; i < MAX_RTS; i++) {
|
||||
if (d->vk.cb.attachments[i].write_mask && d->vk.cb.attachments[i].blend_enable) {
|
||||
db_shader_control |= S_02880C_OVERRIDE_INTRINSIC_RATE_ENABLE(1) | S_02880C_OVERRIDE_INTRINSIC_RATE(2);
|
||||
|
|
@ -9584,7 +9584,7 @@ radv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_
|
|||
ALWAYS_INLINE static void
|
||||
radv_after_draw(struct radv_cmd_buffer *cmd_buffer, bool dgc)
|
||||
{
|
||||
const struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info;
|
||||
const struct radeon_info *gpu_info = &cmd_buffer->device->physical_device->rad_info;
|
||||
bool has_prefetch = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7;
|
||||
/* Start prefetches after the draw has been started. Both will
|
||||
* run in parallel, but starting the draw first is more
|
||||
|
|
@ -9598,7 +9598,7 @@ radv_after_draw(struct radv_cmd_buffer *cmd_buffer, bool dgc)
|
|||
* It must be done after drawing.
|
||||
*/
|
||||
if (radv_is_streamout_enabled(cmd_buffer) &&
|
||||
(rad_info->family == CHIP_HAWAII || rad_info->family == CHIP_TONGA || rad_info->family == CHIP_FIJI)) {
|
||||
(gpu_info->family == CHIP_HAWAII || gpu_info->family == CHIP_TONGA || gpu_info->family == CHIP_FIJI)) {
|
||||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_STREAMOUT_SYNC;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ VkResult
|
|||
radv_create_shadow_regs_preamble(struct radv_device *device, struct radv_queue_state *queue_state)
|
||||
{
|
||||
struct radeon_winsys *ws = device->ws;
|
||||
const struct radeon_info *info = &device->physical_device->rad_info;
|
||||
const struct radeon_info *gpu_info = &device->physical_device->rad_info;
|
||||
VkResult result;
|
||||
|
||||
struct radeon_cmdbuf *cs = ws->cs_create(ws, AMD_IP_GFX, false);
|
||||
|
|
@ -56,11 +56,11 @@ radv_create_shadow_regs_preamble(struct radv_device *device, struct radv_queue_s
|
|||
goto fail;
|
||||
|
||||
/* fill the cs for shadow regs preamble ib that starts the register shadowing */
|
||||
ac_create_shadowing_ib_preamble(info, (pm4_cmd_add_fn)&radeon_emit, cs, queue_state->shadowed_regs->va,
|
||||
ac_create_shadowing_ib_preamble(gpu_info, (pm4_cmd_add_fn)&radeon_emit, cs, queue_state->shadowed_regs->va,
|
||||
device->pbb_allowed);
|
||||
|
||||
while (cs->cdw & 7) {
|
||||
if (info->gfx_ib_pad_with_type2)
|
||||
if (gpu_info->gfx_ib_pad_with_type2)
|
||||
radeon_emit(cs, PKT2_NOP_PAD);
|
||||
else
|
||||
radeon_emit(cs, PKT3_NOP_PAD);
|
||||
|
|
@ -125,7 +125,7 @@ radv_emit_shadow_regs_preamble(struct radeon_cmdbuf *cs, const struct radv_devic
|
|||
VkResult
|
||||
radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct radv_queue *queue)
|
||||
{
|
||||
const struct radeon_info *info = &device->physical_device->rad_info;
|
||||
const struct radeon_info *gpu_info = &device->physical_device->rad_info;
|
||||
struct radeon_winsys *ws = device->ws;
|
||||
struct radeon_cmdbuf *cs;
|
||||
VkResult result;
|
||||
|
|
@ -137,7 +137,7 @@ radv_init_shadowed_regs_buffer_state(const struct radv_device *device, struct ra
|
|||
radeon_check_space(ws, cs, 768);
|
||||
|
||||
radv_emit_shadow_regs_preamble(cs, device, &queue->state);
|
||||
ac_emulate_clear_state(info, cs, radv_set_context_reg_array);
|
||||
ac_emulate_clear_state(gpu_info, cs, radv_set_context_reg_array);
|
||||
|
||||
result = ws->cs_finalize(cs);
|
||||
if (result == VK_SUCCESS) {
|
||||
|
|
|
|||
|
|
@ -119,7 +119,7 @@ radv_dump_mmapped_reg(const struct radv_device *device, FILE *f, unsigned offset
|
|||
static void
|
||||
radv_dump_debug_registers(const struct radv_device *device, FILE *f)
|
||||
{
|
||||
const struct radeon_info *info = &device->physical_device->rad_info;
|
||||
const struct radeon_info *gpu_info = &device->physical_device->rad_info;
|
||||
|
||||
fprintf(f, "Memory-mapped registers:\n");
|
||||
radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
|
||||
|
|
@ -131,7 +131,7 @@ radv_dump_debug_registers(const struct radv_device *device, FILE *f)
|
|||
radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
|
||||
radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
|
||||
radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
|
||||
if (info->gfx_level <= GFX8) {
|
||||
if (gpu_info->gfx_level <= GFX8) {
|
||||
radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
|
||||
radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
|
||||
radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
|
||||
|
|
@ -634,21 +634,21 @@ radv_dump_app_info(const struct radv_device *device, FILE *f)
|
|||
static void
|
||||
radv_dump_device_name(const struct radv_device *device, FILE *f)
|
||||
{
|
||||
const struct radeon_info *info = &device->physical_device->rad_info;
|
||||
const struct radeon_info *gpu_info = &device->physical_device->rad_info;
|
||||
#ifndef _WIN32
|
||||
char kernel_version[128] = {0};
|
||||
struct utsname uname_data;
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
fprintf(f, "Device name: %s (DRM %i.%i.%i)\n\n", device->physical_device->marketing_name, info->drm_major,
|
||||
info->drm_minor, info->drm_patchlevel);
|
||||
fprintf(f, "Device name: %s (DRM %i.%i.%i)\n\n", device->physical_device->marketing_name, gpu_info->drm_major,
|
||||
gpu_info->drm_minor, gpu_info->drm_patchlevel);
|
||||
#else
|
||||
if (uname(&uname_data) == 0)
|
||||
snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release);
|
||||
|
||||
fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", device->physical_device->marketing_name, info->drm_major,
|
||||
info->drm_minor, info->drm_patchlevel, kernel_version);
|
||||
fprintf(f, "Device name: %s (DRM %i.%i.%i%s)\n\n", device->physical_device->marketing_name, gpu_info->drm_major,
|
||||
gpu_info->drm_minor, gpu_info->drm_patchlevel, kernel_version);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1761,13 +1761,13 @@ radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buff
|
|||
ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->vk.samples));
|
||||
|
||||
if (device->physical_device->rad_info.gfx_level >= GFX7) {
|
||||
const struct radeon_info *info = &device->physical_device->rad_info;
|
||||
const struct radeon_info *gpu_info = &device->physical_device->rad_info;
|
||||
unsigned tiling_index = surf->u.legacy.tiling_index[level];
|
||||
unsigned stencil_index = surf->u.legacy.zs.stencil_tiling_index[level];
|
||||
unsigned macro_index = surf->u.legacy.macro_tile_index;
|
||||
unsigned tile_mode = info->si_tile_mode_array[tiling_index];
|
||||
unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
|
||||
unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
|
||||
unsigned tile_mode = gpu_info->si_tile_mode_array[tiling_index];
|
||||
unsigned stencil_tile_mode = gpu_info->si_tile_mode_array[stencil_index];
|
||||
unsigned macro_mode = gpu_info->cik_macrotile_mode_array[macro_index];
|
||||
|
||||
if (stencil_only)
|
||||
tile_mode = stencil_tile_mode;
|
||||
|
|
|
|||
|
|
@ -857,17 +857,17 @@ radv_image_alloc_values(const struct radv_device *device, struct radv_image *ima
|
|||
static bool
|
||||
radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
|
||||
{
|
||||
const struct radeon_info *rad_info = &device->physical_device->rad_info;
|
||||
const struct radeon_info *gpu_info = &device->physical_device->rad_info;
|
||||
int log2_samples = util_logbase2(image->vk.samples);
|
||||
|
||||
assert(rad_info->gfx_level >= GFX10);
|
||||
assert(gpu_info->gfx_level >= GFX10);
|
||||
|
||||
for (unsigned i = 0; i < image->plane_count; ++i) {
|
||||
VkFormat fmt = radv_image_get_plane_format(device->physical_device, image, i);
|
||||
int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
|
||||
int log2_bpp_and_samples;
|
||||
|
||||
if (rad_info->gfx_level >= GFX10_3) {
|
||||
if (gpu_info->gfx_level >= GFX10_3) {
|
||||
log2_bpp_and_samples = log2_bpp + log2_samples;
|
||||
} else {
|
||||
if (vk_format_has_depth(image->vk.format) && image->vk.array_layers >= 8) {
|
||||
|
|
@ -877,7 +877,7 @@ radv_image_is_pipe_misaligned(const struct radv_device *device, const struct rad
|
|||
log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
|
||||
}
|
||||
|
||||
int num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
|
||||
int num_pipes = G_0098F8_NUM_PIPES(gpu_info->gb_addr_config);
|
||||
int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);
|
||||
|
||||
if (vk_format_has_depth(image->vk.format)) {
|
||||
|
|
@ -885,7 +885,7 @@ radv_image_is_pipe_misaligned(const struct radv_device *device, const struct rad
|
|||
return true;
|
||||
}
|
||||
} else {
|
||||
int max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
|
||||
int max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(gpu_info->gb_addr_config);
|
||||
int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
|
||||
int samples_overlap = MIN2(log2_samples, overlap);
|
||||
|
||||
|
|
|
|||
|
|
@ -211,9 +211,9 @@ radv_get_driver_uuid(void *uuid)
|
|||
}
|
||||
|
||||
static void
|
||||
radv_get_device_uuid(const struct radeon_info *info, void *uuid)
|
||||
radv_get_device_uuid(const struct radeon_info *gpu_info, void *uuid)
|
||||
{
|
||||
ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
|
||||
ac_compute_device_uuid(gpu_info, uuid, VK_UUID_SIZE);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -599,18 +599,18 @@ static void
|
|||
radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves,
|
||||
struct radeon_winsys_bo *scratch_bo)
|
||||
{
|
||||
const struct radeon_info *info = &device->physical_device->rad_info;
|
||||
const struct radeon_info *gpu_info = &device->physical_device->rad_info;
|
||||
|
||||
if (!scratch_bo)
|
||||
return;
|
||||
|
||||
radv_cs_add_buffer(device->ws, cs, scratch_bo);
|
||||
|
||||
if (info->gfx_level >= GFX11) {
|
||||
if (gpu_info->gfx_level >= GFX11) {
|
||||
uint64_t va = radv_buffer_get_va(scratch_bo);
|
||||
|
||||
/* WAVES is per SE for SPI_TMPRING_SIZE. */
|
||||
waves /= info->num_se;
|
||||
waves /= gpu_info->num_se;
|
||||
|
||||
radeon_set_context_reg_seq(cs, R_0286E8_SPI_TMPRING_SIZE, 3);
|
||||
radeon_emit(cs, S_0286E8_WAVES(waves) | S_0286E8_WAVESIZE(DIV_ROUND_UP(size_per_wave, 256)));
|
||||
|
|
@ -626,7 +626,7 @@ static void
|
|||
radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t size_per_wave, uint32_t waves,
|
||||
struct radeon_winsys_bo *compute_scratch_bo)
|
||||
{
|
||||
const struct radeon_info *info = &device->physical_device->rad_info;
|
||||
const struct radeon_info *gpu_info = &device->physical_device->rad_info;
|
||||
uint64_t scratch_va;
|
||||
uint32_t rsrc1;
|
||||
|
||||
|
|
@ -636,28 +636,28 @@ radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs,
|
|||
scratch_va = radv_buffer_get_va(compute_scratch_bo);
|
||||
rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
|
||||
|
||||
if (info->gfx_level >= GFX11)
|
||||
if (gpu_info->gfx_level >= GFX11)
|
||||
rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
|
||||
else
|
||||
rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX6(1);
|
||||
|
||||
radv_cs_add_buffer(device->ws, cs, compute_scratch_bo);
|
||||
|
||||
if (info->gfx_level >= GFX11) {
|
||||
if (gpu_info->gfx_level >= GFX11) {
|
||||
radeon_set_sh_reg_seq(cs, R_00B840_COMPUTE_DISPATCH_SCRATCH_BASE_LO, 2);
|
||||
radeon_emit(cs, scratch_va >> 8);
|
||||
radeon_emit(cs, scratch_va >> 40);
|
||||
|
||||
waves /= info->num_se;
|
||||
waves /= gpu_info->num_se;
|
||||
}
|
||||
|
||||
radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
|
||||
radeon_emit(cs, scratch_va);
|
||||
radeon_emit(cs, rsrc1);
|
||||
|
||||
radeon_set_sh_reg(
|
||||
cs, R_00B860_COMPUTE_TMPRING_SIZE,
|
||||
S_00B860_WAVES(waves) | S_00B860_WAVESIZE(DIV_ROUND_UP(size_per_wave, info->gfx_level >= GFX11 ? 256 : 1024)));
|
||||
radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
|
||||
S_00B860_WAVES(waves) |
|
||||
S_00B860_WAVESIZE(DIV_ROUND_UP(size_per_wave, gpu_info->gfx_level >= GFX11 ? 256 : 1024)));
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -237,7 +237,7 @@ enum radv_cs_dump_type {
|
|||
struct radeon_winsys {
|
||||
void (*destroy)(struct radeon_winsys *ws);
|
||||
|
||||
void (*query_info)(struct radeon_winsys *ws, struct radeon_info *info);
|
||||
void (*query_info)(struct radeon_winsys *ws, struct radeon_info *gpu_info);
|
||||
|
||||
uint64_t (*query_value)(struct radeon_winsys *ws, enum radeon_value_id value);
|
||||
|
||||
|
|
|
|||
|
|
@ -368,23 +368,23 @@ error:
|
|||
}
|
||||
|
||||
static void
|
||||
fill_memory_info(const struct radeon_info *info, struct vk_rmv_memory_info *out_info, int32_t index)
|
||||
fill_memory_info(const struct radeon_info *gpu_info, struct vk_rmv_memory_info *out_info, int32_t index)
|
||||
{
|
||||
switch (index) {
|
||||
case VK_RMV_MEMORY_LOCATION_DEVICE:
|
||||
out_info->physical_base_address = 0;
|
||||
out_info->size =
|
||||
info->all_vram_visible ? (uint64_t)info->vram_size_kb * 1024ULL : (uint64_t)info->vram_vis_size_kb * 1024ULL;
|
||||
out_info->size = gpu_info->all_vram_visible ? (uint64_t)gpu_info->vram_size_kb * 1024ULL
|
||||
: (uint64_t)gpu_info->vram_vis_size_kb * 1024ULL;
|
||||
break;
|
||||
case VK_RMV_MEMORY_LOCATION_DEVICE_INVISIBLE:
|
||||
out_info->physical_base_address = (uint64_t)info->vram_vis_size_kb * 1024ULL;
|
||||
out_info->size = info->all_vram_visible ? 0 : (uint64_t)info->vram_size_kb * 1024ULL;
|
||||
out_info->physical_base_address = (uint64_t)gpu_info->vram_vis_size_kb * 1024ULL;
|
||||
out_info->size = gpu_info->all_vram_visible ? 0 : (uint64_t)gpu_info->vram_size_kb * 1024ULL;
|
||||
break;
|
||||
case VK_RMV_MEMORY_LOCATION_HOST: {
|
||||
uint64_t ram_size = -1U;
|
||||
os_get_total_physical_memory(&ram_size);
|
||||
out_info->physical_base_address = 0;
|
||||
out_info->size = MIN2((uint64_t)info->gart_size_kb * 1024ULL, ram_size);
|
||||
out_info->size = MIN2((uint64_t)gpu_info->gart_size_kb * 1024ULL, ram_size);
|
||||
} break;
|
||||
default:
|
||||
unreachable("invalid memory index");
|
||||
|
|
@ -423,25 +423,25 @@ memory_type_from_vram_type(uint32_t vram_type)
|
|||
void
|
||||
radv_rmv_fill_device_info(const struct radv_physical_device *pdev, struct vk_rmv_device_info *info)
|
||||
{
|
||||
const struct radeon_info *rad_info = &pdev->rad_info;
|
||||
const struct radeon_info *gpu_info = &pdev->rad_info;
|
||||
|
||||
for (int32_t i = 0; i < VK_RMV_MEMORY_LOCATION_COUNT; ++i) {
|
||||
fill_memory_info(rad_info, &info->memory_infos[i], i);
|
||||
fill_memory_info(gpu_info, &info->memory_infos[i], i);
|
||||
}
|
||||
|
||||
if (rad_info->marketing_name)
|
||||
strncpy(info->device_name, rad_info->marketing_name, sizeof(info->device_name) - 1);
|
||||
info->pcie_family_id = rad_info->family_id;
|
||||
info->pcie_revision_id = rad_info->pci_rev_id;
|
||||
info->pcie_device_id = rad_info->pci.dev;
|
||||
if (gpu_info->marketing_name)
|
||||
strncpy(info->device_name, gpu_info->marketing_name, sizeof(info->device_name) - 1);
|
||||
info->pcie_family_id = gpu_info->family_id;
|
||||
info->pcie_revision_id = gpu_info->pci_rev_id;
|
||||
info->pcie_device_id = gpu_info->pci.dev;
|
||||
info->minimum_shader_clock = 0;
|
||||
info->maximum_shader_clock = rad_info->max_gpu_freq_mhz;
|
||||
info->vram_type = memory_type_from_vram_type(rad_info->vram_type);
|
||||
info->vram_bus_width = rad_info->memory_bus_width;
|
||||
info->vram_operations_per_clock = ac_memory_ops_per_clock(rad_info->vram_type);
|
||||
info->maximum_shader_clock = gpu_info->max_gpu_freq_mhz;
|
||||
info->vram_type = memory_type_from_vram_type(gpu_info->vram_type);
|
||||
info->vram_bus_width = gpu_info->memory_bus_width;
|
||||
info->vram_operations_per_clock = ac_memory_ops_per_clock(gpu_info->vram_type);
|
||||
info->minimum_memory_clock = 0;
|
||||
info->maximum_memory_clock = rad_info->memory_freq_mhz;
|
||||
info->vram_bandwidth = rad_info->memory_bandwidth_gbps;
|
||||
info->maximum_memory_clock = gpu_info->memory_freq_mhz;
|
||||
info->vram_bandwidth = gpu_info->memory_bandwidth_gbps;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -181,26 +181,26 @@ amdgpu_vram_type_to_rra(uint32_t type)
|
|||
}
|
||||
|
||||
static void
|
||||
rra_dump_asic_info(const struct radeon_info *rad_info, FILE *output)
|
||||
rra_dump_asic_info(const struct radeon_info *gpu_info, FILE *output)
|
||||
{
|
||||
struct rra_asic_info asic_info = {
|
||||
/* All frequencies are in Hz */
|
||||
.min_shader_clk_freq = 0,
|
||||
.max_shader_clk_freq = rad_info->max_gpu_freq_mhz * 1000000,
|
||||
.max_shader_clk_freq = gpu_info->max_gpu_freq_mhz * 1000000,
|
||||
.min_mem_clk_freq = 0,
|
||||
.max_mem_clk_freq = rad_info->memory_freq_mhz * 1000000,
|
||||
.max_mem_clk_freq = gpu_info->memory_freq_mhz * 1000000,
|
||||
|
||||
.vram_size = (uint64_t)rad_info->vram_size_kb * 1024,
|
||||
.vram_size = (uint64_t)gpu_info->vram_size_kb * 1024,
|
||||
|
||||
.mem_type = amdgpu_vram_type_to_rra(rad_info->vram_type),
|
||||
.mem_ops_per_clk = ac_memory_ops_per_clock(rad_info->vram_type),
|
||||
.bus_width = rad_info->memory_bus_width,
|
||||
.mem_type = amdgpu_vram_type_to_rra(gpu_info->vram_type),
|
||||
.mem_ops_per_clk = ac_memory_ops_per_clock(gpu_info->vram_type),
|
||||
.bus_width = gpu_info->memory_bus_width,
|
||||
|
||||
.device_id = rad_info->pci.dev,
|
||||
.rev_id = rad_info->pci_rev_id,
|
||||
.device_id = gpu_info->pci.dev,
|
||||
.rev_id = gpu_info->pci_rev_id,
|
||||
};
|
||||
|
||||
strncpy(asic_info.device_name, rad_info->marketing_name ? rad_info->marketing_name : rad_info->name,
|
||||
strncpy(asic_info.device_name, gpu_info->marketing_name ? gpu_info->marketing_name : gpu_info->name,
|
||||
RRA_FILE_DEVICE_NAME_MAX_SIZE - 1);
|
||||
|
||||
fwrite(&asic_info, sizeof(struct rra_asic_info), 1, output);
|
||||
|
|
|
|||
|
|
@ -2064,42 +2064,42 @@ unsigned
|
|||
radv_get_max_waves(const struct radv_device *device, const struct ac_shader_config *conf,
|
||||
const struct radv_shader_info *info)
|
||||
{
|
||||
const struct radeon_info *rad_info = &device->physical_device->rad_info;
|
||||
const enum amd_gfx_level gfx_level = rad_info->gfx_level;
|
||||
const struct radeon_info *gpu_info = &device->physical_device->rad_info;
|
||||
const enum amd_gfx_level gfx_level = gpu_info->gfx_level;
|
||||
const uint8_t wave_size = info->wave_size;
|
||||
gl_shader_stage stage = info->stage;
|
||||
unsigned max_simd_waves = rad_info->max_waves_per_simd;
|
||||
unsigned max_simd_waves = gpu_info->max_waves_per_simd;
|
||||
unsigned lds_per_wave = 0;
|
||||
|
||||
if (stage == MESA_SHADER_FRAGMENT) {
|
||||
lds_per_wave = conf->lds_size * rad_info->lds_encode_granularity + info->ps.num_interp * 48;
|
||||
lds_per_wave = align(lds_per_wave, rad_info->lds_alloc_granularity);
|
||||
lds_per_wave = conf->lds_size * gpu_info->lds_encode_granularity + info->ps.num_interp * 48;
|
||||
lds_per_wave = align(lds_per_wave, gpu_info->lds_alloc_granularity);
|
||||
} else if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_TASK) {
|
||||
unsigned max_workgroup_size = info->workgroup_size;
|
||||
lds_per_wave = align(conf->lds_size * rad_info->lds_encode_granularity, rad_info->lds_alloc_granularity);
|
||||
lds_per_wave = align(conf->lds_size * gpu_info->lds_encode_granularity, gpu_info->lds_alloc_granularity);
|
||||
lds_per_wave /= DIV_ROUND_UP(max_workgroup_size, wave_size);
|
||||
}
|
||||
|
||||
if (conf->num_sgprs && gfx_level < GFX10) {
|
||||
unsigned sgprs = align(conf->num_sgprs, gfx_level >= GFX8 ? 16 : 8);
|
||||
max_simd_waves = MIN2(max_simd_waves, rad_info->num_physical_sgprs_per_simd / sgprs);
|
||||
max_simd_waves = MIN2(max_simd_waves, gpu_info->num_physical_sgprs_per_simd / sgprs);
|
||||
}
|
||||
|
||||
if (conf->num_vgprs) {
|
||||
unsigned physical_vgprs = rad_info->num_physical_wave64_vgprs_per_simd * (64 / wave_size);
|
||||
unsigned physical_vgprs = gpu_info->num_physical_wave64_vgprs_per_simd * (64 / wave_size);
|
||||
unsigned vgprs = align(conf->num_vgprs, wave_size == 32 ? 8 : 4);
|
||||
if (gfx_level >= GFX10_3) {
|
||||
unsigned real_vgpr_gran = rad_info->num_physical_wave64_vgprs_per_simd / 64;
|
||||
unsigned real_vgpr_gran = gpu_info->num_physical_wave64_vgprs_per_simd / 64;
|
||||
vgprs = util_align_npot(vgprs, real_vgpr_gran * (wave_size == 32 ? 2 : 1));
|
||||
}
|
||||
max_simd_waves = MIN2(max_simd_waves, physical_vgprs / vgprs);
|
||||
}
|
||||
|
||||
unsigned simd_per_workgroup = rad_info->num_simd_per_compute_unit;
|
||||
unsigned simd_per_workgroup = gpu_info->num_simd_per_compute_unit;
|
||||
if (gfx_level >= GFX10)
|
||||
simd_per_workgroup *= 2; /* like lds_size_per_workgroup, assume WGP on GFX10+ */
|
||||
|
||||
unsigned max_lds_per_simd = rad_info->lds_size_per_workgroup / simd_per_workgroup;
|
||||
unsigned max_lds_per_simd = gpu_info->lds_size_per_workgroup / simd_per_workgroup;
|
||||
if (lds_per_wave)
|
||||
max_simd_waves = MIN2(max_simd_waves, DIV_ROUND_UP(max_lds_per_simd, lds_per_wave));
|
||||
|
||||
|
|
|
|||
|
|
@ -238,14 +238,14 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum r
|
|||
bool
|
||||
radv_spm_init(struct radv_device *device)
|
||||
{
|
||||
const struct radeon_info *info = &device->physical_device->rad_info;
|
||||
const struct radeon_info *gpu_info = &device->physical_device->rad_info;
|
||||
struct ac_perfcounters *pc = &device->physical_device->ac_perfcounters;
|
||||
|
||||
/* We failed to initialize the performance counters. */
|
||||
if (!pc->blocks)
|
||||
return false;
|
||||
|
||||
if (!ac_init_spm(info, pc, &device->spm))
|
||||
if (!ac_init_spm(gpu_info, pc, &device->spm))
|
||||
return false;
|
||||
|
||||
if (!radv_spm_init_bo(device))
|
||||
|
|
|
|||
|
|
@ -101,19 +101,19 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
|
|||
{
|
||||
const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
|
||||
uint32_t shifted_size = device->sqtt.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
|
||||
const struct radeon_info *rad_info = &device->physical_device->rad_info;
|
||||
const unsigned shader_mask = ac_sqtt_get_shader_mask(rad_info);
|
||||
unsigned max_se = rad_info->max_se;
|
||||
const struct radeon_info *gpu_info = &device->physical_device->rad_info;
|
||||
const unsigned shader_mask = ac_sqtt_get_shader_mask(gpu_info);
|
||||
unsigned max_se = gpu_info->max_se;
|
||||
|
||||
radeon_check_space(device->ws, cs, 6 + max_se * 33);
|
||||
|
||||
for (unsigned se = 0; se < max_se; se++) {
|
||||
uint64_t va = radv_buffer_get_va(device->sqtt.bo);
|
||||
uint64_t data_va = ac_sqtt_get_data_va(rad_info, &device->sqtt, va, se);
|
||||
uint64_t data_va = ac_sqtt_get_data_va(gpu_info, &device->sqtt, va, se);
|
||||
uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
|
||||
int active_cu = ac_sqtt_get_active_cu(&device->physical_device->rad_info, se);
|
||||
|
||||
if (ac_sqtt_se_is_disabled(rad_info, se))
|
||||
if (ac_sqtt_se_is_disabled(gpu_info, se))
|
||||
continue;
|
||||
|
||||
/* Target SEx and SH0. */
|
||||
|
|
@ -936,9 +936,9 @@ bool
|
|||
radv_get_sqtt_trace(struct radv_queue *queue, struct ac_sqtt_trace *sqtt_trace)
|
||||
{
|
||||
struct radv_device *device = queue->device;
|
||||
const struct radeon_info *rad_info = &device->physical_device->rad_info;
|
||||
const struct radeon_info *gpu_info = &device->physical_device->rad_info;
|
||||
|
||||
if (!ac_sqtt_get_trace(&device->sqtt, rad_info, sqtt_trace)) {
|
||||
if (!ac_sqtt_get_trace(&device->sqtt, gpu_info, sqtt_trace)) {
|
||||
if (!radv_sqtt_resize_bo(device))
|
||||
fprintf(stderr, "radv: Failed to resize the SQTT buffer.\n");
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ radv_write_harvested_raster_configs(struct radv_physical_device *pdev, struct ra
|
|||
void
|
||||
radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
|
||||
{
|
||||
const struct radeon_info *info = &device->physical_device->rad_info;
|
||||
const struct radeon_info *gpu_info = &device->physical_device->rad_info;
|
||||
|
||||
radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
|
||||
radeon_emit(cs, 0);
|
||||
|
|
@ -86,7 +86,7 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
/* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1,
|
||||
* renamed COMPUTE_DESTINATION_EN_SEn on gfx10. */
|
||||
for (unsigned i = 0; i < 2; ++i) {
|
||||
unsigned cu_mask = i < info->num_se ? info->spi_cu_en : 0x0;
|
||||
unsigned cu_mask = i < gpu_info->num_se ? gpu_info->spi_cu_en : 0x0;
|
||||
radeon_emit(cs, S_00B8AC_SA0_CU_EN(cu_mask) | S_00B8AC_SA1_CU_EN(cu_mask));
|
||||
}
|
||||
|
||||
|
|
@ -94,7 +94,7 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
/* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */
|
||||
radeon_set_sh_reg_seq(cs, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2);
|
||||
for (unsigned i = 2; i < 4; ++i) {
|
||||
unsigned cu_mask = i < info->num_se ? info->spi_cu_en : 0x0;
|
||||
unsigned cu_mask = i < gpu_info->num_se ? gpu_info->spi_cu_en : 0x0;
|
||||
radeon_emit(cs, S_00B8AC_SA0_CU_EN(cu_mask) | S_00B8AC_SA1_CU_EN(cu_mask));
|
||||
}
|
||||
|
||||
|
|
@ -148,7 +148,7 @@ radv_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
|
|||
radeon_set_sh_reg_seq(cs, R_00B8AC_COMPUTE_STATIC_THREAD_MGMT_SE4, 4);
|
||||
/* SE4-SE7 */
|
||||
for (unsigned i = 4; i < 8; ++i) {
|
||||
unsigned cu_mask = i < info->num_se ? info->spi_cu_en : 0x0;
|
||||
unsigned cu_mask = i < gpu_info->num_se ? gpu_info->spi_cu_en : 0x0;
|
||||
radeon_emit(cs, S_00B8AC_SA0_CU_EN(cu_mask) | S_00B8AC_SA1_CU_EN(cu_mask));
|
||||
}
|
||||
|
||||
|
|
@ -817,7 +817,7 @@ radv_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_d
|
|||
bool count_from_stream_output, uint32_t draw_vertex_count, unsigned topology,
|
||||
bool prim_restart_enable, unsigned patch_control_points, unsigned num_tess_patches)
|
||||
{
|
||||
const struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
|
||||
const struct radeon_info *gpu_info = &cmd_buffer->device->physical_device->rad_info;
|
||||
const unsigned max_primgroup_in_wave = 2;
|
||||
/* SWITCH_ON_EOP(0) is always preferable. */
|
||||
bool wd_switch_on_eop = false;
|
||||
|
|
@ -838,7 +838,7 @@ radv_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_d
|
|||
}
|
||||
|
||||
/* GS requirement. */
|
||||
if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_GEOMETRY) && info->gfx_level <= GFX8) {
|
||||
if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_GEOMETRY) && gpu_info->gfx_level <= GFX8) {
|
||||
unsigned gs_table_depth = cmd_buffer->device->physical_device->gs_table_depth;
|
||||
if (SI_GS_PER_ES / primgroup_size >= gs_table_depth - 3)
|
||||
partial_es_wave = true;
|
||||
|
|
@ -861,20 +861,20 @@ radv_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_d
|
|||
ia_switch_on_eoi = cmd_buffer->state.ia_multi_vgt_param.ia_switch_on_eoi;
|
||||
partial_vs_wave = cmd_buffer->state.ia_multi_vgt_param.partial_vs_wave;
|
||||
|
||||
if (info->gfx_level >= GFX7) {
|
||||
if (gpu_info->gfx_level >= GFX7) {
|
||||
/* WD_SWITCH_ON_EOP has no effect on GPUs with less than
|
||||
* 4 shader engines. Set 1 to pass the assertion below.
|
||||
* The other cases are hardware requirements. */
|
||||
if (info->max_se < 4 || topology == V_008958_DI_PT_POLYGON || topology == V_008958_DI_PT_LINELOOP ||
|
||||
if (gpu_info->max_se < 4 || topology == V_008958_DI_PT_POLYGON || topology == V_008958_DI_PT_LINELOOP ||
|
||||
topology == V_008958_DI_PT_TRIFAN || topology == V_008958_DI_PT_TRISTRIP_ADJ ||
|
||||
(prim_restart_enable && (info->family < CHIP_POLARIS10 ||
|
||||
(prim_restart_enable && (gpu_info->family < CHIP_POLARIS10 ||
|
||||
(topology != V_008958_DI_PT_POINTLIST && topology != V_008958_DI_PT_LINESTRIP))))
|
||||
wd_switch_on_eop = true;
|
||||
|
||||
/* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0.
|
||||
* We don't know that for indirect drawing, so treat it as
|
||||
* always problematic. */
|
||||
if (info->family == CHIP_HAWAII && (instanced_draw || indirect_draw))
|
||||
if (gpu_info->family == CHIP_HAWAII && (instanced_draw || indirect_draw))
|
||||
wd_switch_on_eop = true;
|
||||
|
||||
/* Performance recommendation for 4 SE Gfx7-8 parts if
|
||||
|
|
@ -882,7 +882,7 @@ radv_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_d
|
|||
* Assume indirect draws always use small instances.
|
||||
* This is needed for good VS wave utilization.
|
||||
*/
|
||||
if (info->gfx_level <= GFX8 && info->max_se == 4 && multi_instances_smaller_than_primgroup)
|
||||
if (gpu_info->gfx_level <= GFX8 && gpu_info->max_se == 4 && multi_instances_smaller_than_primgroup)
|
||||
wd_switch_on_eop = true;
|
||||
|
||||
/* Hardware requirement when drawing primitives from a stream
|
||||
|
|
@ -892,26 +892,26 @@ radv_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_d
|
|||
wd_switch_on_eop = true;
|
||||
|
||||
/* Required on GFX7 and later. */
|
||||
if (info->max_se > 2 && !wd_switch_on_eop)
|
||||
if (gpu_info->max_se > 2 && !wd_switch_on_eop)
|
||||
ia_switch_on_eoi = true;
|
||||
|
||||
/* Required by Hawaii and, for some special cases, by GFX8. */
|
||||
if (ia_switch_on_eoi &&
|
||||
(info->family == CHIP_HAWAII ||
|
||||
(info->gfx_level == GFX8 &&
|
||||
(gpu_info->family == CHIP_HAWAII ||
|
||||
(gpu_info->gfx_level == GFX8 &&
|
||||
/* max primgroup in wave is always 2 - leave this for documentation */
|
||||
(radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_GEOMETRY) || max_primgroup_in_wave != 2))))
|
||||
partial_vs_wave = true;
|
||||
|
||||
/* Instancing bug on Bonaire. */
|
||||
if (info->family == CHIP_BONAIRE && ia_switch_on_eoi && (instanced_draw || indirect_draw))
|
||||
if (gpu_info->family == CHIP_BONAIRE && ia_switch_on_eoi && (instanced_draw || indirect_draw))
|
||||
partial_vs_wave = true;
|
||||
|
||||
/* If the WD switch is false, the IA switch must be false too. */
|
||||
assert(wd_switch_on_eop || !ia_switch_on_eop);
|
||||
}
|
||||
/* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
|
||||
if (info->gfx_level <= GFX8 && ia_switch_on_eoi)
|
||||
if (gpu_info->gfx_level <= GFX8 && ia_switch_on_eoi)
|
||||
partial_es_wave = true;
|
||||
|
||||
if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_GEOMETRY)) {
|
||||
|
|
@ -919,7 +919,7 @@ radv_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_d
|
|||
* The hw doc says all multi-SE chips are affected, but amdgpu-pro Vulkan
|
||||
* only applies it to Hawaii. Do what amdgpu-pro Vulkan does.
|
||||
*/
|
||||
if (info->family == CHIP_HAWAII && ia_switch_on_eoi) {
|
||||
if (gpu_info->family == CHIP_HAWAII && ia_switch_on_eoi) {
|
||||
bool set_vgt_flush = indirect_draw;
|
||||
if (!set_vgt_flush && instanced_draw) {
|
||||
uint32_t num_prims = radv_prims_for_vertices(&prim_vertex_count, draw_vertex_count);
|
||||
|
|
@ -942,7 +942,7 @@ radv_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_d
|
|||
return cmd_buffer->state.ia_multi_vgt_param.base | S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) |
|
||||
S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
|
||||
S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) | S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
|
||||
S_028AA8_WD_SWITCH_ON_EOP(info->gfx_level >= GFX7 ? wd_switch_on_eop : 0);
|
||||
S_028AA8_WD_SWITCH_ON_EOP(gpu_info->gfx_level >= GFX7 ? wd_switch_on_eop : 0);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -71,9 +71,9 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
|
|||
}
|
||||
|
||||
static void
|
||||
radv_amdgpu_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *info)
|
||||
radv_amdgpu_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *gpu_info)
|
||||
{
|
||||
*info = ((struct radv_amdgpu_winsys *)rws)->info;
|
||||
*gpu_info = ((struct radv_amdgpu_winsys *)rws)->info;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
|
|
|
|||
|
|
@ -74,95 +74,96 @@ static const struct {
|
|||
};
|
||||
|
||||
static void
|
||||
radv_null_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *info)
|
||||
radv_null_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *gpu_info)
|
||||
{
|
||||
const char *family = getenv("RADV_FORCE_FAMILY");
|
||||
unsigned i;
|
||||
|
||||
info->gfx_level = CLASS_UNKNOWN;
|
||||
info->family = CHIP_UNKNOWN;
|
||||
gpu_info->gfx_level = CLASS_UNKNOWN;
|
||||
gpu_info->family = CHIP_UNKNOWN;
|
||||
|
||||
for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
|
||||
if (!strcasecmp(family, ac_get_family_name(i))) {
|
||||
/* Override family and gfx_level. */
|
||||
info->family = i;
|
||||
info->name = ac_get_family_name(i);
|
||||
gpu_info->family = i;
|
||||
gpu_info->name = ac_get_family_name(i);
|
||||
|
||||
if (info->family >= CHIP_NAVI31)
|
||||
info->gfx_level = GFX11;
|
||||
if (gpu_info->family >= CHIP_NAVI31)
|
||||
gpu_info->gfx_level = GFX11;
|
||||
else if (i >= CHIP_NAVI21)
|
||||
info->gfx_level = GFX10_3;
|
||||
gpu_info->gfx_level = GFX10_3;
|
||||
else if (i >= CHIP_NAVI10)
|
||||
info->gfx_level = GFX10;
|
||||
gpu_info->gfx_level = GFX10;
|
||||
else if (i >= CHIP_VEGA10)
|
||||
info->gfx_level = GFX9;
|
||||
gpu_info->gfx_level = GFX9;
|
||||
else if (i >= CHIP_TONGA)
|
||||
info->gfx_level = GFX8;
|
||||
gpu_info->gfx_level = GFX8;
|
||||
else if (i >= CHIP_BONAIRE)
|
||||
info->gfx_level = GFX7;
|
||||
gpu_info->gfx_level = GFX7;
|
||||
else
|
||||
info->gfx_level = GFX6;
|
||||
gpu_info->gfx_level = GFX6;
|
||||
}
|
||||
}
|
||||
|
||||
if (info->family == CHIP_UNKNOWN) {
|
||||
if (gpu_info->family == CHIP_UNKNOWN) {
|
||||
fprintf(stderr, "radv: Unknown family: %s\n", family);
|
||||
abort();
|
||||
}
|
||||
|
||||
info->pci_id = pci_ids[info->family].pci_id;
|
||||
info->max_se = 4;
|
||||
info->num_se = 4;
|
||||
if (info->gfx_level >= GFX10_3)
|
||||
info->max_waves_per_simd = 16;
|
||||
else if (info->gfx_level >= GFX10)
|
||||
info->max_waves_per_simd = 20;
|
||||
else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
|
||||
info->max_waves_per_simd = 8;
|
||||
gpu_info->pci_id = pci_ids[gpu_info->family].pci_id;
|
||||
gpu_info->max_se = 4;
|
||||
gpu_info->num_se = 4;
|
||||
if (gpu_info->gfx_level >= GFX10_3)
|
||||
gpu_info->max_waves_per_simd = 16;
|
||||
else if (gpu_info->gfx_level >= GFX10)
|
||||
gpu_info->max_waves_per_simd = 20;
|
||||
else if (gpu_info->family >= CHIP_POLARIS10 && gpu_info->family <= CHIP_VEGAM)
|
||||
gpu_info->max_waves_per_simd = 8;
|
||||
else
|
||||
info->max_waves_per_simd = 10;
|
||||
gpu_info->max_waves_per_simd = 10;
|
||||
|
||||
if (info->gfx_level >= GFX10)
|
||||
info->num_physical_sgprs_per_simd = 128 * info->max_waves_per_simd;
|
||||
else if (info->gfx_level >= GFX8)
|
||||
info->num_physical_sgprs_per_simd = 800;
|
||||
if (gpu_info->gfx_level >= GFX10)
|
||||
gpu_info->num_physical_sgprs_per_simd = 128 * gpu_info->max_waves_per_simd;
|
||||
else if (gpu_info->gfx_level >= GFX8)
|
||||
gpu_info->num_physical_sgprs_per_simd = 800;
|
||||
else
|
||||
info->num_physical_sgprs_per_simd = 512;
|
||||
gpu_info->num_physical_sgprs_per_simd = 512;
|
||||
|
||||
info->has_3d_cube_border_color_mipmap = true;
|
||||
info->has_image_opcodes = true;
|
||||
gpu_info->has_3d_cube_border_color_mipmap = true;
|
||||
gpu_info->has_image_opcodes = true;
|
||||
|
||||
if (info->family == CHIP_NAVI31 || info->family == CHIP_NAVI32)
|
||||
info->num_physical_wave64_vgprs_per_simd = 768;
|
||||
else if (info->gfx_level >= GFX10)
|
||||
info->num_physical_wave64_vgprs_per_simd = 512;
|
||||
if (gpu_info->family == CHIP_NAVI31 || gpu_info->family == CHIP_NAVI32)
|
||||
gpu_info->num_physical_wave64_vgprs_per_simd = 768;
|
||||
else if (gpu_info->gfx_level >= GFX10)
|
||||
gpu_info->num_physical_wave64_vgprs_per_simd = 512;
|
||||
else
|
||||
info->num_physical_wave64_vgprs_per_simd = 256;
|
||||
info->num_simd_per_compute_unit = info->gfx_level >= GFX10 ? 2 : 4;
|
||||
info->lds_size_per_workgroup = info->gfx_level >= GFX10 ? 128 * 1024
|
||||
: info->gfx_level >= GFX7 ? 64 * 1024
|
||||
: 32 * 1024;
|
||||
info->lds_encode_granularity = info->gfx_level >= GFX7 ? 128 * 4 : 64 * 4;
|
||||
info->lds_alloc_granularity = info->gfx_level >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity;
|
||||
info->max_render_backends = pci_ids[info->family].num_render_backends;
|
||||
gpu_info->num_physical_wave64_vgprs_per_simd = 256;
|
||||
gpu_info->num_simd_per_compute_unit = gpu_info->gfx_level >= GFX10 ? 2 : 4;
|
||||
gpu_info->lds_size_per_workgroup = gpu_info->gfx_level >= GFX10 ? 128 * 1024
|
||||
: gpu_info->gfx_level >= GFX7 ? 64 * 1024
|
||||
: 32 * 1024;
|
||||
gpu_info->lds_encode_granularity = gpu_info->gfx_level >= GFX7 ? 128 * 4 : 64 * 4;
|
||||
gpu_info->lds_alloc_granularity = gpu_info->gfx_level >= GFX10_3 ? 256 * 4 : gpu_info->lds_encode_granularity;
|
||||
gpu_info->max_render_backends = pci_ids[gpu_info->family].num_render_backends;
|
||||
|
||||
info->has_dedicated_vram = pci_ids[info->family].has_dedicated_vram;
|
||||
info->has_packed_math_16bit = info->gfx_level >= GFX9;
|
||||
gpu_info->has_dedicated_vram = pci_ids[gpu_info->family].has_dedicated_vram;
|
||||
gpu_info->has_packed_math_16bit = gpu_info->gfx_level >= GFX9;
|
||||
|
||||
info->has_image_load_dcc_bug = info->family == CHIP_NAVI23 || info->family == CHIP_VANGOGH;
|
||||
gpu_info->has_image_load_dcc_bug = gpu_info->family == CHIP_NAVI23 || gpu_info->family == CHIP_VANGOGH;
|
||||
|
||||
info->has_accelerated_dot_product =
|
||||
info->family == CHIP_VEGA20 || (info->family >= CHIP_MI100 && info->family != CHIP_NAVI10);
|
||||
gpu_info->has_accelerated_dot_product =
|
||||
gpu_info->family == CHIP_VEGA20 || (gpu_info->family >= CHIP_MI100 && gpu_info->family != CHIP_NAVI10);
|
||||
|
||||
info->address32_hi = info->gfx_level >= GFX9 ? 0xffff8000u : 0x0;
|
||||
gpu_info->address32_hi = gpu_info->gfx_level >= GFX9 ? 0xffff8000u : 0x0;
|
||||
|
||||
info->has_rbplus = info->family == CHIP_STONEY || info->gfx_level >= GFX9;
|
||||
info->rbplus_allowed =
|
||||
info->has_rbplus && (info->family == CHIP_STONEY || info->family == CHIP_VEGA12 || info->family == CHIP_RAVEN ||
|
||||
info->family == CHIP_RAVEN2 || info->family == CHIP_RENOIR || info->gfx_level >= GFX10_3);
|
||||
gpu_info->has_rbplus = gpu_info->family == CHIP_STONEY || gpu_info->gfx_level >= GFX9;
|
||||
gpu_info->rbplus_allowed =
|
||||
gpu_info->has_rbplus &&
|
||||
(gpu_info->family == CHIP_STONEY || gpu_info->family == CHIP_VEGA12 || gpu_info->family == CHIP_RAVEN ||
|
||||
gpu_info->family == CHIP_RAVEN2 || gpu_info->family == CHIP_RENOIR || gpu_info->gfx_level >= GFX10_3);
|
||||
|
||||
info->has_scheduled_fence_dependency = true;
|
||||
info->has_gang_submit = true;
|
||||
gpu_info->has_scheduled_fence_dependency = true;
|
||||
gpu_info->has_gang_submit = true;
|
||||
}
|
||||
|
||||
static const char *
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue