radeonsi/gfx: move code from si_get to si_gfx_screen

These functions can be moved to the gfx subfolder and made static.

Reviewed-by: David Rosca <david.rosca@amd.com>
Reviewed-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41133>
This commit is contained in:
Pierre-Eric Pelloux-Prayer 2026-04-15 18:24:04 +02:00
parent d1c57f742e
commit a335f4be7a
3 changed files with 580 additions and 589 deletions

View file

@ -12,6 +12,7 @@
#include "aco_interface.h"
#include "util/hex.h"
#include "util/u_cpu_detect.h"
#include "util/u_screen.h"
#include <sys/utsname.h>
#include <ctype.h>
@ -249,6 +250,585 @@ static struct disk_cache *si_get_disk_shader_cache(struct pipe_screen *pscreen)
return sscreen->disk_shader_cache;
}
/**
 * Cost-limit callback installed as
 * nir_shader_compiler_options::varying_expression_max_cost.
 *
 * The consumer shader is looked up in si_shader_profiles[] by its source
 * BLAKE3 hash. Only the first matching profile is considered: if it carries
 * SI_PROFILE_NO_OPT_UNIFORM_VARYINGS, 0 is returned so that only constants
 * are propagated. In all other cases the answer comes from
 * ac_nir_varying_expression_max_cost().
 */
static unsigned si_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer)
{
   const unsigned profile_count = si_get_num_shader_profiles();
   unsigned idx = 0;

   /* Advance to the first profile whose hash matches the consumer. */
   while (idx < profile_count &&
          !_mesa_printed_blake3_equal(consumer->info.source_blake3,
                                      si_shader_profiles[idx].blake3))
      idx++;

   if (idx < profile_count &&
       (si_shader_profiles[idx].options & SI_PROFILE_NO_OPT_UNIFORM_VARYINGS))
      return 0; /* only propagate constants */

   return ac_nir_varying_expression_max_cost(producer, consumer);
}
/**
 * Decide whether the mesh shader stage is enabled for this screen.
 *
 * All of the following must hold: NGG is in use, the chip is gfx10.3 or
 * newer, the GFX IP does not use a user queue, ACO supports the GPU and the
 * USE_LLVM debug flag is not set.
 */
static bool enable_mesh_shader(struct si_screen *sscreen)
{
   if (!sscreen->use_ngg || sscreen->info.gfx_level < GFX10_3)
      return false;

   /* TODO: not support user queue for now */
   if (sscreen->info.userq_ip_mask & BITFIELD_BIT(AMD_IP_GFX))
      return false;

   /* don't support LLVM */
   if (!aco_is_gpu_supported(&sscreen->info))
      return false;

   return !(sscreen->debug_flags & DBG(USE_LLVM));
}
/**
 * Filter installed as nir_shader_compiler_options::lower_to_scalar_filter
 * when packed 16-bit math is available.
 *
 * Returns false only for a 2-component 16-bit ALU instruction that
 * ac_nir_op_supports_packed_math_16bit() accepts and whose sources all have
 * swizzles that agree in every bit but the lowest. Everything else returns
 * true (presumably "scalarize this instruction" - confirm against the NIR
 * lower-to-scalar pass).
 *
 * \param instr  instruction being examined
 * \param data   unused
 */
static bool si_alu_to_scalar_packed_math_filter(const nir_instr *instr, const void *data)
{
   if (instr->type != nir_instr_type_alu)
      return true;

   nir_alu_instr *alu = nir_instr_as_alu(instr);

   if (alu->def.bit_size != 16 || alu->def.num_components != 2 ||
       !ac_nir_op_supports_packed_math_16bit(alu))
      return true;

   /* ACO requires that all but the first bit of swizzle must be equal. */
   for (unsigned src = 0; src < nir_op_infos[alu->op].num_inputs; src++) {
      const unsigned hi0 = alu->src[src].swizzle[0] >> 1;
      const unsigned hi1 = alu->src[src].swizzle[1] >> 1;

      if (hi0 != hi1)
         return true;
   }

   return false;
}
static void si_init_screen_nir_options(struct si_screen *sscreen)
{
/* |---------------------------------- Performance & Availability --------------------------------|
* |MAD/MAC/MADAK/MADMK|MAD_LEGACY|MAC_LEGACY| FMA |FMAC/FMAAK/FMAMK|FMA_LEGACY|PK_FMA_F16,|Best choice
* Arch | F32,F16,F64 | F32,F16 | F32,F16 |F32,F16,F64 | F32,F16 | F32 |PK_FMAC_F16|F16,F32,F64
* ------------------------------------------------------------------------------------------------------------------
* gfx6,7 | 1 , - , - | 1 , - | 1 , - |1/4, - ,1/16| - , - | - | - , - | - ,MAD,FMA
* gfx8 | 1 , 1 , - | 1 , - | - , - |1/4, 1 ,1/16| - , - | - | - , - |MAD,MAD,FMA
* gfx9 | 1 ,1|0, - | 1 , - | - , - | 1 , 1 ,1/16| 0|1, - | - | 2 , - |FMA,MAD,FMA
* gfx10 | 1 , - , - | 1 , - | 1 , - | 1 , 1 ,1/16| 1 , 1 | - | 2 , 2 |FMA,MAD,FMA
* gfx10.3| - , - , - | - , - | - , - | 1 , 1 ,1/16| 1 , 1 | 1 | 2 , 2 | all FMA
* gfx11 | - , - , - | - , - | - , - | 2 , 2 ,1/16| 2 , 2 | 2 | 2 , 2 | all FMA
*
* Tahiti, Hawaii, Carrizo, Vega20: FMA_F32 is full rate, FMA_F64 is 1/4
* gfx9 supports MAD_F16 only on Vega10, Raven, Raven2, Renoir.
* gfx9 supports FMAC_F32 only on Vega20, but doesn't support FMAAK and FMAMK.
*
* gfx8 prefers MAD for F16 because of MAC/MADAK/MADMK.
* gfx9 and newer prefer FMA for F16 because of the packed instruction.
* gfx10 and older prefer MAD for F32 because of the legacy instruction.
*/
bool use_fma32 =
sscreen->info.gfx_level >= GFX10_3 ||
(sscreen->info.family >= CHIP_GFX940 && !sscreen->info.has_graphics) ||
/* fma32 is too slow for gpu < gfx9, so apply the option only for gpu >= gfx9 */
(sscreen->info.gfx_level >= GFX9 && sscreen->options.force_use_fma32);
/* GFX8 has precision issues with 16-bit PS outputs. */
bool has_16bit_io = sscreen->info.gfx_level >= GFX9;
nir_shader_compiler_options *options = sscreen->nir_options;
ac_nir_set_options(&sscreen->info.compiler_info, !sscreen->use_aco, options);
options->lower_ffma16 = sscreen->info.gfx_level < GFX9;
options->lower_ffma32 = !use_fma32;
options->lower_ffma64 = false;
options->fuse_ffma16 = sscreen->info.gfx_level >= GFX9;
options->fuse_ffma32 = use_fma32;
options->fuse_ffma64 = true;
options->lower_uniforms_to_ubo = true;
options->lower_to_scalar = true;
options->lower_to_scalar_filter =
sscreen->info.compiler_info.has_packed_math_16bit ? si_alu_to_scalar_packed_math_filter : NULL;
options->max_unroll_iterations = 128;
options->max_unroll_iterations_aggressive = 128;
/* For OpenGL, rounding mode is undefined. We want fast packing with v_cvt_pkrtz_f16,
* but if we use it, all f32->f16 conversions have to round towards zero,
* because both scalar and vec2 down-conversions have to round equally.
*
* For OpenCL, rounding mode is explicit. This will only lower f2f16 to f2f16_rtz
* when execution mode is rtz instead of rtne.
*
* GFX8 has precision issues with this option.
*/
options->force_f2f16_rtz = sscreen->info.gfx_level >= GFX9;
options->io_options |= (!has_16bit_io ? nir_io_mediump_is_32bit : 0) | nir_io_has_intrinsics |
(sscreen->use_ngg_culling ?
nir_io_compaction_groups_tes_inputs_into_pos_and_var_groups : 0);
if (has_16bit_io) {
options->lower_mediump_io = sscreen->options.mediump ? si_nir_lower_mediump_io_option
: si_nir_lower_mediump_io_default;
}
/* HW supports indirect indexing for: | Enabled in driver
* -------------------------------------------------------
* TCS inputs | Yes
* TES inputs | Yes
* GS inputs | No
* -------------------------------------------------------
* VS outputs before TCS | No
* TCS outputs | Yes
* VS/TES outputs before GS | No
*/
options->varying_expression_max_cost = si_varying_expression_max_cost;
unsigned max_support_shader = enable_mesh_shader(sscreen) ?
MESA_SHADER_MESH : MESA_SHADER_COMPUTE;
for (unsigned i = 0; i <= max_support_shader; i++)
sscreen->b.nir_options[i] = sscreen->nir_options;
}
/**
 * Fill sscreen->b.shader_caps[] for every shader stage (vertex through mesh)
 * that has NIR compiler options in sscreen->b.nir_options[]. Stages without
 * options are skipped and their caps are not written.
 */
static void si_init_shader_caps(struct si_screen *sscreen)
{
   for (unsigned stage = 0; stage <= MESA_SHADER_MESH; stage++) {
      if (sscreen->b.nir_options[stage]) {
         struct pipe_shader_caps *sc =
            (struct pipe_shader_caps *)&sscreen->b.shader_caps[stage];

         /* Shader limits. */
         sc->max_instructions =
         sc->max_alu_instructions =
         sc->max_tex_instructions =
         sc->max_tex_indirections =
         sc->max_control_flow_depth = 16384;

         if (stage == MESA_SHADER_VERTEX)
            sc->max_inputs = SI_MAX_ATTRIBS;
         else
            sc->max_inputs = 32;

         if (stage == MESA_SHADER_FRAGMENT)
            sc->max_outputs = 8;
         else
            sc->max_outputs = 32;

         sc->max_temps = 256; /* Max native temporaries. */
         sc->max_const_buffer0_size = 1 << 26; /* 64 MB */

         /* Resource slot counts. */
         sc->max_const_buffers = SI_NUM_CONST_BUFFERS;
         sc->max_texture_samplers =
         sc->max_sampler_views = SI_NUM_SAMPLERS;
         sc->max_shader_buffers = SI_NUM_SHADER_BUFFERS;
         sc->max_shader_images = SI_NUM_IMAGES;

         sc->supported_irs = (1 << PIPE_SHADER_IR_TGSI) | (1 << PIPE_SHADER_IR_NIR);

         /* Supported boolean features. */
         sc->cont_supported = true;
         sc->tgsi_sqrt_supported = true;
         sc->tgsi_any_inout_decl_range = true;
         sc->indirect_temp_addr = true;
         sc->indirect_const_addr = true;
         sc->integers = true;
         sc->int64_atomics = true;

         /* We need F16C for fast FP16 conversions in glUniform.
          * It's supported since Intel Ivy Bridge and AMD Bulldozer.
          */
         const bool alu_16bit =
            sscreen->info.gfx_level >= GFX8 && util_get_cpu_caps()->has_f16c;

         sc->fp16 = alu_16bit;
         sc->fp16_derivatives = alu_16bit;
         sc->fp16_const_buffers = alu_16bit;
         sc->int16 = alu_16bit;
         sc->glsl_16bit_consts = alu_16bit;
         sc->glsl_16bit_load_dst = sscreen->info.gfx_level >= GFX9;
      }
   }
}
/**
 * Fill sscreen->b.compute_caps: grid/block limits, memory sizes derived from
 * info.max_heap_size_kb, clock and CU counts, and the subgroup (wave) sizes,
 * which depend on the gfx level and the W32_CS / W64_CS shader debug flags.
 */
static void si_init_compute_caps(struct si_screen *sscreen)
{
   struct pipe_compute_caps *cc =
      (struct pipe_compute_caps *)&sscreen->b.compute_caps;

   const bool pre_gfx10 = sscreen->info.gfx_level < GFX10;
   const bool force_wave32 = (sscreen->shader_debug_flags & DBG(W32_CS)) != 0;
   const bool force_wave64 = (sscreen->shader_debug_flags & DBG(W64_CS)) != 0;

   cc->grid_dimension = 3;

   /* Use this size, so that internal counters don't overflow 64 bits. */
   cc->max_grid_size[0] = UINT32_MAX;
   cc->max_grid_size[1] = UINT16_MAX;
   cc->max_grid_size[2] = UINT16_MAX;

   for (unsigned dim = 0; dim < 3; dim++)
      cc->max_block_size[dim] = 1024;

   cc->max_threads_per_block = 1024;
   cc->address_bits = 64;

   /* Return 1/4 of the heap size as the maximum because the max size is not practically
    * allocatable.
    */
   cc->max_mem_alloc_size = (sscreen->info.max_heap_size_kb / 4) * 1024ull;

   /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
    * 1/4 of the MAX_GLOBAL_SIZE.  Since the
    * MAX_MEM_ALLOC_SIZE is fixed for older kernels,
    * make sure we never report more than
    * 4 * MAX_MEM_ALLOC_SIZE.
    */
   cc->max_global_size = MIN2(4 * cc->max_mem_alloc_size,
                              sscreen->info.max_heap_size_kb * 1024ull);

   /* Value reported by the closed source driver. */
   if (sscreen->info.gfx_level == GFX6)
      cc->max_local_size = 32 * 1024;
   else
      cc->max_local_size = 64 * 1024;

   cc->max_clock_frequency = sscreen->info.max_gpu_freq_mhz;
   cc->max_compute_units = sscreen->info.num_cu;

   /* Number of subgroups that fit in a 1024-thread block. */
   const unsigned block_threads = 1024;
   const unsigned wave_size = (force_wave64 || pre_gfx10) ? 64 : 32;
   cc->max_subgroups = block_threads / wave_size;

   /* Bitmask of supported subgroup sizes; W32_CS takes precedence over W64_CS. */
   if (force_wave32)
      cc->subgroup_sizes = 32;
   else if (force_wave64)
      cc->subgroup_sizes = 64;
   else if (pre_gfx10)
      cc->subgroup_sizes = 64;
   else
      cc->subgroup_sizes = 64 | 32;

   if (sscreen->info.compiler_info.has_cs_regalloc_hang_bug)
      cc->max_variable_threads_per_block = 256;
   else
      cc->max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
}
/**
 * Fill sscreen->b.caps.mesh with the task/mesh shader limits. All values are
 * constants except pipeline_statistic_queries, which is set on gfx11 and
 * newer only.
 */
static void si_init_mesh_caps(struct si_screen *sscreen)
{
   struct pipe_mesh_caps *mc = (struct pipe_mesh_caps *)&sscreen->b.caps.mesh;

   /* Work group counts and sizes, per dimension. */
   for (unsigned dim = 0; dim < 3; dim++) {
      mc->max_task_work_group_count[dim] = 65535;
      mc->max_mesh_work_group_count[dim] = 65535;
      mc->max_task_work_group_size[dim] = 1024;
      mc->max_mesh_work_group_size[dim] = 256;
   }

   /* Work group totals. */
   mc->max_task_work_group_total_count = 1 << 22;
   mc->max_mesh_work_group_total_count = 1 << 22;
   mc->max_task_work_group_invocations = 1024;
   mc->max_mesh_work_group_invocations = 256;

   /* Payload, shared and output memory. The combined limits are derived
    * from the individual ones stored just above them.
    */
   mc->max_task_payload_size = 16384;
   mc->max_task_shared_memory_size = 65536;
   mc->max_mesh_shared_memory_size = 28672;
   mc->max_task_payload_and_shared_memory_size = 65536;
   mc->max_mesh_payload_and_shared_memory_size =
      mc->max_task_payload_size + mc->max_mesh_shared_memory_size;
   mc->max_mesh_output_memory_size = 32 * 1024;
   mc->max_mesh_payload_and_output_memory_size =
      mc->max_task_payload_size + mc->max_mesh_output_memory_size;

   /* Mesh outputs. */
   mc->max_mesh_output_vertices = 256;
   mc->max_mesh_output_primitives = 256;
   mc->max_mesh_output_components = 128;
   mc->max_mesh_output_layers = 8;
   mc->max_mesh_multiview_view_count = 1;
   mc->mesh_output_per_vertex_granularity = 1;
   mc->mesh_output_per_primitive_granularity = 1;

   /* Preferences reported to the frontend. */
   mc->max_preferred_task_work_group_invocations = 64;
   mc->max_preferred_mesh_work_group_invocations = 128;
   mc->mesh_prefers_local_invocation_vertex_output = true;
   mc->mesh_prefers_local_invocation_primitive_output = true;
   mc->mesh_prefers_compact_vertex_output = true;
   mc->mesh_prefers_compact_primitive_output = false;

   mc->pipeline_statistic_queries = sscreen->info.gfx_level >= GFX11;
}
static void si_init_gfx_caps(struct si_screen *sscreen)
{
struct pipe_caps *caps = (struct pipe_caps *)&sscreen->b.caps;
/* Gfx8 (Polaris11) hangs, so don't enable this on Gfx8 and older chips. */
bool enable_sparse =
sscreen->info.gfx_level >= GFX9 && sscreen->info.has_sparse;
/* Supported features (boolean caps). */
caps->max_dual_source_render_targets = true;
caps->anisotropic_filter = true;
caps->occlusion_query = true;
caps->texture_mirror_clamp = true;
caps->texture_shadow_lod = true;
caps->texture_mirror_clamp_to_edge = true;
caps->blend_equation_separate = true;
caps->texture_swizzle = true;
caps->depth_clip_disable = true;
caps->depth_clip_disable_separate = true;
caps->shader_stencil_export = true;
caps->vertex_element_instance_divisor = true;
caps->fs_coord_origin_upper_left = true;
caps->fs_coord_pixel_center_half_integer = true;
caps->fs_coord_pixel_center_integer = true;
caps->fragment_shader_texture_lod = true;
caps->fragment_shader_derivatives = true;
caps->primitive_restart = true;
caps->primitive_restart_fixed_index = true;
caps->conditional_render = true;
caps->texture_barrier = true;
caps->indep_blend_enable = true;
caps->indep_blend_func = true;
caps->vertex_color_unclamped = true;
caps->start_instance = true;
caps->npot_textures = true;
caps->mixed_framebuffer_sizes = true;
caps->mixed_color_depth_bits = true;
caps->vertex_color_clamped = true;
caps->fragment_color_clamped = true;
caps->vs_instanceid = true;
caps->texture_buffer_objects = true;
caps->vs_layer_viewport = true;
caps->query_pipeline_statistics = true;
caps->sample_shading = true;
caps->draw_indirect = true;
caps->clip_halfz = true;
caps->vs_window_space_position = true;
caps->polygon_offset_clamp = true;
caps->multisample_z_resolve = true;
caps->quads_follow_provoking_vertex_convention = true;
caps->tgsi_texcoord = true;
caps->fs_fine_derivative = true;
caps->conditional_render_inverted = true;
caps->texture_float_linear = true;
caps->texture_half_float_linear = true;
caps->depth_bounds_test = true;
caps->sampler_view_target = true;
caps->texture_query_lod = true;
caps->texture_gather_sm5 = true;
caps->texture_query_samples = true;
caps->force_persample_interp = true;
caps->copy_between_compressed_and_plain_formats = true;
caps->fs_position_is_sysval = true;
caps->fs_face_is_integer_sysval = true;
caps->invalidate_buffer = true;
caps->surface_reinterpret_blocks = true;
caps->compressed_surface_reinterpret_blocks_layered = true;
caps->query_buffer_object = true;
caps->query_memory_info = true;
caps->shader_pack_half_float = true;
caps->framebuffer_no_attachment = true;
caps->robust_buffer_access_behavior = true;
caps->string_marker = true;
caps->cull_distance = true;
caps->shader_array_components = true;
caps->stream_output_pause_resume = true;
caps->stream_output_interleave_buffers = true;
caps->doubles = true;
caps->tes_layer_viewport = true;
caps->bindless_texture = true;
caps->query_timestamp = true;
caps->query_time_elapsed = true;
caps->nir_samplers_as_deref = true;
caps->memobj = true;
caps->load_constbuf = true;
caps->int64 = true;
caps->shader_clock = true;
caps->can_bind_const_buffer_as_vertex = true;
caps->allow_mapped_buffers_during_execution = true;
caps->signed_vertex_buffer_offset = true;
caps->shader_ballot = true;
caps->shader_group_vote = true;
caps->compute_grid_info_last_block = true;
caps->image_load_formatted = true;
caps->prefer_compute_for_multimedia = true;
caps->packed_uniforms = true;
caps->gl_spirv = true;
caps->alpha_to_coverage_dither_control = true;
caps->map_unsynchronized_thread_safe = true;
caps->no_clip_on_copy_tex = true;
caps->shader_atomic_int64 = true;
caps->frontend_noop = true;
caps->demote_to_helper_invocation = true;
caps->prefer_real_buffer_in_constbuf0 = true;
caps->compute_shader_derivatives = true;
caps->image_atomic_inc_wrap = true;
caps->image_store_formatted = true;
caps->allow_draw_out_of_order = true;
caps->query_so_overflow = true;
caps->glsl_tess_levels_as_inputs = true;
caps->device_reset_status_query = true;
caps->texture_multisample = true;
caps->allow_glthread_buffer_subdata_opt = true; /* TODO: remove if it's slow */
caps->null_textures = true;
caps->has_const_bw = true;
caps->cl_gl_sharing = true;
caps->call_finalize_nir_in_linker = true;
caps->blit_3d = true;
caps->glsl_bindless_handles_are_32bit = true;
caps->fbfetch = 1;
caps->graphics = sscreen->info.has_graphics;
caps->mesh_shader = sscreen->b.nir_options[MESA_SHADER_MESH];
caps->compute = sscreen->has_gfx_compute;
/* Tahiti and Verde only: reduction mode is unsupported due to a bug
* (it might work sometimes, but that's not enough)
*/
caps->sampler_reduction_minmax =
caps->sampler_reduction_minmax_arb =
!(sscreen->info.family == CHIP_TAHITI || sscreen->info.family == CHIP_VERDE);
caps->texture_transfer_modes =
PIPE_TEXTURE_TRANSFER_BLIT | PIPE_TEXTURE_TRANSFER_COMPUTE;
caps->draw_vertex_state = !(sscreen->debug_flags & DBG(NO_FAST_DISPLAY_LIST));
caps->shader_samples_identical =
sscreen->info.compiler_info.has_fmask && !(sscreen->debug_flags & DBG(NO_FMASK));
caps->glsl_zero_init = 2;
caps->generate_mipmap =
caps->seamless_cube_map =
caps->seamless_cube_map_per_texture =
caps->cube_map_array =
sscreen->info.compiler_info.has_3d_cube_border_color_mipmap;
caps->post_depth_coverage = sscreen->info.gfx_level >= GFX10;
caps->max_vertex_buffers = SI_MAX_ATTRIBS;
caps->constant_buffer_offset_alignment =
caps->texture_buffer_offset_alignment =
caps->max_texture_gather_components =
caps->max_stream_output_buffers =
caps->max_vertex_streams =
caps->shader_buffer_offset_alignment =
caps->max_window_rectangles = 4;
caps->glsl_feature_level =
caps->glsl_feature_level_compatibility = 460;
/* Optimal number for good TexSubImage performance on Polaris10. */
caps->max_texture_upload_memory_budget = 64 * 1024 * 1024;
caps->gl_begin_end_buffer_size = 4096 * 1024;
/* Return 1/4th of the heap size as the maximum because the max size is not practically
* allocatable. Also, this can only return UINT32_MAX at most.
*/
unsigned max_size = MIN2((sscreen->info.max_heap_size_kb * 1024ull) / 4, UINT32_MAX);
/* Allow max 512 MB to pass CTS with a 32-bit build. */
if (sizeof(void*) == 4)
max_size = MIN2(max_size, 512 * 1024 * 1024);
caps->max_constant_buffer_size =
caps->max_shader_buffer_size = max_size;
unsigned max_texels = caps->max_shader_buffer_size;
/* FYI, BUF_RSRC_WORD2.NUM_RECORDS field limit is UINT32_MAX. */
/* Gfx8 and older use the size in bytes for bounds checking, and the max element size
* is 16B. Gfx9 and newer use the VGPR index for bounds checking.
*/
if (sscreen->info.gfx_level <= GFX8)
max_texels = MIN2(max_texels, UINT32_MAX / 16);
else
/* Gallium has a limitation that it can only bind UINT32_MAX bytes, not texels.
* TODO: Remove this after the gallium interface is changed. */
max_texels = MIN2(max_texels, UINT32_MAX / 16);
caps->max_texel_buffer_elements = max_texels;
/* Allow 1/4th of the heap size. */
caps->max_texture_mb = sscreen->info.max_heap_size_kb / 1024 / 4;
caps->prefer_back_buffer_reuse = false;
caps->prefer_imm_arrays_as_constbuf = false;
caps->performance_monitor =
sscreen->info.gfx_level >= GFX7 && sscreen->info.gfx_level <= GFX10_3;
caps->sparse_buffer_page_size = enable_sparse ? RADEON_SPARSE_PAGE_SIZE : 0;
caps->constbuf0_flags = SI_RESOURCE_FLAG_32BIT;
caps->draw_parameters =
caps->multi_draw_indirect =
caps->multi_draw_indirect_params = sscreen->has_draw_indirect_multi;
caps->max_shader_patch_varyings = 30;
caps->max_varyings =
caps->max_gs_invocations = 32;
caps->texture_border_color_quirk =
sscreen->info.gfx_level <= GFX8 ? PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600 : 0;
/* Stream output. */
caps->max_stream_output_separate_components =
caps->max_stream_output_interleaved_components = 32 * 4;
/* gfx9 has to report 256 to make piglit/gs-max-output pass.
* gfx8 and earlier can do 1024.
*/
caps->max_geometry_output_vertices = 256;
caps->max_geometry_total_output_components = 4095;
caps->max_vertex_attrib_stride = 2048;
caps->max_texture_2d_size = sscreen->info.gfx_level >= GFX12 ? 65536 : 16384;
caps->max_texture_cube_levels = sscreen->info.compiler_info.has_3d_cube_border_color_mipmap ?
(sscreen->info.gfx_level >= GFX12 ? 17 : 15) /* 64K : 16K */ : 0;
caps->max_texture_3d_levels = sscreen->info.compiler_info.has_3d_cube_border_color_mipmap ?
/* This is limited by maximums that both the texture unit and layered rendering support. */
(sscreen->info.gfx_level >= GFX12 ? 15 : /* 16K */
(sscreen->info.gfx_level >= GFX10 ? 14 : 12)) /* 8K : 2K */ : 0;
/* This is limited by maximums that both the texture unit and layered rendering support. */
caps->max_texture_array_layers = sscreen->info.gfx_level >= GFX10 ? 8192 : 2048;
/* Sparse texture */
caps->max_sparse_texture_size = enable_sparse ? caps->max_texture_2d_size : 0;
caps->max_sparse_3d_texture_size = enable_sparse ? (1 << (caps->max_texture_3d_levels - 1)) : 0;
caps->max_sparse_array_texture_layers = enable_sparse ? caps->max_texture_array_layers : 0;
caps->sparse_texture_full_array_cube_mipmaps =
caps->query_sparse_texture_residency =
caps->clamp_sparse_texture_lod = enable_sparse;
/* Viewports and render targets. */
caps->max_viewports = SI_MAX_VIEWPORTS;
caps->viewport_subpixel_bits =
caps->rasterizer_subpixel_bits =
caps->max_render_targets = 8;
caps->framebuffer_msaa_constraints = sscreen->info.has_eqaa_surface_allocator ? 2 : 0;
caps->min_texture_gather_offset =
caps->min_texel_offset = -32;
caps->max_texture_gather_offset =
caps->max_texel_offset = 31;
caps->shader_subgroup_size = 64;
caps->shader_subgroup_supported_stages =
BITFIELD_MASK(caps->mesh_shader ? MESA_SHADER_MESH_STAGES : MESA_SHADER_STAGES);
caps->shader_subgroup_supported_features = PIPE_SHADER_SUBGROUP_FEATURE_MASK;
caps->shader_subgroup_quad_all_stages = true;
caps->min_line_width =
caps->min_line_width_aa = 1; /* due to axis-aligned end caps at line width 1 */
caps->min_point_size =
caps->min_point_size_aa =
caps->point_size_granularity =
caps->line_width_granularity = 1.0 / 8.0; /* due to the register field precision */
/* This depends on the quant mode, though the precise interactions are unknown. */
caps->max_line_width =
caps->max_line_width_aa = 2048;
caps->max_point_size =
caps->max_point_size_aa = SI_MAX_POINT_SIZE;
caps->max_texture_anisotropy = 16.0f;
/* The hw can do 31, but this test fails if we use that:
* KHR-GL46.texture_lod_bias.texture_lod_bias_all
*/
caps->max_texture_lod_bias = 16;
/* Override the value set by u_init_pipe_screen_caps because it was called
* before shader caps are set.
*/
caps->hardware_gl_select = debug_get_bool_option("MESA_HW_ACCEL_SELECT", true);
}
bool si_init_gfx_screen(struct si_screen *sscreen) {
unsigned hw_threads, num_comp_hi_threads, num_comp_lo_threads;
const bool support_aco = aco_is_gpu_supported(&sscreen->info);

View file

@ -123,51 +123,6 @@ static int si_get_screen_fd(struct pipe_screen *screen)
return ws->get_fd(ws);
}
/**
 * Cost-limit callback installed as
 * nir_shader_compiler_options::varying_expression_max_cost.
 *
 * Searches si_shader_profiles[] for the first entry whose BLAKE3 hash equals
 * the consumer's source hash. If that entry has
 * SI_PROFILE_NO_OPT_UNIFORM_VARYINGS set, the result is 0 (only constants
 * are propagated); otherwise ac_nir_varying_expression_max_cost() decides.
 */
static unsigned si_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer)
{
   const unsigned profile_count = si_get_num_shader_profiles();
   unsigned idx = 0;

   /* Stop at the first profile that matches the consumer shader. */
   while (idx < profile_count &&
          !_mesa_printed_blake3_equal(consumer->info.source_blake3,
                                      si_shader_profiles[idx].blake3))
      idx++;

   const bool matched = idx < profile_count;

   if (matched && (si_shader_profiles[idx].options & SI_PROFILE_NO_OPT_UNIFORM_VARYINGS))
      return 0; /* only propagate constants */

   return ac_nir_varying_expression_max_cost(producer, consumer);
}
/**
 * Report whether the mesh shader stage is enabled for this screen.
 *
 * Requires, in the order checked: NGG in use, gfx10.3 or newer, no user
 * queue on the GFX IP, ACO support for the GPU, and the USE_LLVM debug flag
 * not being set.
 */
static bool enable_mesh_shader(struct si_screen *sscreen)
{
   if (!sscreen->use_ngg)
      return false;

   if (sscreen->info.gfx_level < GFX10_3)
      return false;

   /* TODO: not support user queue for now */
   if (sscreen->info.userq_ip_mask & BITFIELD_BIT(AMD_IP_GFX))
      return false;

   /* don't support LLVM */
   if (!aco_is_gpu_supported(&sscreen->info))
      return false;

   return !(sscreen->debug_flags & DBG(USE_LLVM));
}
/**
 * Filter installed as nir_shader_compiler_options::lower_to_scalar_filter
 * when packed 16-bit math is available.
 *
 * The only case that yields false is a 2-component, 16-bit ALU instruction
 * accepted by ac_nir_op_supports_packed_math_16bit() whose per-source
 * swizzles differ at most in their lowest bit. Any other instruction yields
 * true (presumably meaning "scalarize" - confirm against the NIR
 * lower-to-scalar pass).
 *
 * \param instr  instruction being examined
 * \param data   unused
 */
static bool si_alu_to_scalar_packed_math_filter(const nir_instr *instr, const void *data)
{
   if (instr->type != nir_instr_type_alu)
      return true;

   nir_alu_instr *alu = nir_instr_as_alu(instr);

   const bool is_vec2_16bit = alu->def.bit_size == 16 && alu->def.num_components == 2;
   if (!is_vec2_16bit || !ac_nir_op_supports_packed_math_16bit(alu))
      return true;

   /* ACO requires that all but the first bit of swizzle must be equal. */
   for (unsigned src = 0; src < nir_op_infos[alu->op].num_inputs; src++) {
      if ((alu->src[src].swizzle[0] >> 1) != (alu->src[src].swizzle[1] >> 1))
         return true;
   }

   return false;
}
void si_init_screen_get_functions(struct si_screen *sscreen)
{
sscreen->b.get_name = si_get_name;
@ -180,544 +135,6 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
sscreen->b.query_memory_info = si_query_memory_info;
}
void si_init_screen_nir_options(struct si_screen *sscreen)
{
#ifndef HAVE_GFX_COMPUTE
return;
#endif
/* |---------------------------------- Performance & Availability --------------------------------|
* |MAD/MAC/MADAK/MADMK|MAD_LEGACY|MAC_LEGACY| FMA |FMAC/FMAAK/FMAMK|FMA_LEGACY|PK_FMA_F16,|Best choice
* Arch | F32,F16,F64 | F32,F16 | F32,F16 |F32,F16,F64 | F32,F16 | F32 |PK_FMAC_F16|F16,F32,F64
* ------------------------------------------------------------------------------------------------------------------
* gfx6,7 | 1 , - , - | 1 , - | 1 , - |1/4, - ,1/16| - , - | - | - , - | - ,MAD,FMA
* gfx8 | 1 , 1 , - | 1 , - | - , - |1/4, 1 ,1/16| - , - | - | - , - |MAD,MAD,FMA
* gfx9 | 1 ,1|0, - | 1 , - | - , - | 1 , 1 ,1/16| 0|1, - | - | 2 , - |FMA,MAD,FMA
* gfx10 | 1 , - , - | 1 , - | 1 , - | 1 , 1 ,1/16| 1 , 1 | - | 2 , 2 |FMA,MAD,FMA
* gfx10.3| - , - , - | - , - | - , - | 1 , 1 ,1/16| 1 , 1 | 1 | 2 , 2 | all FMA
* gfx11 | - , - , - | - , - | - , - | 2 , 2 ,1/16| 2 , 2 | 2 | 2 , 2 | all FMA
*
* Tahiti, Hawaii, Carrizo, Vega20: FMA_F32 is full rate, FMA_F64 is 1/4
* gfx9 supports MAD_F16 only on Vega10, Raven, Raven2, Renoir.
* gfx9 supports FMAC_F32 only on Vega20, but doesn't support FMAAK and FMAMK.
*
* gfx8 prefers MAD for F16 because of MAC/MADAK/MADMK.
* gfx9 and newer prefer FMA for F16 because of the packed instruction.
* gfx10 and older prefer MAD for F32 because of the legacy instruction.
*/
bool use_fma32 =
sscreen->info.gfx_level >= GFX10_3 ||
(sscreen->info.family >= CHIP_GFX940 && !sscreen->info.has_graphics) ||
/* fma32 is too slow for gpu < gfx9, so apply the option only for gpu >= gfx9 */
(sscreen->info.gfx_level >= GFX9 && sscreen->options.force_use_fma32);
/* GFX8 has precision issues with 16-bit PS outputs. */
bool has_16bit_io = sscreen->info.gfx_level >= GFX9;
nir_shader_compiler_options *options = sscreen->nir_options;
ac_nir_set_options(&sscreen->info.compiler_info, !sscreen->use_aco, options);
options->lower_ffma16 = sscreen->info.gfx_level < GFX9;
options->lower_ffma32 = !use_fma32;
options->lower_ffma64 = false;
options->fuse_ffma16 = sscreen->info.gfx_level >= GFX9;
options->fuse_ffma32 = use_fma32;
options->fuse_ffma64 = true;
options->lower_uniforms_to_ubo = true;
options->lower_to_scalar = true;
options->lower_to_scalar_filter =
sscreen->info.compiler_info.has_packed_math_16bit ? si_alu_to_scalar_packed_math_filter : NULL;
options->max_unroll_iterations = 128;
options->max_unroll_iterations_aggressive = 128;
/* For OpenGL, rounding mode is undefined. We want fast packing with v_cvt_pkrtz_f16,
* but if we use it, all f32->f16 conversions have to round towards zero,
* because both scalar and vec2 down-conversions have to round equally.
*
* For OpenCL, rounding mode is explicit. This will only lower f2f16 to f2f16_rtz
* when execution mode is rtz instead of rtne.
*
* GFX8 has precision issues with this option.
*/
options->force_f2f16_rtz = sscreen->info.gfx_level >= GFX9;
options->io_options |= (!has_16bit_io ? nir_io_mediump_is_32bit : 0) | nir_io_has_intrinsics |
(sscreen->use_ngg_culling ?
nir_io_compaction_groups_tes_inputs_into_pos_and_var_groups : 0);
if (has_16bit_io) {
options->lower_mediump_io = sscreen->options.mediump ? si_nir_lower_mediump_io_option
: si_nir_lower_mediump_io_default;
}
/* HW supports indirect indexing for: | Enabled in driver
* -------------------------------------------------------
* TCS inputs | Yes
* TES inputs | Yes
* GS inputs | No
* -------------------------------------------------------
* VS outputs before TCS | No
* TCS outputs | Yes
* VS/TES outputs before GS | No
*/
options->varying_expression_max_cost = si_varying_expression_max_cost;
unsigned max_support_shader = enable_mesh_shader(sscreen) ?
MESA_SHADER_MESH : MESA_SHADER_COMPUTE;
for (unsigned i = 0; i <= max_support_shader; i++)
sscreen->b.nir_options[i] = sscreen->nir_options;
}
/**
 * Fill sscreen->b.shader_caps[] for every shader stage (vertex through mesh)
 * that has NIR compiler options in sscreen->b.nir_options[]. Stages without
 * options are skipped and their caps are not written.
 */
void si_init_shader_caps(struct si_screen *sscreen)
{
   for (unsigned stage = 0; stage <= MESA_SHADER_MESH; stage++) {
      if (sscreen->b.nir_options[stage]) {
         struct pipe_shader_caps *sc =
            (struct pipe_shader_caps *)&sscreen->b.shader_caps[stage];

         /* Shader limits. */
         sc->max_instructions =
         sc->max_alu_instructions =
         sc->max_tex_instructions =
         sc->max_tex_indirections =
         sc->max_control_flow_depth = 16384;

         if (stage == MESA_SHADER_VERTEX)
            sc->max_inputs = SI_MAX_ATTRIBS;
         else
            sc->max_inputs = 32;

         if (stage == MESA_SHADER_FRAGMENT)
            sc->max_outputs = 8;
         else
            sc->max_outputs = 32;

         sc->max_temps = 256; /* Max native temporaries. */
         sc->max_const_buffer0_size = 1 << 26; /* 64 MB */

         /* Resource slot counts. */
         sc->max_const_buffers = SI_NUM_CONST_BUFFERS;
         sc->max_texture_samplers =
         sc->max_sampler_views = SI_NUM_SAMPLERS;
         sc->max_shader_buffers = SI_NUM_SHADER_BUFFERS;
         sc->max_shader_images = SI_NUM_IMAGES;

         sc->supported_irs = (1 << PIPE_SHADER_IR_TGSI) | (1 << PIPE_SHADER_IR_NIR);

         /* Supported boolean features. */
         sc->cont_supported = true;
         sc->tgsi_sqrt_supported = true;
         sc->tgsi_any_inout_decl_range = true;
         sc->indirect_temp_addr = true;
         sc->indirect_const_addr = true;
         sc->integers = true;
         sc->int64_atomics = true;

         /* We need F16C for fast FP16 conversions in glUniform.
          * It's supported since Intel Ivy Bridge and AMD Bulldozer.
          */
         const bool alu_16bit =
            sscreen->info.gfx_level >= GFX8 && util_get_cpu_caps()->has_f16c;

         sc->fp16 = alu_16bit;
         sc->fp16_derivatives = alu_16bit;
         sc->fp16_const_buffers = alu_16bit;
         sc->int16 = alu_16bit;
         sc->glsl_16bit_consts = alu_16bit;
         sc->glsl_16bit_load_dst = sscreen->info.gfx_level >= GFX9;
      }
   }
}
/**
 * Fill sscreen->b.compute_caps: grid/block limits, memory sizes derived from
 * info.max_heap_size_kb, clock and CU counts, and the subgroup (wave) sizes,
 * which depend on the gfx level and the W32_CS / W64_CS shader debug flags.
 */
void si_init_compute_caps(struct si_screen *sscreen)
{
   struct pipe_compute_caps *cc =
      (struct pipe_compute_caps *)&sscreen->b.compute_caps;

   const bool pre_gfx10 = sscreen->info.gfx_level < GFX10;
   const bool force_wave32 = (sscreen->shader_debug_flags & DBG(W32_CS)) != 0;
   const bool force_wave64 = (sscreen->shader_debug_flags & DBG(W64_CS)) != 0;

   cc->grid_dimension = 3;

   /* Use this size, so that internal counters don't overflow 64 bits. */
   cc->max_grid_size[0] = UINT32_MAX;
   cc->max_grid_size[1] = UINT16_MAX;
   cc->max_grid_size[2] = UINT16_MAX;

   for (unsigned dim = 0; dim < 3; dim++)
      cc->max_block_size[dim] = 1024;

   cc->max_threads_per_block = 1024;
   cc->address_bits = 64;

   /* Return 1/4 of the heap size as the maximum because the max size is not practically
    * allocatable.
    */
   cc->max_mem_alloc_size = (sscreen->info.max_heap_size_kb / 4) * 1024ull;

   /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least
    * 1/4 of the MAX_GLOBAL_SIZE.  Since the
    * MAX_MEM_ALLOC_SIZE is fixed for older kernels,
    * make sure we never report more than
    * 4 * MAX_MEM_ALLOC_SIZE.
    */
   cc->max_global_size = MIN2(4 * cc->max_mem_alloc_size,
                              sscreen->info.max_heap_size_kb * 1024ull);

   /* Value reported by the closed source driver. */
   if (sscreen->info.gfx_level == GFX6)
      cc->max_local_size = 32 * 1024;
   else
      cc->max_local_size = 64 * 1024;

   cc->max_clock_frequency = sscreen->info.max_gpu_freq_mhz;
   cc->max_compute_units = sscreen->info.num_cu;

   /* Number of subgroups that fit in a 1024-thread block. */
   const unsigned block_threads = 1024;
   const unsigned wave_size = (force_wave64 || pre_gfx10) ? 64 : 32;
   cc->max_subgroups = block_threads / wave_size;

   /* Bitmask of supported subgroup sizes; W32_CS takes precedence over W64_CS. */
   if (force_wave32)
      cc->subgroup_sizes = 32;
   else if (force_wave64)
      cc->subgroup_sizes = 64;
   else if (pre_gfx10)
      cc->subgroup_sizes = 64;
   else
      cc->subgroup_sizes = 64 | 32;

   if (sscreen->info.compiler_info.has_cs_regalloc_hang_bug)
      cc->max_variable_threads_per_block = 256;
   else
      cc->max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK;
}
/**
 * Fill sscreen->b.caps.mesh with the task/mesh shader limits. All values are
 * constants except pipeline_statistic_queries, which is set on gfx11 and
 * newer only.
 */
void si_init_mesh_caps(struct si_screen *sscreen)
{
   struct pipe_mesh_caps *mc = (struct pipe_mesh_caps *)&sscreen->b.caps.mesh;

   /* Work group counts and sizes, per dimension. */
   for (unsigned dim = 0; dim < 3; dim++) {
      mc->max_task_work_group_count[dim] = 65535;
      mc->max_mesh_work_group_count[dim] = 65535;
      mc->max_task_work_group_size[dim] = 1024;
      mc->max_mesh_work_group_size[dim] = 256;
   }

   /* Work group totals. */
   mc->max_task_work_group_total_count = 1 << 22;
   mc->max_mesh_work_group_total_count = 1 << 22;
   mc->max_task_work_group_invocations = 1024;
   mc->max_mesh_work_group_invocations = 256;

   /* Payload, shared and output memory. The combined limits are derived
    * from the individual ones stored just above them.
    */
   mc->max_task_payload_size = 16384;
   mc->max_task_shared_memory_size = 65536;
   mc->max_mesh_shared_memory_size = 28672;
   mc->max_task_payload_and_shared_memory_size = 65536;
   mc->max_mesh_payload_and_shared_memory_size =
      mc->max_task_payload_size + mc->max_mesh_shared_memory_size;
   mc->max_mesh_output_memory_size = 32 * 1024;
   mc->max_mesh_payload_and_output_memory_size =
      mc->max_task_payload_size + mc->max_mesh_output_memory_size;

   /* Mesh outputs. */
   mc->max_mesh_output_vertices = 256;
   mc->max_mesh_output_primitives = 256;
   mc->max_mesh_output_components = 128;
   mc->max_mesh_output_layers = 8;
   mc->max_mesh_multiview_view_count = 1;
   mc->mesh_output_per_vertex_granularity = 1;
   mc->mesh_output_per_primitive_granularity = 1;

   /* Preferences reported to the frontend. */
   mc->max_preferred_task_work_group_invocations = 64;
   mc->max_preferred_mesh_work_group_invocations = 128;
   mc->mesh_prefers_local_invocation_vertex_output = true;
   mc->mesh_prefers_local_invocation_primitive_output = true;
   mc->mesh_prefers_compact_vertex_output = true;
   mc->mesh_prefers_compact_primitive_output = false;

   mc->pipeline_statistic_queries = sscreen->info.gfx_level >= GFX11;
}
void si_init_gfx_caps(struct si_screen *sscreen)
{
struct pipe_caps *caps = (struct pipe_caps *)&sscreen->b.caps;
/* Gfx8 (Polaris11) hangs, so don't enable this on Gfx8 and older chips. */
bool enable_sparse =
sscreen->info.gfx_level >= GFX9 && sscreen->info.has_sparse;
/* Supported features (boolean caps). */
caps->max_dual_source_render_targets = true;
caps->anisotropic_filter = true;
caps->occlusion_query = true;
caps->texture_mirror_clamp = true;
caps->texture_shadow_lod = true;
caps->texture_mirror_clamp_to_edge = true;
caps->blend_equation_separate = true;
caps->texture_swizzle = true;
caps->depth_clip_disable = true;
caps->depth_clip_disable_separate = true;
caps->shader_stencil_export = true;
caps->vertex_element_instance_divisor = true;
caps->fs_coord_origin_upper_left = true;
caps->fs_coord_pixel_center_half_integer = true;
caps->fs_coord_pixel_center_integer = true;
caps->fragment_shader_texture_lod = true;
caps->fragment_shader_derivatives = true;
caps->primitive_restart = true;
caps->primitive_restart_fixed_index = true;
caps->conditional_render = true;
caps->texture_barrier = true;
caps->indep_blend_enable = true;
caps->indep_blend_func = true;
caps->vertex_color_unclamped = true;
caps->start_instance = true;
caps->npot_textures = true;
caps->mixed_framebuffer_sizes = true;
caps->mixed_color_depth_bits = true;
caps->vertex_color_clamped = true;
caps->fragment_color_clamped = true;
caps->vs_instanceid = true;
caps->texture_buffer_objects = true;
caps->vs_layer_viewport = true;
caps->query_pipeline_statistics = true;
caps->sample_shading = true;
caps->draw_indirect = true;
caps->clip_halfz = true;
caps->vs_window_space_position = true;
caps->polygon_offset_clamp = true;
caps->multisample_z_resolve = true;
caps->quads_follow_provoking_vertex_convention = true;
caps->tgsi_texcoord = true;
caps->fs_fine_derivative = true;
caps->conditional_render_inverted = true;
caps->texture_float_linear = true;
caps->texture_half_float_linear = true;
caps->depth_bounds_test = true;
caps->sampler_view_target = true;
caps->texture_query_lod = true;
caps->texture_gather_sm5 = true;
caps->texture_query_samples = true;
caps->force_persample_interp = true;
caps->copy_between_compressed_and_plain_formats = true;
caps->fs_position_is_sysval = true;
caps->fs_face_is_integer_sysval = true;
caps->invalidate_buffer = true;
caps->surface_reinterpret_blocks = true;
caps->compressed_surface_reinterpret_blocks_layered = true;
caps->query_buffer_object = true;
caps->query_memory_info = true;
caps->shader_pack_half_float = true;
caps->framebuffer_no_attachment = true;
caps->robust_buffer_access_behavior = true;
caps->string_marker = true;
caps->cull_distance = true;
caps->shader_array_components = true;
caps->stream_output_pause_resume = true;
caps->stream_output_interleave_buffers = true;
caps->doubles = true;
caps->tes_layer_viewport = true;
caps->bindless_texture = true;
caps->query_timestamp = true;
caps->query_time_elapsed = true;
caps->nir_samplers_as_deref = true;
caps->memobj = true;
caps->load_constbuf = true;
caps->int64 = true;
caps->shader_clock = true;
caps->can_bind_const_buffer_as_vertex = true;
caps->allow_mapped_buffers_during_execution = true;
caps->signed_vertex_buffer_offset = true;
caps->shader_ballot = true;
caps->shader_group_vote = true;
caps->compute_grid_info_last_block = true;
caps->image_load_formatted = true;
caps->prefer_compute_for_multimedia = true;
caps->packed_uniforms = true;
caps->gl_spirv = true;
caps->alpha_to_coverage_dither_control = true;
caps->map_unsynchronized_thread_safe = true;
caps->no_clip_on_copy_tex = true;
caps->shader_atomic_int64 = true;
caps->frontend_noop = true;
caps->demote_to_helper_invocation = true;
caps->prefer_real_buffer_in_constbuf0 = true;
caps->compute_shader_derivatives = true;
caps->image_atomic_inc_wrap = true;
caps->image_store_formatted = true;
caps->allow_draw_out_of_order = true;
caps->query_so_overflow = true;
caps->glsl_tess_levels_as_inputs = true;
caps->device_reset_status_query = true;
caps->texture_multisample = true;
caps->allow_glthread_buffer_subdata_opt = true; /* TODO: remove if it's slow */
caps->null_textures = true;
caps->has_const_bw = true;
caps->cl_gl_sharing = true;
caps->call_finalize_nir_in_linker = true;
caps->blit_3d = true;
caps->glsl_bindless_handles_are_32bit = true;
caps->fbfetch = 1;
caps->graphics = sscreen->info.has_graphics;
caps->mesh_shader = sscreen->b.nir_options[MESA_SHADER_MESH];
caps->compute = sscreen->has_gfx_compute;
/* Tahiti and Verde only: reduction mode is unsupported due to a bug
* (it might work sometimes, but that's not enough)
*/
caps->sampler_reduction_minmax =
caps->sampler_reduction_minmax_arb =
!(sscreen->info.family == CHIP_TAHITI || sscreen->info.family == CHIP_VERDE);
caps->texture_transfer_modes =
PIPE_TEXTURE_TRANSFER_BLIT | PIPE_TEXTURE_TRANSFER_COMPUTE;
caps->draw_vertex_state = !(sscreen->debug_flags & DBG(NO_FAST_DISPLAY_LIST));
caps->shader_samples_identical =
sscreen->info.compiler_info.has_fmask && !(sscreen->debug_flags & DBG(NO_FMASK));
caps->glsl_zero_init = 2;
caps->generate_mipmap =
caps->seamless_cube_map =
caps->seamless_cube_map_per_texture =
caps->cube_map_array =
sscreen->info.compiler_info.has_3d_cube_border_color_mipmap;
caps->post_depth_coverage = sscreen->info.gfx_level >= GFX10;
caps->max_vertex_buffers = SI_MAX_ATTRIBS;
caps->constant_buffer_offset_alignment =
caps->texture_buffer_offset_alignment =
caps->max_texture_gather_components =
caps->max_stream_output_buffers =
caps->max_vertex_streams =
caps->shader_buffer_offset_alignment =
caps->max_window_rectangles = 4;
caps->glsl_feature_level =
caps->glsl_feature_level_compatibility = 460;
/* Optimal number for good TexSubImage performance on Polaris10. */
caps->max_texture_upload_memory_budget = 64 * 1024 * 1024;
caps->gl_begin_end_buffer_size = 4096 * 1024;
/* Return 1/4th of the heap size as the maximum because the max size is not practically
* allocatable. Also, this can only return UINT32_MAX at most.
*/
unsigned max_size = MIN2((sscreen->info.max_heap_size_kb * 1024ull) / 4, UINT32_MAX);
/* Allow max 512 MB to pass CTS with a 32-bit build. */
if (sizeof(void*) == 4)
max_size = MIN2(max_size, 512 * 1024 * 1024);
caps->max_constant_buffer_size =
caps->max_shader_buffer_size = max_size;
unsigned max_texels = caps->max_shader_buffer_size;
/* FYI, BUF_RSRC_WORD2.NUM_RECORDS field limit is UINT32_MAX. */
/* Gfx8 and older use the size in bytes for bounds checking, and the max element size
* is 16B. Gfx9 and newer use the VGPR index for bounds checking.
*/
if (sscreen->info.gfx_level <= GFX8)
max_texels = MIN2(max_texels, UINT32_MAX / 16);
else
/* Gallium has a limitation that it can only bind UINT32_MAX bytes, not texels.
* TODO: Remove this after the gallium interface is changed. */
max_texels = MIN2(max_texels, UINT32_MAX / 16);
caps->max_texel_buffer_elements = max_texels;
/* Allow 1/4th of the heap size. */
caps->max_texture_mb = sscreen->info.max_heap_size_kb / 1024 / 4;
caps->prefer_back_buffer_reuse = false;
caps->prefer_imm_arrays_as_constbuf = false;
caps->performance_monitor =
sscreen->info.gfx_level >= GFX7 && sscreen->info.gfx_level <= GFX10_3;
caps->sparse_buffer_page_size = enable_sparse ? RADEON_SPARSE_PAGE_SIZE : 0;
caps->constbuf0_flags = SI_RESOURCE_FLAG_32BIT;
caps->draw_parameters =
caps->multi_draw_indirect =
caps->multi_draw_indirect_params = sscreen->has_draw_indirect_multi;
caps->max_shader_patch_varyings = 30;
caps->max_varyings =
caps->max_gs_invocations = 32;
caps->texture_border_color_quirk =
sscreen->info.gfx_level <= GFX8 ? PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_R600 : 0;
/* Stream output. */
caps->max_stream_output_separate_components =
caps->max_stream_output_interleaved_components = 32 * 4;
/* gfx9 has to report 256 to make piglit/gs-max-output pass.
* gfx8 and earlier can do 1024.
*/
caps->max_geometry_output_vertices = 256;
caps->max_geometry_total_output_components = 4095;
caps->max_vertex_attrib_stride = 2048;
caps->max_texture_2d_size = sscreen->info.gfx_level >= GFX12 ? 65536 : 16384;
caps->max_texture_cube_levels = sscreen->info.compiler_info.has_3d_cube_border_color_mipmap ?
(sscreen->info.gfx_level >= GFX12 ? 17 : 15) /* 64K : 16K */ : 0;
caps->max_texture_3d_levels = sscreen->info.compiler_info.has_3d_cube_border_color_mipmap ?
/* This is limited by maximums that both the texture unit and layered rendering support. */
(sscreen->info.gfx_level >= GFX12 ? 15 : /* 16K */
(sscreen->info.gfx_level >= GFX10 ? 14 : 12)) /* 8K : 2K */ : 0;
/* This is limited by maximums that both the texture unit and layered rendering support. */
caps->max_texture_array_layers = sscreen->info.gfx_level >= GFX10 ? 8192 : 2048;
/* Sparse texture */
caps->max_sparse_texture_size = enable_sparse ? caps->max_texture_2d_size : 0;
caps->max_sparse_3d_texture_size = enable_sparse ? (1 << (caps->max_texture_3d_levels - 1)) : 0;
caps->max_sparse_array_texture_layers = enable_sparse ? caps->max_texture_array_layers : 0;
caps->sparse_texture_full_array_cube_mipmaps =
caps->query_sparse_texture_residency =
caps->clamp_sparse_texture_lod = enable_sparse;
/* Viewports and render targets. */
caps->max_viewports = SI_MAX_VIEWPORTS;
caps->viewport_subpixel_bits =
caps->rasterizer_subpixel_bits =
caps->max_render_targets = 8;
caps->framebuffer_msaa_constraints = sscreen->info.has_eqaa_surface_allocator ? 2 : 0;
caps->min_texture_gather_offset =
caps->min_texel_offset = -32;
caps->max_texture_gather_offset =
caps->max_texel_offset = 31;
caps->shader_subgroup_size = 64;
caps->shader_subgroup_supported_stages =
BITFIELD_MASK(caps->mesh_shader ? MESA_SHADER_MESH_STAGES : MESA_SHADER_STAGES);
caps->shader_subgroup_supported_features = PIPE_SHADER_SUBGROUP_FEATURE_MASK;
caps->shader_subgroup_quad_all_stages = true;
caps->min_line_width =
caps->min_line_width_aa = 1; /* due to axis-aligned end caps at line width 1 */
caps->min_point_size =
caps->min_point_size_aa =
caps->point_size_granularity =
caps->line_width_granularity = 1.0 / 8.0; /* due to the register field precision */
/* This depends on the quant mode, though the precise interactions are unknown. */
caps->max_line_width =
caps->max_line_width_aa = 2048;
caps->max_point_size =
caps->max_point_size_aa = SI_MAX_POINT_SIZE;
caps->max_texture_anisotropy = 16.0f;
/* The hw can do 31, but this test fails if we use that:
* KHR-GL46.texture_lod_bias.texture_lod_bias_all
*/
caps->max_texture_lod_bias = 16;
/* Override the value set by u_init_pipe_screen_caps because it was called
* before shader caps are set.
*/
caps->hardware_gl_select = debug_get_bool_option("MESA_HW_ACCEL_SELECT", true);
}
void si_init_screen_caps(struct si_screen *sscreen)
{
struct pipe_caps *caps = (struct pipe_caps *)&sscreen->b.caps;
@ -750,7 +167,6 @@ void si_init_screen_caps(struct si_screen *sscreen)
caps->native_fence_fd = sscreen->info.has_fence_to_handle;
caps->endianness = PIPE_ENDIAN_LITTLE;
caps->vendor_id = ATI_VENDOR_ID;
caps->device_id = sscreen->info.pci_id;
caps->video_memory = sscreen->info.vram_size_kb >> 10;

View file

@ -1592,12 +1592,7 @@ struct pipe_fence_handle *si_create_fence(struct pipe_context *ctx,
/* si_get.c */
void si_init_screen_get_functions(struct si_screen *sscreen);
void si_init_screen_nir_options(struct si_screen *sscreen);
void si_init_shader_caps(struct si_screen *sscreen);
void si_init_compute_caps(struct si_screen *sscreen);
void si_init_screen_caps(struct si_screen *sscreen);
void si_init_mesh_caps(struct si_screen *screen);
void si_init_gfx_caps(struct si_screen *sscreen);
void si_init_renderer_string(struct si_screen *sscreen);
bool si_sdma_copy_image(struct si_context *ctx, struct si_texture *dst, struct si_texture *src);