svga: shader translation for compute, image views and shader buffers

This patch handles shader translation for compute, image views and shader
buffers and updates the corresponding shader compile keys.
It also adds support for using a raw shader buffer when a shader buffer is
used as a constant buffer.
This patch is a squash of numerous in-house patches.

Reviewed-by: Charmaine Lee <charmainel@vmware.com>

v2: As pointed out by Thomas, fix the revert of 64292c0f that this patch caused.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14270>
This commit is contained in:
Neha Bhende 2021-12-16 14:52:26 -08:00 committed by Marge Bot
parent 247c61f2d0
commit 1e99a30738
6 changed files with 2082 additions and 161 deletions

View file

@ -98,7 +98,6 @@ enum svga_hud {
#define SVGA_MAX_CONST_BUF_SIZE (4096 * 4 * sizeof(int))
#define CONST0_UPLOAD_ALIGNMENT 256
#define SVGA_MAX_IMAGES SVGA3D_MAX_UAVIEWS
#define SVGA_MAX_SHADER_BUFFERS SVGA3D_MAX_UAVIEWS
#define SVGA_MAX_ATOMIC_BUFFERS SVGA3D_MAX_UAVIEWS
@ -624,6 +623,9 @@ struct svga_context
/** bitmasks of which const buffers are changed */
unsigned dirty_constbufs[PIPE_SHADER_TYPES];
/** bitmasks of which const buffers to be bound as raw buffers */
unsigned raw_constbufs[PIPE_SHADER_TYPES];
unsigned texture_timestamp;
unsigned uav_timestamp[2];
@ -967,6 +969,21 @@ svga_rects_equal(const SVGA3dRect *r1, const SVGA3dRect *r2)
return memcmp(r1, r2, sizeof(*r1)) == 0;
}
/* A helper function to return TRUE if sampler state mapping is
* to be used. Sampler state mapping is used in GL43 context
* if the number of sampler states exceeds the SVGA device limit or
* the sampler state mapping environment variable is set.
*/
static inline boolean
svga_use_sampler_state_mapping(const struct svga_context *svga,
unsigned num_sampler_states)
{
return svga_have_gl43(svga) &&
(svga_screen(svga->pipe.screen)->debug.sampler_state_mapping ||
num_sampler_states > SVGA3D_DX_MAX_SAMPLERS);
}
/**
* If the Gallium HUD is enabled, this will return the current time.
* Otherwise, just return zero.

View file

@ -639,13 +639,13 @@ vgpu10_get_shader_param(struct pipe_screen *screen,
if (shader == PIPE_SHADER_FRAGMENT)
return VGPU10_MAX_FS_INPUTS;
else if (shader == PIPE_SHADER_GEOMETRY)
return VGPU10_MAX_GS_INPUTS;
return svgascreen->max_gs_inputs;
else if (shader == PIPE_SHADER_TESS_CTRL)
return VGPU11_MAX_HS_INPUT_CONTROL_POINTS;
else if (shader == PIPE_SHADER_TESS_EVAL)
return VGPU11_MAX_DS_INPUT_CONTROL_POINTS;
else
return VGPU10_MAX_VS_INPUTS;
return svgascreen->max_vs_inputs;
case PIPE_SHADER_CAP_MAX_OUTPUTS:
if (shader == PIPE_SHADER_FRAGMENT)
return VGPU10_MAX_FS_OUTPUTS;
@ -656,7 +656,8 @@ vgpu10_get_shader_param(struct pipe_screen *screen,
else if (shader == PIPE_SHADER_TESS_EVAL)
return VGPU11_MAX_DS_OUTPUTS;
else
return VGPU10_MAX_VS_OUTPUTS;
return svgascreen->max_vs_outputs;
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
return VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT * sizeof(float[4]);
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
@ -973,6 +974,9 @@ svga_screen_create(struct svga_winsys_screen *sws)
goto error2;
}
svgascreen->debug.sampler_state_mapping =
debug_get_bool_option("SVGA_SAMPLER_STATE_MAPPING", FALSE);
debug_printf("%s enabled\n",
sws->have_sm5 ? "SM5" :
sws->have_sm4_1 ? "SM4_1" :
@ -1060,6 +1064,18 @@ svga_screen_create(struct svga_winsys_screen *sws)
screen->is_format_supported = svga_is_dx_format_supported;
svgascreen->max_viewports = SVGA3D_DX_MAX_VIEWPORTS;
/* Shader limits */
if (sws->have_sm4_1) {
svgascreen->max_vs_inputs = VGPU10_1_MAX_VS_INPUTS;
svgascreen->max_vs_outputs = VGPU10_1_MAX_VS_OUTPUTS;
svgascreen->max_gs_inputs = VGPU10_1_MAX_GS_INPUTS;
}
else {
svgascreen->max_vs_inputs = VGPU10_MAX_VS_INPUTS;
svgascreen->max_vs_outputs = VGPU10_MAX_VS_OUTPUTS;
svgascreen->max_gs_inputs = VGPU10_MAX_GS_INPUTS;
}
}
else {
/* VGPU9 */
@ -1097,6 +1113,11 @@ svga_screen_create(struct svga_winsys_screen *sws)
/* Only one viewport */
svgascreen->max_viewports = 1;
/* Shader limits */
svgascreen->max_vs_inputs = 16;
svgascreen->max_vs_outputs = 10;
svgascreen->max_gs_inputs = 0;
}
/* common VGPU9 / VGPU10 caps */

View file

@ -58,14 +58,20 @@ struct svga_screen
unsigned max_const_buffers;
unsigned max_viewports;
unsigned ms_samples;
unsigned max_vs_inputs;
unsigned max_vs_outputs;
unsigned max_gs_inputs;
struct {
boolean force_level_surface_view;
boolean force_surface_view;
boolean no_surface_view;
boolean force_sampler_view;
boolean no_sampler_view;
boolean no_cache_index_buffers;
unsigned force_level_surface_view:1;
unsigned force_surface_view:1;
unsigned no_surface_view:1;
unsigned force_sampler_view:1;
unsigned no_sampler_view:1;
unsigned no_cache_index_buffers:1;
unsigned tessellation:1;
unsigned sampler_state_mapping:1;
unsigned pad:24;
} debug;
unsigned texture_timestamp;

View file

@ -223,6 +223,16 @@ static const enum pipe_swizzle set_XXXY[PIPE_SWIZZLE_MAX] = {
PIPE_SWIZZLE_NONE
};
/*
 * Swizzle table whose first four entries all select the Y component; the
 * remaining entries are PIPE_SWIZZLE_0, PIPE_SWIZZLE_1 and PIPE_SWIZZLE_NONE.
 * svga_init_shader_key_common() picks this table for views whose format is
 * PIPE_FORMAT_X24S8_UINT or PIPE_FORMAT_X32_S8X24_UINT.
 * NOTE(review): presumably indexed by enum pipe_swizzle in X, Y, Z, W, 0, 1,
 * NONE order -- confirm against the enum definition.
 */
static const enum pipe_swizzle set_YYYY[PIPE_SWIZZLE_MAX] = {
PIPE_SWIZZLE_Y,
PIPE_SWIZZLE_Y,
PIPE_SWIZZLE_Y,
PIPE_SWIZZLE_Y,
PIPE_SWIZZLE_0,
PIPE_SWIZZLE_1,
PIPE_SWIZZLE_NONE
};
static VGPU10_RESOURCE_RETURN_TYPE
vgpu10_return_type(enum pipe_format format)
@ -242,6 +252,17 @@ vgpu10_return_type(enum pipe_format format)
}
/**
 * Tell whether a view format can be used directly with the sample_c
 * instruction.
 *
 * \param format  the sampler view format to check
 * \return true if util_format_is_depth_or_stencil() accepts the format,
 *         false otherwise
 */
static bool
isValidSampleCFormat(enum pipe_format format)
{
   const bool is_depth_or_stencil = util_format_is_depth_or_stencil(format);

   return is_depth_or_stencil;
}
/**
* Initialize the shader-neutral fields of svga_compile_key from context
* state. This is basically the texture-related state.
@ -253,15 +274,28 @@ svga_init_shader_key_common(const struct svga_context *svga,
struct svga_compile_key *key)
{
unsigned i, idx = 0;
unsigned sampler_slots = 0;
assert(shader_type < ARRAY_SIZE(svga->curr.num_sampler_views));
/* In case the number of samplers and sampler_views doesn't match,
* loop over the lower of the two counts.
* loop over the upper of the two counts.
*/
key->num_textures = MAX2(svga->curr.num_sampler_views[shader_type],
svga->curr.num_samplers[shader_type]);
key->num_samplers = 0;
/* Set sampler_state_mapping only if GL43 is supported and
* the number of samplers exceeds SVGA limit or the sampler state
* mapping env is set.
*/
boolean sampler_state_mapping =
svga_use_sampler_state_mapping(svga, svga->curr.num_samplers[shader_type]);
key->sampler_state_mapping =
key->num_textures && sampler_state_mapping ? 1 : 0;
for (i = 0; i < key->num_textures; i++) {
struct pipe_sampler_view *view = svga->curr.sampler_views[shader_type][i];
const struct svga_sampler_state
@ -269,22 +303,21 @@ svga_init_shader_key_common(const struct svga_context *svga,
if (view) {
assert(view->texture);
assert(view->texture->target < (1 << 4)); /* texture_target:4 */
enum pipe_texture_target target = view->target;
assert(target < (1 << 4)); /* texture_target:4 */
key->tex[i].target = target;
key->tex[i].sampler_return_type = vgpu10_return_type(view->format);
key->tex[i].sampler_view = 1;
/* 1D/2D array textures with one slice and cube map array textures
* with one cube are treated as non-arrays by the SVGA3D device.
* Set the is_array flag only if we know that we have more than 1
* element. This will be used to select shader instruction/resource
* types during shader translation.
*/
switch (view->texture->target) {
switch (target) {
case PIPE_TEXTURE_1D_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
key->tex[i].is_array = view->texture->array_size > 1;
@ -300,10 +333,12 @@ svga_init_shader_key_common(const struct svga_context *svga,
key->tex[i].num_samples = view->texture->nr_samples;
const enum pipe_swizzle *swizzle_tab;
if (view->texture->target == PIPE_BUFFER) {
if (target == PIPE_BUFFER) {
SVGA3dSurfaceFormat svga_format;
unsigned tf_flags;
assert(view->texture->target == PIPE_BUFFER);
/* Apply any special swizzle mask for the view format if needed */
svga_translate_texture_buffer_view_format(view->format,
@ -334,11 +369,24 @@ svga_init_shader_key_common(const struct svga_context *svga,
view->texture->format == PIPE_FORMAT_DXT1_SRGB)
swizzle_tab = set_alpha;
if (view->format == PIPE_FORMAT_X24S8_UINT ||
view->format == PIPE_FORMAT_X32_S8X24_UINT)
swizzle_tab = set_YYYY;
/* Save the compare function as we need to handle
* depth compare in the shader.
*/
key->tex[i].compare_mode = sampler->compare_mode;
key->tex[i].compare_func = sampler->compare_func;
/* Set the compare_in_shader bit if the view format
* is not a supported format for shadow compare.
* In this case, we'll do the comparison in the shader.
*/
if ((sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) &&
!isValidSampleCFormat(view->format)) {
key->tex[i].compare_in_shader = TRUE;
}
}
key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
@ -364,6 +412,139 @@ svga_init_shader_key_common(const struct svga_context *svga,
key->tex[i].texel_bias = TRUE;
}
}
if (!sampler_state_mapping) {
/* Use the same index if sampler state mapping is not supported */
key->tex[i].sampler_index = i;
key->num_samplers = i + 1;
}
else {
/* The current samplers list can have redundant entries.
* In order to allow the number of bound samplers within the
* max limit supported by SVGA, we'll recreate the list with
* unique sampler state objects only.
*/
/* Check to see if this sampler is already on the list.
* If so, set the sampler index of this sampler to the
* same sampler index.
*/
for (unsigned j = 0; j <= i; j++) {
if (svga->curr.sampler[shader_type][j] == sampler) {
if (!(sampler_slots & (1 << j))) {
/* if this sampler is not added to the new list yet,
* set its sampler index to the next sampler index,
* increment the sampler count, and mark this
* sampler as added to the list.
*/
unsigned next_index =
MIN2(key->num_samplers, SVGA3D_DX_MAX_SAMPLERS-1);
key->tex[i].sampler_index = next_index;
key->num_samplers = next_index + 1;
if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
/* reserve one slot for the alternate sampler */
key->num_samplers++;
}
sampler_slots |= (1 << j);
}
else {
key->tex[i].sampler_index = key->tex[j].sampler_index;
}
break;
}
}
}
}
}
if (svga_have_gl43(svga)) {
if (shader->info.images_declared ||
shader->info.shader_buffers_declared) {
/* Save the uavSpliceIndex which is the index used for the first uav
* in the draw pipeline. For compute, uavSpliceIndex is always 0.
*/
if (shader_type != PIPE_SHADER_COMPUTE)
key->uav_splice_index = svga->state.hw_draw.uavSpliceIndex;
unsigned uav_splice_index = key->uav_splice_index;
/* Also get the texture data type to be used in the uav declaration */
struct svga_image_view *cur_image_view =
&svga->curr.image_views[shader_type][0];
for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.image_views[shader_type]);
i++, cur_image_view++) {
struct pipe_resource *resource = cur_image_view->desc.resource;
if (resource) {
key->images[i].return_type =
svga_get_texture_datatype(cur_image_view->desc.format);
key->images[i].is_array = resource->array_size > 1;
/* Save the image resource target in the shader key because
* for single layer image view, the resource target in the
* tgsi shader is changed to a different texture target.
*/
key->images[i].resource_target = resource->target;
if (resource->target == PIPE_TEXTURE_3D ||
resource->target == PIPE_TEXTURE_1D_ARRAY ||
resource->target == PIPE_TEXTURE_2D_ARRAY ||
resource->target == PIPE_TEXTURE_CUBE ||
resource->target == PIPE_TEXTURE_CUBE_ARRAY) {
key->images[i].is_single_layer =
cur_image_view->desc.u.tex.first_layer ==
cur_image_view->desc.u.tex.last_layer;
}
key->images[i].uav_index = cur_image_view->uav_index + uav_splice_index;
}
else
key->images[i].uav_index = SVGA3D_INVALID_ID;
}
struct svga_shader_buffer *cur_sbuf =
&svga->curr.shader_buffers[shader_type][0];
for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.shader_buffers[shader_type]);
i++, cur_sbuf++) {
if (cur_sbuf->resource)
key->shader_buf_uav_index[i] = cur_sbuf->uav_index + uav_splice_index;
else
key->shader_buf_uav_index[i] = SVGA3D_INVALID_ID;
}
struct svga_shader_buffer *cur_buf = &svga->curr.atomic_buffers[0];
for (unsigned i = 0; i < ARRAY_SIZE(svga->curr.atomic_buffers);
i++, cur_buf++) {
if (cur_buf->resource)
key->atomic_buf_uav_index[i] = cur_buf->uav_index + uav_splice_index;
else
key->atomic_buf_uav_index[i] = SVGA3D_INVALID_ID;
}
}
/* Save info about which constant buffers are to be viewed
* as raw buffers in the shader key.
*/
if (shader->info.const_buffers_declared &
svga->state.raw_constbufs[shader_type]) {
key->raw_buffers = svga->state.raw_constbufs[shader_type];
/* beginning index for srv for raw buffers */
key->srv_raw_buf_index = PIPE_MAX_SAMPLERS;
}
}

View file

@ -121,15 +121,18 @@ struct svga_compile_key
/* any shader type */
int8_t generic_remap_table[MAX_GENERIC_VARYING];
unsigned num_textures:8;
unsigned num_samplers:8;
unsigned num_unnormalized_coords:8;
unsigned clip_plane_enable:PIPE_MAX_CLIP_PLANES;
unsigned last_vertex_stage:1;
unsigned clamp_vertex_color:1;
unsigned sampler_state_mapping:1; /* Set if use sampler state mapping */
unsigned sprite_origin_lower_left:1;
uint16_t sprite_coord_enable;
struct {
unsigned compare_mode:1;
unsigned compare_func:3;
unsigned compare_in_shader:1;
unsigned unnormalized:1;
unsigned texel_bias:1;
unsigned width_height_idx:5; /**< texture unit */
@ -142,10 +145,25 @@ struct svga_compile_key
unsigned target:4;
unsigned sampler_return_type:4;
unsigned sampler_view:1;
unsigned sampler_index:5;
} tex[PIPE_MAX_SAMPLERS];
/* Note: svga_compile_keys_equal() depends on the variable-size
* tex[] array being at the end of this structure.
*/
unsigned uav_splice_index:4; /* starting uav index */
unsigned srv_raw_buf_index:8; /* start index for srv raw buffers */
unsigned image_size_used:1;
uint16_t raw_buffers; /* bitmask of raw buffers */
struct {
enum tgsi_return_type return_type;
enum pipe_texture_target resource_target;
unsigned is_array:1;
unsigned is_single_layer:1;
unsigned uav_index:7;
} images[PIPE_MAX_SHADER_IMAGES];
uint16_t shader_buf_uav_index[PIPE_MAX_SHADER_BUFFERS];
uint16_t atomic_buf_uav_index[PIPE_MAX_HW_ATOMIC_BUFFERS];
};
/* A key for a variant of token string of a shader */
@ -223,7 +241,8 @@ struct svga_fs_variant
unsigned fs_shadow_compare_units;
/** For FS-based polygon stipple */
unsigned pstipple_sampler_unit;
unsigned pstipple_sampler_unit:8;
unsigned pstipple_sampler_state_index:8;
};
@ -368,8 +387,7 @@ static inline boolean
svga_compile_keys_equal(const struct svga_compile_key *a,
const struct svga_compile_key *b)
{
unsigned key_size =
(const char *) &a->tex[a->num_textures] - (const char *) a;
unsigned key_size = sizeof(*a);
return memcmp(a, b, key_size) == 0;
}

File diff suppressed because it is too large Load diff