agx: Lower VBOs in NIR
Now we support all the vertex formats! This means we don't hit u_vbuf for format translation, which helps performance in lots of applications. By doing the lowering in NIR, the vertex fetch code itself can be optimized by NIR (e.g. nir_opt_algebraic), which can improve generated code quality.

In my first implementation of this, I had a big switch statement mapping format enums to interchange formats and post-processing code. This ends up being really unwieldy: the combinatorics of bit packing + conversion + swizzles is enormous, and for performance we want to support everything (no u_vbuf fallbacks). To keep the combinatorics in check, we rely on parsing the util_format_description to separate out the issues of bit packing, conversion, and swizzling, allowing us to handle bizarro formats like B10G10R10A2_SNORM with no special casing.

In an effort to support everything in one shot, this handles all the formats needed for the extensions EXT_vertex_array_bgra, ARB_vertex_type_2_10_10_10_rev, and ARB_vertex_type_10f_11f_11f_rev.

Passes dEQP-GLES3.functional.vertex_arrays.*

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19996>
This commit is contained in:
parent fb49715a2c
commit 8dcf7648f1
11 changed files with 318 additions and 222 deletions
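To make the decomposition described in the commit message concrete, here is a rough, hand-written sketch of what the lowering boils down to for one awkward case, PIPE_FORMAT_B10G10R10A2_SNORM. It is assembled from the helpers the new pass actually uses (nir_load_constant_agx, nir_format_unpack_sint, nir_format_snorm_to_float), but the surrounding variables (b, base, stride_offset_el) and the manual expansion are illustrative only, not code from this MR:

/* Interchange step: non-UNORM rgb10a2 is fetched as a single 32-bit word */
nir_ssa_def *raw = nir_load_constant_agx(b, 1, 32, base, stride_offset_el,
                                         .format = PIPE_FORMAT_R32_UINT);

/* Bit unpacking + conversion: 10/10/10/2 fields sign-extended, then snorm->float */
unsigned bits[4] = { 10, 10, 10, 2 };
nir_ssa_def *unpacked = nir_format_unpack_sint(b, raw, bits, 4);
nir_ssa_def *norm = nir_format_snorm_to_float(b, unpacked, bits);

/* Swizzle: B10G10R10A2 stores blue in the low bits, so the format swizzle
 * (Z, Y, X, W) reorders the unpacked channels into RGBA */
nir_ssa_def *rgba = nir_vec4(b, nir_channel(b, norm, 2),
                                nir_channel(b, norm, 1),
                                nir_channel(b, norm, 0),
                                nir_channel(b, norm, 3));

Because all of this is ordinary NIR, nir_opt_algebraic and the other NIR passes can then optimize the fetch together with the rest of the vertex shader, which is the point the commit message makes about code quality.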
@@ -108,7 +108,7 @@ GL 3.3, GLSL 3.30 --- all DONE: freedreno, i965, nv50, nvc0, r600, radeonsi, llv
   GL_ARB_texture_swizzle                      DONE (v3d, vc4, panfrost, lima, asahi)
   GL_ARB_timer_query                          DONE ()
   GL_ARB_instanced_arrays                     DONE (etnaviv/HALTI2, v3d, panfrost)
-  GL_ARB_vertex_type_2_10_10_10_rev           DONE (v3d, panfrost)
+  GL_ARB_vertex_type_2_10_10_10_rev           DONE (v3d, panfrost, asahi)


 GL 4.0, GLSL 4.00 --- all DONE: freedreno/a6xx, i965/gen7+, nvc0, r600, radeonsi, llvmpipe, virgl, zink, d3d12

@@ -208,7 +208,7 @@ GL 4.4, GLSL 4.40 -- all DONE: freedreno/a6xx, i965/gen8+, nvc0, r600, radeonsi,
   GL_ARB_query_buffer_object                  DONE (freedreno/a6xx, i965/hsw+, virgl)
   GL_ARB_texture_mirror_clamp_to_edge         DONE (freedreno, i965, nv50, softpipe, virgl, v3d, panfrost)
   GL_ARB_texture_stencil8                     DONE (freedreno, i965/hsw+, nv50, softpipe, virgl, v3d, panfrost, d3d12, asahi)
-  GL_ARB_vertex_type_10f_11f_11f_rev          DONE (freedreno, i965, nv50, softpipe, virgl, panfrost, d3d12)
+  GL_ARB_vertex_type_10f_11f_11f_rev          DONE (freedreno, i965, nv50, softpipe, virgl, panfrost, d3d12, asahi)

 GL 4.5, GLSL 4.50 -- all DONE: freedreno/a6xx, nvc0, r600, radeonsi, llvmpipe, zink

@@ -358,61 +358,6 @@ agx_format_for_pipe(enum pipe_format format)
    unreachable("Invalid format");
 }

-/* AGX appears to lack support for vertex attributes. Lower to global loads. */
-static void
-agx_emit_load_attr(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
-{
-   nir_src *offset_src = nir_get_io_offset_src(instr);
-   assert(nir_src_is_const(*offset_src) && "no attribute indirects");
-   unsigned index = nir_intrinsic_base(instr) +
-                    nir_src_as_uint(*offset_src);
-
-   struct agx_shader_key *key = b->shader->key;
-   struct agx_attribute attrib = key->vs.attributes[index];
-
-   /* address = base + (stride * vertex_id) + src_offset */
-   unsigned buf = attrib.buf;
-   unsigned stride = key->vs.vbuf_strides[buf];
-   unsigned shift = agx_format_shift(attrib.format);
-
-   agx_index shifted_stride = agx_mov_imm(b, 32, stride >> shift);
-   agx_index src_offset = agx_mov_imm(b, 32, attrib.src_offset);
-
-   /* A nonzero divisor requires dividing the instance ID. A zero divisor
-    * specifies per-instance data. */
-   agx_index element_id = (attrib.divisor == 0) ? agx_vertex_id(b) :
-                          agx_udiv_const(b, agx_instance_id(b), attrib.divisor);
-
-   agx_index offset = agx_imad(b, element_id, shifted_stride, src_offset, 0);
-
-   /* Each VBO has a 64-bit = 4 x 16-bit address, lookup the base address as a
-    * sysval. Mov around the base to handle uniform restrictions, copyprop will
-    * usually clean that up.
-    */
-   agx_index base = agx_mov(b, agx_vbo_base(b->shader, buf));
-
-   /* Load the data */
-   assert(instr->num_components <= 4);
-
-   unsigned actual_comps = (attrib.nr_comps_minus_1 + 1);
-   agx_index vec = agx_vec_for_dest(b->shader, &instr->dest);
-   agx_device_load_to(b, vec, base, offset, attrib.format,
-                      BITFIELD_MASK(attrib.nr_comps_minus_1 + 1), 0, 0);
-   agx_wait(b, 0);
-
-   agx_index dests[4] = { agx_null() };
-   agx_emit_split(b, dests, vec, actual_comps);
-
-   agx_index one = agx_mov_imm(b, 32, fui(1.0));
-   agx_index zero = agx_mov_imm(b, 32, 0);
-   agx_index default_value[4] = { zero, zero, zero, one };
-
-   for (unsigned i = actual_comps; i < instr->num_components; ++i)
-      dests[i] = default_value[i];
-
-   agx_emit_collect_to(b, dest, instr->num_components, dests);
-}
-
 static void
 agx_emit_load_vary_flat(agx_builder *b, agx_index dest, nir_intrinsic_instr *instr)
 {

@@ -733,13 +678,8 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
      return NULL;

   case nir_intrinsic_load_input:
-      if (stage == MESA_SHADER_FRAGMENT)
-         agx_emit_load_vary_flat(b, dst, instr);
-      else if (stage == MESA_SHADER_VERTEX)
-         agx_emit_load_attr(b, dst, instr);
-      else
-         unreachable("Unsupported shader stage");
-
+      assert(stage == MESA_SHADER_FRAGMENT && "vertex loads lowered");
+      agx_emit_load_vary_flat(b, dst, instr);
      return NULL;

   case nir_intrinsic_load_global:

@@ -785,6 +725,10 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
                              nir_src_as_uint(instr->src[0]) * 4,
                              b->shader->nir->info.num_ubos * 4));

+   case nir_intrinsic_load_vbo_base_agx:
+      return agx_mov_to(b, dst,
+                        agx_vbo_base(b->shader, nir_src_as_uint(instr->src[0])));
+
   case nir_intrinsic_load_vertex_id:
      return agx_mov_to(b, dst, agx_abs(agx_vertex_id(b)));

@@ -182,8 +182,6 @@ struct agx_shader_info {
 };

 #define AGX_MAX_RTS (8)
-#define AGX_MAX_ATTRIBS (16)
-#define AGX_MAX_VBUFS (16)

 enum agx_format {
    AGX_FORMAT_I8 = 0,

@@ -203,56 +201,6 @@ enum agx_format {
    AGX_NUM_FORMATS,
 };

-/* Returns the number of bits at the bottom of the address required to be zero.
- * That is, returns the base-2 logarithm of the minimum alignment for an
- * agx_format, where the minimum alignment is 2^n where n is the result of this
- * function. The offset argument to device_load is left-shifted by this amount
- * in the hardware */
-
-static inline unsigned
-agx_format_shift(enum agx_format format)
-{
-   switch (format) {
-   case AGX_FORMAT_I8:
-   case AGX_FORMAT_U8NORM:
-   case AGX_FORMAT_S8NORM:
-   case AGX_FORMAT_SRGBA8:
-      return 0;
-
-   case AGX_FORMAT_I16:
-   case AGX_FORMAT_F16:
-   case AGX_FORMAT_U16NORM:
-   case AGX_FORMAT_S16NORM:
-      return 1;
-
-   case AGX_FORMAT_I32:
-   case AGX_FORMAT_RGB10A2:
-   case AGX_FORMAT_RG11B10F:
-   case AGX_FORMAT_RGB9E5:
-      return 2;
-
-   default:
-      unreachable("invalid format");
-   }
-}
-
-struct agx_attribute {
-   uint32_t divisor;
-
-   unsigned buf : 5;
-   unsigned src_offset : 16;
-   unsigned nr_comps_minus_1 : 2;
-   enum agx_format format : 4;
-   unsigned padding : 5;
-};
-
-struct agx_vs_shader_key {
-   unsigned num_vbufs;
-   unsigned vbuf_strides[AGX_MAX_VBUFS];
-
-   struct agx_attribute attributes[AGX_MAX_ATTRIBS];
-};
-
 struct agx_fs_shader_key {
    /* Normally, access to the tilebuffer must be guarded by appropriate fencing
     * instructions to ensure correct results in the presence of out-of-order

@@ -269,7 +217,6 @@ struct agx_fs_shader_key {

 struct agx_shader_key {
    union {
-      struct agx_vs_shader_key vs;
       struct agx_fs_shader_key fs;
    };
 };
@@ -190,69 +190,3 @@ const struct agx_pixel_format_entry agx_pixel_format[PIPE_FORMAT_COUNT] = {
    AGX_FMT(BPTC_RGBA_UNORM, BC7, UNORM, F, _),
    AGX_FMT(BPTC_SRGBA, BC7, UNORM, F, _),
 };
-
-const enum agx_format
-agx_vertex_format[PIPE_FORMAT_COUNT] = {
-   [PIPE_FORMAT_R32_FLOAT] = AGX_FORMAT_I32,
-   [PIPE_FORMAT_R32_SINT] = AGX_FORMAT_I32,
-   [PIPE_FORMAT_R32_UINT] = AGX_FORMAT_I32,
-   [PIPE_FORMAT_R32G32_FLOAT] = AGX_FORMAT_I32,
-   [PIPE_FORMAT_R32G32_SINT] = AGX_FORMAT_I32,
-   [PIPE_FORMAT_R32G32_UINT] = AGX_FORMAT_I32,
-   [PIPE_FORMAT_R32G32B32_FLOAT] = AGX_FORMAT_I32,
-   [PIPE_FORMAT_R32G32B32_UINT] = AGX_FORMAT_I32,
-   [PIPE_FORMAT_R32G32B32_SINT] = AGX_FORMAT_I32,
-   [PIPE_FORMAT_R32G32B32A32_FLOAT] = AGX_FORMAT_I32,
-   [PIPE_FORMAT_R32G32B32A32_UINT] = AGX_FORMAT_I32,
-   [PIPE_FORMAT_R32G32B32A32_SINT] = AGX_FORMAT_I32,
-
-   [PIPE_FORMAT_R8_UNORM] = AGX_FORMAT_U8NORM,
-   [PIPE_FORMAT_R8G8_UNORM] = AGX_FORMAT_U8NORM,
-   [PIPE_FORMAT_R8G8B8_UNORM] = AGX_FORMAT_U8NORM,
-   [PIPE_FORMAT_R8G8B8A8_UNORM] = AGX_FORMAT_U8NORM,
-
-   [PIPE_FORMAT_R8_SNORM] = AGX_FORMAT_S8NORM,
-   [PIPE_FORMAT_R8G8_SNORM] = AGX_FORMAT_S8NORM,
-   [PIPE_FORMAT_R8G8B8_SNORM] = AGX_FORMAT_S8NORM,
-   [PIPE_FORMAT_R8G8B8A8_SNORM] = AGX_FORMAT_S8NORM,
-
-   [PIPE_FORMAT_R16_UNORM] = AGX_FORMAT_U16NORM,
-   [PIPE_FORMAT_R16G16_UNORM] = AGX_FORMAT_U16NORM,
-   [PIPE_FORMAT_R16G16B16_UNORM] = AGX_FORMAT_U16NORM,
-   [PIPE_FORMAT_R16G16B16A16_UNORM] = AGX_FORMAT_U16NORM,
-
-   [PIPE_FORMAT_R16_SNORM] = AGX_FORMAT_S16NORM,
-   [PIPE_FORMAT_R16G16_SNORM] = AGX_FORMAT_S16NORM,
-   [PIPE_FORMAT_R16G16B16_SNORM] = AGX_FORMAT_S16NORM,
-   [PIPE_FORMAT_R16G16B16A16_SNORM] = AGX_FORMAT_S16NORM,
-
-   [PIPE_FORMAT_R8_UINT] = AGX_FORMAT_I8,
-   [PIPE_FORMAT_R8G8_UINT] = AGX_FORMAT_I8,
-   [PIPE_FORMAT_R8G8B8_UINT] = AGX_FORMAT_I8,
-   [PIPE_FORMAT_R8G8B8A8_UINT] = AGX_FORMAT_I8,
-
-   [PIPE_FORMAT_R8_SINT] = AGX_FORMAT_I8,
-   [PIPE_FORMAT_R8G8_SINT] = AGX_FORMAT_I8,
-   [PIPE_FORMAT_R8G8B8_SINT] = AGX_FORMAT_I8,
-   [PIPE_FORMAT_R8G8B8A8_SINT] = AGX_FORMAT_I8,
-
-   [PIPE_FORMAT_R16_UINT] = AGX_FORMAT_I16,
-   [PIPE_FORMAT_R16G16_UINT] = AGX_FORMAT_I16,
-   [PIPE_FORMAT_R16G16B16_UINT] = AGX_FORMAT_I16,
-   [PIPE_FORMAT_R16G16B16A16_UINT] = AGX_FORMAT_I16,
-
-   [PIPE_FORMAT_R16_SINT] = AGX_FORMAT_I16,
-   [PIPE_FORMAT_R16G16_SINT] = AGX_FORMAT_I16,
-   [PIPE_FORMAT_R16G16B16_SINT] = AGX_FORMAT_I16,
-   [PIPE_FORMAT_R16G16B16A16_SINT] = AGX_FORMAT_I16,
-
-   [PIPE_FORMAT_R32_UINT] = AGX_FORMAT_I32,
-   [PIPE_FORMAT_R32G32_UINT] = AGX_FORMAT_I32,
-   [PIPE_FORMAT_R32G32B32_UINT] = AGX_FORMAT_I32,
-   [PIPE_FORMAT_R32G32B32A32_UINT] = AGX_FORMAT_I32,
-
-   [PIPE_FORMAT_R32_SINT] = AGX_FORMAT_I32,
-   [PIPE_FORMAT_R32G32_SINT] = AGX_FORMAT_I32,
-   [PIPE_FORMAT_R32G32B32_SINT] = AGX_FORMAT_I32,
-   [PIPE_FORMAT_R32G32B32A32_SINT] = AGX_FORMAT_I32,
-};
@@ -26,7 +26,6 @@
 #define __AGX_FORMATS_H_

 #include "util/format/u_format.h"
-#include "asahi/compiler/agx_compile.h"

 struct agx_pixel_format_entry {
    uint8_t channels;

@@ -36,7 +35,6 @@ struct agx_pixel_format_entry {
 };

 extern const struct agx_pixel_format_entry agx_pixel_format[PIPE_FORMAT_COUNT];
-extern const enum agx_format agx_vertex_format[PIPE_FORMAT_COUNT];

 /* N.b. hardware=0 corresponds to R8 UNORM, which is renderable. So a zero
  * entry indicates an invalid format. */
src/asahi/lib/agx_nir_lower_vbo.c (new file, 239 lines)
@@ -0,0 +1,239 @@
/*
 * Copyright 2022 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */

#include "agx_nir_lower_vbo.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
#include "util/u_math.h"

static bool
is_rgb10_a2(const struct util_format_description *desc)
{
   return desc->channel[0].shift == 0 && desc->channel[0].size == 10 &&
          desc->channel[1].shift == 10 && desc->channel[1].size == 10 &&
          desc->channel[2].shift == 20 && desc->channel[2].size == 10 &&
          desc->channel[3].shift == 30 && desc->channel[3].size == 2;
}

static enum pipe_format
agx_vbo_internal_format(enum pipe_format format)
{
   const struct util_format_description *desc = util_format_description(format);

   /* RGB10A2 formats are native for UNORM and unpacked otherwise */
   if (is_rgb10_a2(desc)) {
      if (desc->is_unorm)
         return PIPE_FORMAT_R10G10B10A2_UNORM;
      else
         return PIPE_FORMAT_R32_UINT;
   }

   /* R11G11B10F is native and special */
   if (format == PIPE_FORMAT_R11G11B10_FLOAT)
      return format;

   /* No other non-array formats handled */
   if (!desc->is_array)
      return PIPE_FORMAT_NONE;

   /* Otherwise look at one (any) channel */
   int idx = util_format_get_first_non_void_channel(format);
   if (idx < 0)
      return PIPE_FORMAT_NONE;

   /* We only handle RGB formats (we could do SRGB if we wanted though?) */
   if ((desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB) ||
       (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN))
      return PIPE_FORMAT_NONE;

   /* We have native 8-bit and 16-bit normalized formats */
   struct util_format_channel_description chan = desc->channel[idx];

   if (chan.normalized) {
      if (chan.size == 8)
         return desc->is_unorm ? PIPE_FORMAT_R8_UNORM : PIPE_FORMAT_R8_SNORM;
      else if (chan.size == 16)
         return desc->is_unorm ? PIPE_FORMAT_R16_UNORM : PIPE_FORMAT_R16_SNORM;
   }

   /* Otherwise map to the corresponding integer format */
   switch (chan.size) {
   case 32: return PIPE_FORMAT_R32_UINT;
   case 16: return PIPE_FORMAT_R16_UINT;
   case 8:  return PIPE_FORMAT_R8_UINT;
   default: return PIPE_FORMAT_NONE;
   }
}

bool
agx_vbo_supports_format(enum pipe_format format)
{
   return agx_vbo_internal_format(format) != PIPE_FORMAT_NONE;
}

static nir_ssa_def *
apply_swizzle_channel(nir_builder *b, nir_ssa_def *vec,
                      unsigned swizzle, bool is_int)
{
   switch (swizzle) {
   case PIPE_SWIZZLE_X: return nir_channel(b, vec, 0);
   case PIPE_SWIZZLE_Y: return nir_channel(b, vec, 1);
   case PIPE_SWIZZLE_Z: return nir_channel(b, vec, 2);
   case PIPE_SWIZZLE_W: return nir_channel(b, vec, 3);
   case PIPE_SWIZZLE_0: return nir_imm_intN_t(b, 0, vec->bit_size);
   case PIPE_SWIZZLE_1: return is_int ? nir_imm_intN_t(b, 1, vec->bit_size) :
                                        nir_imm_floatN_t(b, 1.0, vec->bit_size);
   default: unreachable("Invalid swizzle channel");
   }
}

static bool
pass(struct nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_input)
      return false;

   struct agx_vbufs *vbufs = data;
   b->cursor = nir_before_instr(instr);

   nir_src *offset_src = nir_get_io_offset_src(intr);
   assert(nir_src_is_const(*offset_src) && "no attribute indirects");
   unsigned index = nir_intrinsic_base(intr) + nir_src_as_uint(*offset_src);

   struct agx_attribute attrib = vbufs->attributes[index];
   uint32_t stride = vbufs->strides[attrib.buf];
   uint16_t offset = attrib.src_offset;

   const struct util_format_description *desc =
      util_format_description(attrib.format);
   int chan = util_format_get_first_non_void_channel(attrib.format);
   assert(chan >= 0);

   bool is_float = desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT;
   bool is_unsigned = desc->channel[chan].type == UTIL_FORMAT_TYPE_UNSIGNED;
   bool is_signed = desc->channel[chan].type == UTIL_FORMAT_TYPE_SIGNED;
   bool is_fixed = desc->channel[chan].type == UTIL_FORMAT_TYPE_FIXED;
   bool is_int = util_format_is_pure_integer(attrib.format);

   assert((is_float ^ is_unsigned ^ is_signed ^ is_fixed) && "Invalid format");

   enum pipe_format interchange_format = agx_vbo_internal_format(attrib.format);
   assert(interchange_format != PIPE_FORMAT_NONE);

   unsigned interchange_align = util_format_get_blocksize(interchange_format);
   unsigned interchange_comps = util_format_get_nr_components(attrib.format);

   /* In the hardware, uint formats zero-extend and float formats convert.
    * However, non-uint formats using a uint interchange format shouldn't be
    * zero extended.
    */
   unsigned interchange_register_size =
      util_format_is_pure_uint(interchange_format) &&
      !util_format_is_pure_uint(attrib.format) ?
      (interchange_align * 8) :
      nir_dest_bit_size(intr->dest);

   /* Non-UNORM R10G10B10A2 loaded as a scalar and unpacked */
   if (interchange_format == PIPE_FORMAT_R32_UINT && !desc->is_array)
      interchange_comps = 1;

   /* Calculate the element to fetch the vertex for. Divide the instance ID by
    * the divisor for per-instance data. Divisor=0 specifies per-vertex data.
    */
   nir_ssa_def *el = (attrib.divisor == 0) ?
                     nir_load_vertex_id(b) :
                     nir_udiv_imm(b, nir_load_instance_id(b), attrib.divisor);

   nir_ssa_def *base = nir_load_vbo_base_agx(b, nir_imm_int(b, attrib.buf));

   assert((stride % interchange_align) == 0 && "must be aligned");
   assert((offset % interchange_align) == 0 && "must be aligned");

   unsigned stride_el = stride / interchange_align;
   unsigned offset_el = offset / interchange_align;

   nir_ssa_def *stride_offset_el =
      nir_iadd_imm(b, nir_imul_imm(b, el, stride_el), offset_el);

   /* Load the raw vector */
   nir_ssa_def *memory =
      nir_load_constant_agx(b, interchange_comps,
                            interchange_register_size,
                            base,
                            stride_offset_el,
                            .format = interchange_format);

   unsigned dest_size = nir_dest_bit_size(intr->dest);

   /* Unpack but do not convert non-native non-array formats */
   if (is_rgb10_a2(desc) && interchange_format == PIPE_FORMAT_R32_UINT) {
      unsigned bits[] = { 10, 10, 10, 2 };

      if (is_signed)
         memory = nir_format_unpack_sint(b, memory, bits, 4);
      else
         memory = nir_format_unpack_uint(b, memory, bits, 4);
   }

   if (desc->channel[chan].normalized) {
      /* 8/16-bit normalized formats are native, others converted here */
      if (is_rgb10_a2(desc) && is_signed) {
         unsigned bits[] = { 10, 10, 10, 2 };
         memory = nir_format_snorm_to_float(b, memory, bits);
      } else if (desc->channel[chan].size == 32) {
         assert(desc->is_array && "no non-array 32-bit norm formats");
         unsigned bits[] = { 32, 32, 32, 32 };

         if (is_signed)
            memory = nir_format_snorm_to_float(b, memory, bits);
         else
            memory = nir_format_unorm_to_float(b, memory, bits);
      }
   } else if (desc->channel[chan].pure_integer) {
      /* Zero-extension is native, may need to sign extend */
      if (is_signed)
         memory = nir_i2iN(b, memory, dest_size);
   } else {
      if (is_unsigned)
         memory = nir_u2fN(b, memory, dest_size);
      else if (is_signed || is_fixed)
         memory = nir_i2fN(b, memory, dest_size);
      else
         memory = nir_f2fN(b, memory, dest_size);

      /* 16.16 fixed-point weirdo GL formats need to be scaled */
      if (is_fixed) {
         assert(desc->is_array && desc->channel[chan].size == 32);
         assert(dest_size == 32 && "overflow if smaller");
         memory = nir_fmul_imm(b, memory, 1.0 / 65536.0);
      }
   }

   /* We now have a properly formatted vector of the components in memory. Apply
    * the format swizzle forwards to trim/pad/reorder as needed.
    */
   nir_ssa_def *channels[4] = { NULL };
   assert(nir_intrinsic_component(intr) == 0 && "unimplemented");

   for (unsigned i = 0; i < intr->num_components; ++i)
      channels[i] = apply_swizzle_channel(b, memory, desc->swizzle[i], is_int);

   nir_ssa_def *logical = nir_vec(b, channels, intr->num_components);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, logical);
   return true;
}

bool
agx_nir_lower_vbo(nir_shader *shader, struct agx_vbufs *vbufs)
{
   assert(shader->info.stage == MESA_SHADER_VERTEX);
   return nir_shader_instructions_pass(shader, pass,
                                       nir_metadata_block_index |
                                       nir_metadata_dominance,
                                       vbufs);
}
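As a hypothetical worked example of the addressing math in the pass above (the stride, offset, and format here are illustrative, not taken from the MR): consider an R16G16B16_SNORM attribute at src_offset 4 in a vertex buffer with stride 12. The interchange format is R16_SNORM, so interchange_align is 2 and the computation reduces to:

/* Illustrative values only */
stride_el        = 12 / 2 = 6
offset_el        =  4 / 2 = 2
stride_offset_el = el * 6 + 2

/* nir_load_constant_agx then fetches 3 x 16-bit elements at that element
 * index; with the offset scaled by the element size (as the agx_format_shift
 * comment removed above describes for the hardware), that is byte address
 * base + (el * 6 + 2) * 2 = base + 12 * el + 4. */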
src/asahi/lib/agx_nir_lower_vbo.h (new file, 46 lines)
@@ -0,0 +1,46 @@
/*
 * Copyright 2022 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */

#ifndef __AGX_NIR_LOWER_VBO_H
#define __AGX_NIR_LOWER_VBO_H

#include <stdint.h>
#include <stdbool.h>
#include "nir.h"
#include "util/format/u_formats.h"

#ifdef __cplusplus
extern "C" {
#endif

#define AGX_MAX_ATTRIBS (16)
#define AGX_MAX_VBUFS (16)

/* See pipe_vertex_element for justification on the sizes. This structure should
 * be small so it can be embedded into a shader key.
 */
struct agx_attribute {
   uint32_t divisor;
   uint16_t src_offset;
   uint8_t buf;

   /* pipe_format, all vertex formats should be <= 255 */
   uint8_t format;
};

struct agx_vbufs {
   unsigned count;
   uint32_t strides[AGX_MAX_VBUFS];
   struct agx_attribute attributes[AGX_MAX_ATTRIBS];
};

bool agx_nir_lower_vbo(nir_shader *shader, struct agx_vbufs *vbufs);
bool agx_vbo_supports_format(enum pipe_format format);

#ifdef __cplusplus
} /* extern C */
#endif

#endif
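A minimal, hypothetical sketch of how a driver feeds this interface (buffer index, stride, and format below are made up for illustration; the real gallium wiring is in the agx_create_shader_state/agx_update_vs changes further down):

struct agx_vbufs vbufs = {
   .count = 1,
   .strides = { [0] = 16 },   /* byte stride of vertex buffer 0 */
   .attributes = {
      /* attribute 0: per-vertex vec4 float at offset 0 of buffer 0 */
      [0] = {
         .divisor = 0,
         .src_offset = 0,
         .buf = 0,
         .format = PIPE_FORMAT_R32G32B32A32_FLOAT,
      },
   },
};

/* Run on the vertex shader before handing it to the backend compiler */
NIR_PASS_V(nir, agx_nir_lower_vbo, &vbufs);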
@@ -27,6 +27,7 @@ libasahi_lib_files = files(
   'agx_meta.c',
   'agx_tilebuffer.c',
   'agx_nir_lower_tilebuffer.c',
+  'agx_nir_lower_vbo.c',
   'agx_ppp.h',
   'pool.c',
 )
@@ -1573,18 +1573,8 @@ agx_is_format_supported(struct pipe_screen* pscreen,
         return false;
   }

-   /* TODO: formats */
-   if (usage & PIPE_BIND_VERTEX_BUFFER) {
-      switch (format) {
-      case PIPE_FORMAT_R32_FLOAT:
-      case PIPE_FORMAT_R32G32_FLOAT:
-      case PIPE_FORMAT_R32G32B32_FLOAT:
-      case PIPE_FORMAT_R32G32B32A32_FLOAT:
-         break;
-      default:
-         return false;
-      }
-   }
+   if ((usage & PIPE_BIND_VERTEX_BUFFER) && !agx_vbo_supports_format(format))
+      return false;

   if (usage & PIPE_BIND_DEPTH_STENCIL) {
      switch (format) {
@@ -987,18 +987,13 @@ agx_create_vertex_elements(struct pipe_context *ctx,

      const struct util_format_description *desc =
         util_format_description(ve.src_format);

      unsigned chan_size = desc->channel[0].size / 8;

-      assert(chan_size == 1 || chan_size == 2 || chan_size == 4);
-      assert(desc->nr_channels >= 1 && desc->nr_channels <= 4);
      assert((ve.src_offset & (chan_size - 1)) == 0);

      attribs[i] = (struct agx_attribute) {
         .buf = ve.vertex_buffer_index,
-         .src_offset = ve.src_offset / chan_size,
-         .nr_comps_minus_1 = desc->nr_channels - 1,
-         .format = agx_vertex_format[ve.src_format],
+         .src_offset = ve.src_offset,
+         .format = ve.src_format,
         .divisor = ve.instance_divisor
      };
   }

@@ -1184,7 +1179,9 @@ agx_compile_variant(struct agx_device *dev,

   agx_preprocess_nir(nir);

-   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+   if (nir->info.stage == MESA_SHADER_VERTEX) {
+      NIR_PASS_V(nir, agx_nir_lower_vbo, &key->vbuf);
+   } else {
      struct agx_tilebuffer_layout tib =
         agx_build_tilebuffer_layout(key->rt_formats, key->nr_cbufs, 1);

@@ -1243,13 +1240,12 @@ agx_create_shader_state(struct pipe_context *pctx,
   switch (so->nir->info.stage) {
   case MESA_SHADER_VERTEX:
   {
-      key.base.vs.num_vbufs = AGX_MAX_VBUFS;
+      key.vbuf.count = AGX_MAX_VBUFS;
      for (unsigned i = 0; i < AGX_MAX_VBUFS; ++i) {
-         key.base.vs.vbuf_strides[i] = 16;
-         key.base.vs.attributes[i] = (struct agx_attribute) {
+         key.vbuf.strides[i] = 16;
+         key.vbuf.attributes[i] = (struct agx_attribute) {
            .buf = i,
-            .nr_comps_minus_1 = 4 - 1,
-            .format = AGX_FORMAT_I32
+            .format = PIPE_FORMAT_R32G32B32A32_FLOAT
         };
      }

@@ -1295,20 +1291,18 @@ agx_update_shader(struct agx_context *ctx, struct agx_compiled_shader **out,
 static bool
 agx_update_vs(struct agx_context *ctx)
 {
-   struct agx_vs_shader_key key = { 0 };
-
-   memcpy(key.attributes, ctx->attributes,
-          sizeof(key.attributes[0]) * AGX_MAX_ATTRIBS);
-
-   u_foreach_bit(i, ctx->vb_mask) {
-      key.vbuf_strides[i] = ctx->vertex_buffers[i].stride;
-   }
-
-   struct asahi_shader_key akey = {
-      .base.vs = key
+   struct asahi_shader_key key = {
+      .vbuf.count = util_last_bit(ctx->vb_mask),
    };

-   return agx_update_shader(ctx, &ctx->vs, PIPE_SHADER_VERTEX, &akey);
+   memcpy(key.vbuf.attributes, ctx->attributes,
+          sizeof(key.vbuf.attributes[0]) * AGX_MAX_ATTRIBS);
+
+   u_foreach_bit(i, ctx->vb_mask) {
+      key.vbuf.strides[i] = ctx->vertex_buffers[i].stride;
+   }
+
+   return agx_update_shader(ctx, &ctx->vs, PIPE_SHADER_VERTEX, &key);
 }

 static bool
@@ -34,6 +34,7 @@
 #include "asahi/lib/agx_device.h"
 #include "asahi/lib/pool.h"
 #include "asahi/lib/agx_tilebuffer.h"
+#include "asahi/lib/agx_nir_lower_vbo.h"
 #include "asahi/compiler/agx_compile.h"
 #include "asahi/layout/layout.h"
 #include "compiler/nir/nir_lower_blend.h"

@@ -142,6 +143,8 @@ struct agx_blend {

 struct asahi_shader_key {
    struct agx_shader_key base;
+   struct agx_vbufs vbuf;
+
    struct agx_blend blend;
    unsigned nr_cbufs;
