r600: Move r600_create_vertex_fetch_shader to r600_shader.c

The function takes a Gallium pipe_context, so moving it out of r600_asm.c helps make r600_asm usable outside Gallium.

Signed-off-by: Vitaliy Triang3l Kuzmin <triang3l@yandex.ru>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25695>
This commit is contained in:
Vitaliy Triang3l Kuzmin 2023-10-07 19:10:46 +03:00 committed by Marge Bot
parent 5521840cbc
commit 564b972196
4 changed files with 164 additions and 164 deletions

View file

@ -28,8 +28,6 @@
#include <errno.h>
#include "util/u_bitcast.h"
#include "util/u_dump.h"
#include "util/u_endian.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "pipe/p_shader_tokens.h"
@ -2807,164 +2805,6 @@ out_unknown:
R600_ERR("unsupported vertex format %s\n", util_format_name(pformat));
}
/*
 * Build the fetch shader for a vertex elements state and upload its bytecode.
 *
 * For every element with instance_divisor > 1 a MULHI_UINT ALU op is emitted
 * that multiplies channel 3 of GPR 0 by the literal 2^32 / divisor + 1 and
 * writes channel 3 of GPR i+1. Then one vertex fetch per element loads the
 * attribute into GPR i+1, and the program is terminated with CF_OP_RET.
 * The assembled dwords are copied, in little-endian order, into a buffer
 * suballocated from rctx->allocator_fetch_shader.
 *
 * ctx:      context, cast to struct r600_context.
 * count:    number of entries in elements (asserted to be < 32).
 * elements: vertex element descriptions.
 *
 * Returns a newly allocated struct r600_fetch_shader, or NULL when bytecode
 * assembly fails, an element has src_offset > 65535, or an allocation fails.
 */
void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
				      unsigned count,
				      const struct pipe_vertex_element *elements)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_bytecode bc;
	struct r600_bytecode_vtx vtx;
	const struct util_format_description *desc;
	/* Added to each vertex_buffer_index to form the fetch buffer_id. */
	unsigned fetch_resource_start = rctx->b.gfx_level >= EVERGREEN ? 0 : 160;
	unsigned format, num_format, format_comp, endian;
	uint32_t *bytecode;
	int i, j, r, fs_size;
	uint32_t buffer_mask = 0;
	struct r600_fetch_shader *shader;
	/* Zeroed so that slots not referenced by any element do not carry
	 * indeterminate stack contents into shader->strides below. */
	unsigned strides[PIPE_MAX_ATTRIBS] = {0};

	assert(count < 32);

	memset(&bc, 0, sizeof(bc));
	r600_bytecode_init(&bc, rctx->b.gfx_level, rctx->b.family,
			   rctx->screen->has_compressed_msaa_texturing);
	bc.isa = rctx->isa;

	for (i = 0; i < count; i++) {
		if (elements[i].instance_divisor > 1) {
			/* On Cayman the op is emitted for channels 0..3 and
			 * only the channel 3 copy is written and marked last;
			 * other chips emit the channel 3 op alone.
			 * NOTE(review): presumably Cayman needs the op in all
			 * four vector slots - confirm against the ISA docs. */
			const int first_chan = rctx->b.gfx_level == CAYMAN ? 0 : 3;

			for (j = first_chan; j < 4; j++) {
				struct r600_bytecode_alu alu;

				memset(&alu, 0, sizeof(alu));
				alu.op = ALU_OP2_MULHI_UINT;
				alu.src[0].sel = 0;
				alu.src[0].chan = 3;
				alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
				alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1;
				alu.dst.sel = i + 1;
				alu.dst.chan = j;
				alu.dst.write = j == 3;
				alu.last = j == 3;
				if ((r = r600_bytecode_add_alu(&bc, &alu)))
					goto fail;
			}
		}
		strides[elements[i].vertex_buffer_index] = elements[i].src_stride;
		buffer_mask |= BITFIELD_BIT(elements[i].vertex_buffer_index);
	}

	for (i = 0; i < count; i++) {
		r600_vertex_data_type(elements[i].src_format,
				      &format, &num_format, &format_comp, &endian);

		desc = util_format_description(elements[i].src_format);

		if (elements[i].src_offset > 65535) {
			R600_ERR("too big src_offset: %u\n", elements[i].src_offset);
			goto fail;
		}

		memset(&vtx, 0, sizeof(vtx));
		vtx.buffer_id = elements[i].vertex_buffer_index + fetch_resource_start;
		vtx.fetch_type = elements[i].instance_divisor ? SQ_VTX_FETCH_INSTANCE_DATA : SQ_VTX_FETCH_VERTEX_DATA;
		/* Divisors > 1 index with the value computed into GPR i+1 above;
		 * everything else indexes straight from GPR 0. */
		vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0;
		/* Instanced fetches read channel 3, per-vertex fetches channel 0. */
		vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0;
		vtx.mega_fetch_count = 0x1F;
		vtx.dst_gpr = i + 1;
		vtx.dst_sel_x = desc->swizzle[0];
		vtx.dst_sel_y = desc->swizzle[1];
		vtx.dst_sel_z = desc->swizzle[2];
		vtx.dst_sel_w = desc->swizzle[3];
		vtx.data_format = format;
		vtx.num_format_all = num_format;
		vtx.format_comp_all = format_comp;
		vtx.offset = elements[i].src_offset;
		vtx.endian = endian;

		if ((r = r600_bytecode_add_vtx(&bc, &vtx)))
			goto fail;
	}

	/* A failure to append the final RET must not be ignored, otherwise an
	 * unterminated program would be built and uploaded. */
	if ((r = r600_bytecode_add_cfinst(&bc, CF_OP_RET)))
		goto fail;

	if ((r = r600_bytecode_build(&bc)))
		goto fail;

	if (rctx->screen->b.debug_flags & DBG_FS) {
		fprintf(stderr, "--------------------------------------------------------------\n");
		fprintf(stderr, "Vertex elements state:\n");
		for (i = 0; i < count; i++) {
			fprintf(stderr, " ");
			util_dump_vertex_element(stderr, elements+i);
			fprintf(stderr, "\n");
		}

		r600_bytecode_disasm(&bc);
	}

	/* bc.ndw counts dwords; fs_size is the size in bytes. */
	fs_size = bc.ndw*4;

	/* Allocate the CSO. */
	shader = CALLOC_STRUCT(r600_fetch_shader);
	if (!shader)
		goto fail;

	memcpy(shader->strides, strides, sizeof(strides));
	shader->buffer_mask = buffer_mask;

	u_suballocator_alloc(&rctx->allocator_fetch_shader, fs_size, 256,
			     &shader->offset,
			     (struct pipe_resource**)&shader->buffer);
	if (!shader->buffer)
		goto fail_free_shader;

	/* NOTE(review): the map result is used without a NULL check; handling a
	 * failed map here would also have to drop the reference held in
	 * shader->buffer - confirm the right release helper before adding it. */
	bytecode = r600_buffer_map_sync_with_rings
		(&rctx->b, shader->buffer,
		 PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED | RADEON_MAP_TEMPORARY);
	bytecode += shader->offset / 4;

	/* The uploaded dwords are little-endian regardless of the host. */
	if (UTIL_ARCH_BIG_ENDIAN) {
		for (i = 0; i < fs_size / 4; ++i) {
			bytecode[i] = util_cpu_to_le32(bc.bytecode[i]);
		}
	} else {
		memcpy(bytecode, bc.bytecode, fs_size);
	}
	rctx->b.ws->buffer_unmap(rctx->b.ws, shader->buffer->buf);

	r600_bytecode_clear(&bc);
	return shader;

fail_free_shader:
	FREE(shader);
fail:
	r600_bytecode_clear(&bc);
	return NULL;
}
void r600_bytecode_alu_read(struct r600_bytecode *bc,
struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1)
{

View file

@ -332,10 +332,6 @@ int r600_load_ar(struct r600_bytecode *bc, bool for_src);
int cm_bytecode_add_cf_end(struct r600_bytecode *bc);
/* Builds the fetch shader for `count` vertex elements and uploads its
 * bytecode. Returns a newly allocated struct r600_fetch_shader, or NULL on
 * failure. */
void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
unsigned count,
const struct pipe_vertex_element *elements);
/* r700_asm.c */
void r700_bytecode_cf_vtx_build(uint32_t *bytecode,
const struct r600_bytecode_cf *cf);

View file

@ -40,7 +40,9 @@
#include "nir/tgsi_to_nir.h"
#include "nir/nir_to_tgsi_info.h"
#include "compiler/nir/nir.h"
#include "util/macros.h"
#include "util/u_bitcast.h"
#include "util/u_dump.h"
#include "util/u_endian.h"
#include "util/u_memory.h"
#include "util/u_math.h"
@ -369,6 +371,164 @@ struct r600_shader_ctx {
unsigned enabled_stream_buffers_mask;
};
/*
 * Build the fetch shader for a vertex elements state and upload its bytecode.
 *
 * For every element with instance_divisor > 1 a MULHI_UINT ALU op is emitted
 * that multiplies channel 3 of GPR 0 by the literal 2^32 / divisor + 1 and
 * writes channel 3 of GPR i+1. Then one vertex fetch per element loads the
 * attribute into GPR i+1, and the program is terminated with CF_OP_RET.
 * The assembled dwords are copied, in little-endian order, into a buffer
 * suballocated from rctx->allocator_fetch_shader.
 *
 * ctx:      context, cast to struct r600_context.
 * count:    number of entries in elements (asserted to be < 32).
 * elements: vertex element descriptions.
 *
 * Returns a newly allocated struct r600_fetch_shader, or NULL when bytecode
 * assembly fails, an element has src_offset > 65535, or an allocation fails.
 */
void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
				      unsigned count,
				      const struct pipe_vertex_element *elements)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_bytecode bc;
	struct r600_bytecode_vtx vtx;
	const struct util_format_description *desc;
	/* Added to each vertex_buffer_index to form the fetch buffer_id. */
	unsigned fetch_resource_start = rctx->b.gfx_level >= EVERGREEN ? 0 : 160;
	unsigned format, num_format, format_comp, endian;
	uint32_t *bytecode;
	int i, j, r, fs_size;
	uint32_t buffer_mask = 0;
	struct r600_fetch_shader *shader;
	/* Zeroed so that slots not referenced by any element do not carry
	 * indeterminate stack contents into shader->strides below. */
	unsigned strides[PIPE_MAX_ATTRIBS] = {0};

	assert(count < 32);

	memset(&bc, 0, sizeof(bc));
	r600_bytecode_init(&bc, rctx->b.gfx_level, rctx->b.family,
			   rctx->screen->has_compressed_msaa_texturing);
	bc.isa = rctx->isa;

	for (i = 0; i < count; i++) {
		if (elements[i].instance_divisor > 1) {
			/* On Cayman the op is emitted for channels 0..3 and
			 * only the channel 3 copy is written and marked last;
			 * other chips emit the channel 3 op alone.
			 * NOTE(review): presumably Cayman needs the op in all
			 * four vector slots - confirm against the ISA docs. */
			const int first_chan = rctx->b.gfx_level == CAYMAN ? 0 : 3;

			for (j = first_chan; j < 4; j++) {
				struct r600_bytecode_alu alu;

				memset(&alu, 0, sizeof(alu));
				alu.op = ALU_OP2_MULHI_UINT;
				alu.src[0].sel = 0;
				alu.src[0].chan = 3;
				alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
				alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1;
				alu.dst.sel = i + 1;
				alu.dst.chan = j;
				alu.dst.write = j == 3;
				alu.last = j == 3;
				if ((r = r600_bytecode_add_alu(&bc, &alu)))
					goto fail;
			}
		}
		strides[elements[i].vertex_buffer_index] = elements[i].src_stride;
		buffer_mask |= BITFIELD_BIT(elements[i].vertex_buffer_index);
	}

	for (i = 0; i < count; i++) {
		r600_vertex_data_type(elements[i].src_format,
				      &format, &num_format, &format_comp, &endian);

		desc = util_format_description(elements[i].src_format);

		if (elements[i].src_offset > 65535) {
			R600_ERR("too big src_offset: %u\n", elements[i].src_offset);
			goto fail;
		}

		memset(&vtx, 0, sizeof(vtx));
		vtx.buffer_id = elements[i].vertex_buffer_index + fetch_resource_start;
		vtx.fetch_type = elements[i].instance_divisor ? SQ_VTX_FETCH_INSTANCE_DATA : SQ_VTX_FETCH_VERTEX_DATA;
		/* Divisors > 1 index with the value computed into GPR i+1 above;
		 * everything else indexes straight from GPR 0. */
		vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0;
		/* Instanced fetches read channel 3, per-vertex fetches channel 0. */
		vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0;
		vtx.mega_fetch_count = 0x1F;
		vtx.dst_gpr = i + 1;
		vtx.dst_sel_x = desc->swizzle[0];
		vtx.dst_sel_y = desc->swizzle[1];
		vtx.dst_sel_z = desc->swizzle[2];
		vtx.dst_sel_w = desc->swizzle[3];
		vtx.data_format = format;
		vtx.num_format_all = num_format;
		vtx.format_comp_all = format_comp;
		vtx.offset = elements[i].src_offset;
		vtx.endian = endian;

		if ((r = r600_bytecode_add_vtx(&bc, &vtx)))
			goto fail;
	}

	/* A failure to append the final RET must not be ignored, otherwise an
	 * unterminated program would be built and uploaded. */
	if ((r = r600_bytecode_add_cfinst(&bc, CF_OP_RET)))
		goto fail;

	if ((r = r600_bytecode_build(&bc)))
		goto fail;

	if (rctx->screen->b.debug_flags & DBG_FS) {
		fprintf(stderr, "--------------------------------------------------------------\n");
		fprintf(stderr, "Vertex elements state:\n");
		for (i = 0; i < count; i++) {
			fprintf(stderr, " ");
			util_dump_vertex_element(stderr, elements+i);
			fprintf(stderr, "\n");
		}

		r600_bytecode_disasm(&bc);
	}

	/* bc.ndw counts dwords; fs_size is the size in bytes. */
	fs_size = bc.ndw*4;

	/* Allocate the CSO. */
	shader = CALLOC_STRUCT(r600_fetch_shader);
	if (!shader)
		goto fail;

	memcpy(shader->strides, strides, sizeof(strides));
	shader->buffer_mask = buffer_mask;

	u_suballocator_alloc(&rctx->allocator_fetch_shader, fs_size, 256,
			     &shader->offset,
			     (struct pipe_resource**)&shader->buffer);
	if (!shader->buffer)
		goto fail_free_shader;

	/* NOTE(review): the map result is used without a NULL check; handling a
	 * failed map here would also have to drop the reference held in
	 * shader->buffer - confirm the right release helper before adding it. */
	bytecode = r600_buffer_map_sync_with_rings
		(&rctx->b, shader->buffer,
		 PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED | RADEON_MAP_TEMPORARY);
	bytecode += shader->offset / 4;

	/* The uploaded dwords are little-endian regardless of the host. */
	if (UTIL_ARCH_BIG_ENDIAN) {
		for (i = 0; i < fs_size / 4; ++i) {
			bytecode[i] = util_cpu_to_le32(bc.bytecode[i]);
		}
	} else {
		memcpy(bytecode, bc.bytecode, fs_size);
	}
	rctx->b.ws->buffer_unmap(rctx->b.ws, shader->buffer->buf);

	r600_bytecode_clear(&bc);
	return shader;

fail_free_shader:
	FREE(shader);
fail:
	r600_bytecode_clear(&bc);
	return NULL;
}
int eg_get_interpolator_index(unsigned interpolate, unsigned location)
{
if (interpolate == TGSI_INTERPOLATE_COLOR ||

View file

@ -187,6 +187,10 @@ struct r600_pipe_shader {
unsigned scratch_space_needed; /* size of scratch space (if > 0) counted in vec4 */
};
/* Builds the fetch shader for `count` vertex elements and uploads its
 * bytecode. Returns a newly allocated struct r600_fetch_shader, or NULL on
 * failure. */
void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
unsigned count,
const struct pipe_vertex_element *elements);
/* return the table index 0-5 for TGSI_INTERPOLATE_LINEAR/PERSPECTIVE and
TGSI_INTERPOLATE_LOC_CENTER/SAMPLE/COUNT. Other input values return -1. */
int eg_get_interpolator_index(unsigned interpolate, unsigned location);