mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 13:40:11 +01:00
freedreno/a6xx: split out const emit
In order to inline the const emit and drop the per-gen vfuncs to emit the correct sort of packet, we should consolidate all of the entry-points to const emit in one object file, otherwise we'll end up with multiple copies per gen. Signed-off-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4813>
This commit is contained in:
parent
58fd1d7ecd
commit
aff93f5419
7 changed files with 375 additions and 249 deletions
|
|
@ -171,6 +171,8 @@ a6xx_SOURCES := \
|
|||
a6xx/fd6_blitter.h \
|
||||
a6xx/fd6_compute.c \
|
||||
a6xx/fd6_compute.h \
|
||||
a6xx/fd6_const.c \
|
||||
a6xx/fd6_const.h \
|
||||
a6xx/fd6_context.c \
|
||||
a6xx/fd6_context.h \
|
||||
a6xx/fd6_draw.c \
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@
|
|||
#include "freedreno_resource.h"
|
||||
|
||||
#include "fd6_compute.h"
|
||||
#include "fd6_const.h"
|
||||
#include "fd6_context.h"
|
||||
#include "fd6_emit.h"
|
||||
|
||||
|
|
@ -140,7 +141,7 @@ fd6_launch_grid(struct fd_context *ctx, const struct pipe_grid_info *info)
|
|||
cs_program_emit(ring, v);
|
||||
|
||||
fd6_emit_cs_state(ctx, ring, v);
|
||||
ir3_emit_cs_consts(v, ring, ctx, info);
|
||||
fd6_emit_cs_consts(v, ring, ctx, info);
|
||||
|
||||
foreach_bit(i, ctx->global_bindings.enabled_mask)
|
||||
nglobal++;
|
||||
|
|
|
|||
305
src/gallium/drivers/freedreno/a6xx/fd6_const.c
Normal file
305
src/gallium/drivers/freedreno/a6xx/fd6_const.c
Normal file
|
|
@ -0,0 +1,305 @@
|
|||
/*
|
||||
* Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
|
||||
* Copyright © 2018 Google, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "fd6_const.h"
|
||||
|
||||
/* regid: base const register
|
||||
* prsc or dwords: buffer containing constant values
|
||||
* sizedwords: size of const value buffer
|
||||
*/
|
||||
void
|
||||
fd6_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type,
|
||||
uint32_t regid, uint32_t offset, uint32_t sizedwords,
|
||||
const uint32_t *dwords, struct pipe_resource *prsc)
|
||||
{
|
||||
uint32_t i, sz, align_sz;
|
||||
enum a6xx_state_src src;
|
||||
|
||||
debug_assert((regid % 4) == 0);
|
||||
|
||||
if (prsc) {
|
||||
sz = 0;
|
||||
src = SS6_INDIRECT;
|
||||
} else {
|
||||
sz = sizedwords;
|
||||
src = SS6_DIRECT;
|
||||
}
|
||||
|
||||
align_sz = align(sz, 4);
|
||||
|
||||
OUT_PKT7(ring, fd6_stage2opcode(type), 3 + align_sz);
|
||||
OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(src) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(type)) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(DIV_ROUND_UP(sizedwords, 4)));
|
||||
if (prsc) {
|
||||
struct fd_bo *bo = fd_resource(prsc)->bo;
|
||||
OUT_RELOC(ring, bo, offset, 0, 0);
|
||||
} else {
|
||||
OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
|
||||
OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
|
||||
dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
|
||||
}
|
||||
|
||||
for (i = 0; i < sz; i++) {
|
||||
OUT_RING(ring, dwords[i]);
|
||||
}
|
||||
|
||||
/* Zero-pad to multiple of 4 dwords */
|
||||
for (i = sz; i < align_sz; i++) {
|
||||
OUT_RING(ring, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fd6_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type, boolean write,
|
||||
uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets)
|
||||
{
|
||||
uint32_t anum = align(num, 2);
|
||||
uint32_t i;
|
||||
|
||||
debug_assert((regid % 4) == 0);
|
||||
|
||||
OUT_PKT7(ring, fd6_stage2opcode(type), 3 + (2 * anum));
|
||||
OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS)|
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(type)) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(anum/2));
|
||||
OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
|
||||
OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
if (prscs[i]) {
|
||||
if (write) {
|
||||
OUT_RELOCW(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
|
||||
} else {
|
||||
OUT_RELOC(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
|
||||
}
|
||||
} else {
|
||||
OUT_RING(ring, 0xbad00000 | (i << 16));
|
||||
OUT_RING(ring, 0xbad00000 | (i << 16));
|
||||
}
|
||||
}
|
||||
|
||||
for (; i < anum; i++) {
|
||||
OUT_RING(ring, 0xffffffff);
|
||||
OUT_RING(ring, 0xffffffff);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_tess_bos(struct fd_ringbuffer *ring, struct fd6_emit *emit, struct ir3_shader_variant *s)
|
||||
{
|
||||
struct fd_context *ctx = emit->ctx;
|
||||
const unsigned regid = s->shader->const_state.offsets.primitive_param * 4 + 4;
|
||||
uint32_t dwords = 16;
|
||||
|
||||
OUT_PKT7(ring, fd6_stage2opcode(s->type), 3);
|
||||
OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid / 4) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS)|
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(s->type)) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(dwords / 4));
|
||||
OUT_RB(ring, ctx->batch->tess_addrs_constobj);
|
||||
}
|
||||
|
||||
static void
|
||||
emit_stage_tess_consts(struct fd_ringbuffer *ring, struct ir3_shader_variant *v,
|
||||
uint32_t *params, int num_params)
|
||||
{
|
||||
const unsigned regid = v->shader->const_state.offsets.primitive_param;
|
||||
int size = MIN2(1 + regid, v->constlen) - regid;
|
||||
if (size > 0)
|
||||
fd6_emit_const(ring, v->type, regid * 4, 0, num_params, params, NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
emit_tess_consts(struct fd6_emit *emit)
|
||||
{
|
||||
struct fd_context *ctx = emit->ctx;
|
||||
|
||||
struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer(
|
||||
ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
|
||||
|
||||
/* VS sizes are in bytes since that's what STLW/LDLW use, while the HS
|
||||
* size is dwords, since that's what LDG/STG use.
|
||||
*/
|
||||
unsigned num_vertices =
|
||||
emit->hs ?
|
||||
emit->info->vertices_per_patch :
|
||||
emit->gs->shader->nir->info.gs.vertices_in;
|
||||
|
||||
uint32_t vs_params[4] = {
|
||||
emit->vs->shader->output_size * num_vertices * 4, /* vs primitive stride */
|
||||
emit->vs->shader->output_size * 4, /* vs vertex stride */
|
||||
0,
|
||||
0
|
||||
};
|
||||
|
||||
emit_stage_tess_consts(constobj, emit->vs, vs_params, ARRAY_SIZE(vs_params));
|
||||
|
||||
if (emit->hs) {
|
||||
uint32_t hs_params[4] = {
|
||||
emit->vs->shader->output_size * num_vertices * 4, /* vs primitive stride */
|
||||
emit->vs->shader->output_size * 4, /* vs vertex stride */
|
||||
emit->hs->shader->output_size,
|
||||
emit->info->vertices_per_patch
|
||||
};
|
||||
|
||||
emit_stage_tess_consts(constobj, emit->hs, hs_params, ARRAY_SIZE(hs_params));
|
||||
emit_tess_bos(constobj, emit, emit->hs);
|
||||
|
||||
if (emit->gs)
|
||||
num_vertices = emit->gs->shader->nir->info.gs.vertices_in;
|
||||
|
||||
uint32_t ds_params[4] = {
|
||||
emit->ds->shader->output_size * num_vertices * 4, /* ds primitive stride */
|
||||
emit->ds->shader->output_size * 4, /* ds vertex stride */
|
||||
emit->hs->shader->output_size, /* hs vertex stride (dwords) */
|
||||
emit->hs->shader->nir->info.tess.tcs_vertices_out
|
||||
};
|
||||
|
||||
emit_stage_tess_consts(constobj, emit->ds, ds_params, ARRAY_SIZE(ds_params));
|
||||
emit_tess_bos(constobj, emit, emit->ds);
|
||||
}
|
||||
|
||||
if (emit->gs) {
|
||||
struct ir3_shader_variant *prev;
|
||||
if (emit->ds)
|
||||
prev = emit->ds;
|
||||
else
|
||||
prev = emit->vs;
|
||||
|
||||
uint32_t gs_params[4] = {
|
||||
prev->shader->output_size * num_vertices * 4, /* ds primitive stride */
|
||||
prev->shader->output_size * 4, /* ds vertex stride */
|
||||
0,
|
||||
0,
|
||||
};
|
||||
|
||||
num_vertices = emit->gs->shader->nir->info.gs.vertices_in;
|
||||
emit_stage_tess_consts(constobj, emit->gs, gs_params, ARRAY_SIZE(gs_params));
|
||||
}
|
||||
|
||||
fd6_emit_take_group(emit, constobj, FD6_GROUP_PRIMITIVE_PARAMS, ENABLE_ALL);
|
||||
}
|
||||
|
||||
static void
|
||||
emit_user_consts(struct fd6_emit *emit)
|
||||
{
|
||||
static const enum pipe_shader_type types[] = {
|
||||
PIPE_SHADER_VERTEX, PIPE_SHADER_TESS_CTRL, PIPE_SHADER_TESS_EVAL,
|
||||
PIPE_SHADER_GEOMETRY, PIPE_SHADER_FRAGMENT,
|
||||
};
|
||||
const struct ir3_shader_variant *variants[] = {
|
||||
emit->vs, emit->hs, emit->ds, emit->gs, emit->fs,
|
||||
};
|
||||
struct fd_context *ctx = emit->ctx;
|
||||
unsigned sz = 0;
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(types); i++) {
|
||||
if (!variants[i])
|
||||
continue;
|
||||
sz += variants[i]->shader->ubo_state.cmdstream_size;
|
||||
}
|
||||
|
||||
struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer(
|
||||
ctx->batch->submit, sz, FD_RINGBUFFER_STREAMING);
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(types); i++) {
|
||||
if (!variants[i])
|
||||
continue;
|
||||
ir3_emit_user_consts(ctx->screen, variants[i], constobj, &ctx->constbuf[types[i]]);
|
||||
ir3_emit_ubos(ctx->screen, variants[i], constobj, &ctx->constbuf[types[i]]);
|
||||
}
|
||||
|
||||
fd6_emit_take_group(emit, constobj, FD6_GROUP_CONST, ENABLE_ALL);
|
||||
}
|
||||
|
||||
void
|
||||
fd6_emit_consts(struct fd6_emit *emit)
|
||||
{
|
||||
struct fd_context *ctx = emit->ctx;
|
||||
struct fd6_context *fd6_ctx = fd6_context(ctx);
|
||||
|
||||
if (emit->dirty & (FD_DIRTY_CONST | FD_DIRTY_PROG))
|
||||
emit_user_consts(emit);
|
||||
|
||||
if (emit->key.key.has_gs || emit->key.key.tessellation)
|
||||
emit_tess_consts(emit);
|
||||
|
||||
/* if driver-params are needed, emit each time: */
|
||||
const struct ir3_shader_variant *vs = emit->vs;
|
||||
if (ir3_needs_vs_driver_params(vs)) {
|
||||
struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer(
|
||||
ctx->batch->submit, IR3_DP_VS_COUNT * 4, FD_RINGBUFFER_STREAMING);
|
||||
ir3_emit_vs_driver_params(vs, dpconstobj, ctx, emit->info);
|
||||
fd6_emit_take_group(emit, dpconstobj, FD6_GROUP_VS_DRIVER_PARAMS, ENABLE_ALL);
|
||||
fd6_ctx->has_dp_state = true;
|
||||
} else if (fd6_ctx->has_dp_state) {
|
||||
fd6_emit_take_group(emit, NULL, FD6_GROUP_VS_DRIVER_PARAMS, ENABLE_ALL);
|
||||
fd6_ctx->has_dp_state = false;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fd6_emit_ibo_consts(struct fd6_emit *emit, const struct ir3_shader_variant *v,
|
||||
enum pipe_shader_type stage, struct fd_ringbuffer *ring)
|
||||
{
|
||||
struct fd_context *ctx = emit->ctx;
|
||||
|
||||
ir3_emit_ssbo_sizes(ctx->screen, v, ring, &ctx->shaderbuf[stage]);
|
||||
ir3_emit_image_dims(ctx->screen, v, ring, &ctx->shaderimg[stage]);
|
||||
}
|
||||
|
||||
/**
 * a6xx entry point for compute-shader const emit; forwards all arguments
 * unchanged to ir3_emit_cs_consts().
 */
void
fd6_emit_cs_consts(const struct ir3_shader_variant *v,
		struct fd_ringbuffer *ring,
		struct fd_context *ctx,
		const struct pipe_grid_info *info)
{
	ir3_emit_cs_consts(v, ring, ctx, info);
}
|
||||
|
||||
/**
 * a6xx entry point for emitting a variant's immediates; forwards all
 * arguments unchanged to ir3_emit_immediates().
 */
void
fd6_emit_immediates(struct fd_screen *screen,
		const struct ir3_shader_variant *v,
		struct fd_ringbuffer *ring)
{
	ir3_emit_immediates(screen, v, ring);
}
|
||||
|
||||
/**
 * a6xx entry point for sizing the user-const state; forwards all arguments
 * unchanged to ir3_user_consts_size(), which fills *packets and *size.
 */
void
fd6_user_consts_size(struct ir3_ubo_analysis_state *state,
		unsigned *packets,
		unsigned *size)
{
	ir3_user_consts_size(state, packets, size);
}
|
||||
|
||||
/**
 * a6xx entry point for emitting the link map between a producer stage and
 * the consuming variant v; forwards all arguments unchanged to
 * ir3_emit_link_map().
 */
void
fd6_emit_link_map(struct fd_screen *screen,
		const struct ir3_shader_variant *producer,
		const struct ir3_shader_variant *v,
		struct fd_ringbuffer *ring)
{
	ir3_emit_link_map(screen, producer, v, ring);
}
|
||||
50
src/gallium/drivers/freedreno/a6xx/fd6_const.h
Normal file
50
src/gallium/drivers/freedreno/a6xx/fd6_const.h
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
|
||||
* Copyright © 2018 Google, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef FD6_CONST_H
#define FD6_CONST_H

#include "fd6_emit.h"

/* Low-level CP_LOAD_STATE6 const emit: values come inline from dwords, or
 * from the buffer prsc when it is non-NULL.
 */
void fd6_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type,
		uint32_t regid, uint32_t offset, uint32_t sizedwords,
		const uint32_t *dwords, struct pipe_resource *prsc);

/* Emit a table of num buffer addresses (prscs[] + offsets[]) as consts. */
void fd6_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type,
		boolean write, uint32_t regid, uint32_t num,
		struct pipe_resource **prscs, uint32_t *offsets);

/* Emit the draw-time const state groups for emit. */
void fd6_emit_consts(struct fd6_emit *emit);

/* Emit SSBO-size and image-dimension consts for variant v at stage. */
void fd6_emit_ibo_consts(struct fd6_emit *emit,
		const struct ir3_shader_variant *v,
		enum pipe_shader_type stage, struct fd_ringbuffer *ring);

/* Emit consts for a compute grid launch. */
void fd6_emit_cs_consts(const struct ir3_shader_variant *v,
		struct fd_ringbuffer *ring, struct fd_context *ctx,
		const struct pipe_grid_info *info);

/* Emit the immediates of variant v. */
void fd6_emit_immediates(struct fd_screen *screen,
		const struct ir3_shader_variant *v, struct fd_ringbuffer *ring);

/* Compute the packet count and size needed for user consts. */
void fd6_user_consts_size(struct ir3_ubo_analysis_state *state,
		unsigned *packets, unsigned *size);

/* Emit the link map between producer and the consuming variant v. */
void fd6_emit_link_map(struct fd_screen *screen,
		const struct ir3_shader_variant *producer,
		const struct ir3_shader_variant *v, struct fd_ringbuffer *ring);

#endif /* FD6_CONST_H */
|
||||
|
|
@ -38,6 +38,7 @@
|
|||
|
||||
#include "fd6_emit.h"
|
||||
#include "fd6_blend.h"
|
||||
#include "fd6_const.h"
|
||||
#include "fd6_context.h"
|
||||
#include "fd6_image.h"
|
||||
#include "fd6_program.h"
|
||||
|
|
@ -46,92 +47,6 @@
|
|||
#include "fd6_format.h"
|
||||
#include "fd6_zsa.h"
|
||||
|
||||
/* regid: base const register
|
||||
* prsc or dwords: buffer containing constant values
|
||||
* sizedwords: size of const value buffer
|
||||
*/
|
||||
static void
|
||||
fd6_emit_const(struct fd_ringbuffer *ring, gl_shader_stage type,
|
||||
uint32_t regid, uint32_t offset, uint32_t sizedwords,
|
||||
const uint32_t *dwords, struct pipe_resource *prsc)
|
||||
{
|
||||
uint32_t i, sz, align_sz;
|
||||
enum a6xx_state_src src;
|
||||
|
||||
debug_assert((regid % 4) == 0);
|
||||
|
||||
if (prsc) {
|
||||
sz = 0;
|
||||
src = SS6_INDIRECT;
|
||||
} else {
|
||||
sz = sizedwords;
|
||||
src = SS6_DIRECT;
|
||||
}
|
||||
|
||||
align_sz = align(sz, 4);
|
||||
|
||||
OUT_PKT7(ring, fd6_stage2opcode(type), 3 + align_sz);
|
||||
OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
|
||||
CP_LOAD_STATE6_0_STATE_SRC(src) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(type)) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(DIV_ROUND_UP(sizedwords, 4)));
|
||||
if (prsc) {
|
||||
struct fd_bo *bo = fd_resource(prsc)->bo;
|
||||
OUT_RELOC(ring, bo, offset, 0, 0);
|
||||
} else {
|
||||
OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
|
||||
OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
|
||||
dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
|
||||
}
|
||||
|
||||
for (i = 0; i < sz; i++) {
|
||||
OUT_RING(ring, dwords[i]);
|
||||
}
|
||||
|
||||
/* Zero-pad to multiple of 4 dwords */
|
||||
for (i = sz; i < align_sz; i++) {
|
||||
OUT_RING(ring, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
fd6_emit_const_bo(struct fd_ringbuffer *ring, gl_shader_stage type, boolean write,
|
||||
uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets)
|
||||
{
|
||||
uint32_t anum = align(num, 2);
|
||||
uint32_t i;
|
||||
|
||||
debug_assert((regid % 4) == 0);
|
||||
|
||||
OUT_PKT7(ring, fd6_stage2opcode(type), 3 + (2 * anum));
|
||||
OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS)|
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(type)) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(anum/2));
|
||||
OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
|
||||
OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
if (prscs[i]) {
|
||||
if (write) {
|
||||
OUT_RELOCW(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
|
||||
} else {
|
||||
OUT_RELOC(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
|
||||
}
|
||||
} else {
|
||||
OUT_RING(ring, 0xbad00000 | (i << 16));
|
||||
OUT_RING(ring, 0xbad00000 | (i << 16));
|
||||
}
|
||||
}
|
||||
|
||||
for (; i < anum; i++) {
|
||||
OUT_RING(ring, 0xffffffff);
|
||||
OUT_RING(ring, 0xffffffff);
|
||||
}
|
||||
}
|
||||
|
||||
/* Border color layout is diff from a4xx/a5xx.. if it turns out to be
|
||||
* the same as a6xx then move this somewhere common ;-)
|
||||
*
|
||||
|
|
@ -807,140 +722,10 @@ fd6_emit_streamout(struct fd_ringbuffer *ring, struct fd6_emit *emit, struct ir3
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_tess_bos(struct fd_ringbuffer *ring, struct fd6_emit *emit, struct ir3_shader_variant *s)
|
||||
{
|
||||
struct fd_context *ctx = emit->ctx;
|
||||
const unsigned regid = s->shader->const_state.offsets.primitive_param * 4 + 4;
|
||||
uint32_t dwords = 16;
|
||||
|
||||
OUT_PKT7(ring, fd6_stage2opcode(s->type), 3);
|
||||
OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid / 4) |
|
||||
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS)|
|
||||
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
|
||||
CP_LOAD_STATE6_0_STATE_BLOCK(fd6_stage2shadersb(s->type)) |
|
||||
CP_LOAD_STATE6_0_NUM_UNIT(dwords / 4));
|
||||
OUT_RB(ring, ctx->batch->tess_addrs_constobj);
|
||||
}
|
||||
|
||||
static void
|
||||
emit_stage_tess_consts(struct fd_ringbuffer *ring, struct ir3_shader_variant *v,
|
||||
uint32_t *params, int num_params)
|
||||
{
|
||||
const unsigned regid = v->shader->const_state.offsets.primitive_param;
|
||||
int size = MIN2(1 + regid, v->constlen) - regid;
|
||||
if (size > 0)
|
||||
fd6_emit_const(ring, v->type, regid * 4, 0, num_params, params, NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
fd6_emit_tess_const(struct fd6_emit *emit)
|
||||
{
|
||||
struct fd_context *ctx = emit->ctx;
|
||||
|
||||
struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer(
|
||||
ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
|
||||
|
||||
/* VS sizes are in bytes since that's what STLW/LDLW use, while the HS
|
||||
* size is dwords, since that's what LDG/STG use.
|
||||
*/
|
||||
unsigned num_vertices =
|
||||
emit->hs ?
|
||||
emit->info->vertices_per_patch :
|
||||
emit->gs->shader->nir->info.gs.vertices_in;
|
||||
|
||||
uint32_t vs_params[4] = {
|
||||
emit->vs->shader->output_size * num_vertices * 4, /* vs primitive stride */
|
||||
emit->vs->shader->output_size * 4, /* vs vertex stride */
|
||||
0,
|
||||
0
|
||||
};
|
||||
|
||||
emit_stage_tess_consts(constobj, emit->vs, vs_params, ARRAY_SIZE(vs_params));
|
||||
|
||||
if (emit->hs) {
|
||||
uint32_t hs_params[4] = {
|
||||
emit->vs->shader->output_size * num_vertices * 4, /* vs primitive stride */
|
||||
emit->vs->shader->output_size * 4, /* vs vertex stride */
|
||||
emit->hs->shader->output_size,
|
||||
emit->info->vertices_per_patch
|
||||
};
|
||||
|
||||
emit_stage_tess_consts(constobj, emit->hs, hs_params, ARRAY_SIZE(hs_params));
|
||||
emit_tess_bos(constobj, emit, emit->hs);
|
||||
|
||||
if (emit->gs)
|
||||
num_vertices = emit->gs->shader->nir->info.gs.vertices_in;
|
||||
|
||||
uint32_t ds_params[4] = {
|
||||
emit->ds->shader->output_size * num_vertices * 4, /* ds primitive stride */
|
||||
emit->ds->shader->output_size * 4, /* ds vertex stride */
|
||||
emit->hs->shader->output_size, /* hs vertex stride (dwords) */
|
||||
emit->hs->shader->nir->info.tess.tcs_vertices_out
|
||||
};
|
||||
|
||||
emit_stage_tess_consts(constobj, emit->ds, ds_params, ARRAY_SIZE(ds_params));
|
||||
emit_tess_bos(constobj, emit, emit->ds);
|
||||
}
|
||||
|
||||
if (emit->gs) {
|
||||
struct ir3_shader_variant *prev;
|
||||
if (emit->ds)
|
||||
prev = emit->ds;
|
||||
else
|
||||
prev = emit->vs;
|
||||
|
||||
uint32_t gs_params[4] = {
|
||||
prev->shader->output_size * num_vertices * 4, /* ds primitive stride */
|
||||
prev->shader->output_size * 4, /* ds vertex stride */
|
||||
0,
|
||||
0,
|
||||
};
|
||||
|
||||
num_vertices = emit->gs->shader->nir->info.gs.vertices_in;
|
||||
emit_stage_tess_consts(constobj, emit->gs, gs_params, ARRAY_SIZE(gs_params));
|
||||
}
|
||||
|
||||
fd6_emit_take_group(emit, constobj, FD6_GROUP_PRIMITIVE_PARAMS, ENABLE_ALL);
|
||||
}
|
||||
|
||||
static void
|
||||
fd6_emit_consts(struct fd6_emit *emit)
|
||||
{
|
||||
static const enum pipe_shader_type types[] = {
|
||||
PIPE_SHADER_VERTEX, PIPE_SHADER_TESS_CTRL, PIPE_SHADER_TESS_EVAL,
|
||||
PIPE_SHADER_GEOMETRY, PIPE_SHADER_FRAGMENT,
|
||||
};
|
||||
const struct ir3_shader_variant *variants[] = {
|
||||
emit->vs, emit->hs, emit->ds, emit->gs, emit->fs,
|
||||
};
|
||||
struct fd_context *ctx = emit->ctx;
|
||||
unsigned sz = 0;
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(types); i++) {
|
||||
if (!variants[i])
|
||||
continue;
|
||||
sz += variants[i]->shader->ubo_state.cmdstream_size;
|
||||
}
|
||||
|
||||
struct fd_ringbuffer *constobj = fd_submit_new_ringbuffer(
|
||||
ctx->batch->submit, sz, FD_RINGBUFFER_STREAMING);
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(types); i++) {
|
||||
if (!variants[i])
|
||||
continue;
|
||||
ir3_emit_user_consts(ctx->screen, variants[i], constobj, &ctx->constbuf[types[i]]);
|
||||
ir3_emit_ubos(ctx->screen, variants[i], constobj, &ctx->constbuf[types[i]]);
|
||||
}
|
||||
|
||||
fd6_emit_take_group(emit, constobj, FD6_GROUP_CONST, ENABLE_ALL);
|
||||
}
|
||||
|
||||
void
|
||||
fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
|
||||
{
|
||||
struct fd_context *ctx = emit->ctx;
|
||||
struct fd6_context *fd6_ctx = fd6_context(ctx);
|
||||
struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
|
||||
const struct fd6_program_state *prog = fd6_emit_get_prog(emit);
|
||||
const struct ir3_shader_variant *vs = emit->vs;
|
||||
|
|
@ -1088,24 +873,7 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
|
|||
fd6_emit_take_group(emit, ring, FD6_GROUP_PROG_FB_RAST, ENABLE_DRAW);
|
||||
}
|
||||
|
||||
if (dirty & (FD_DIRTY_CONST | FD_DIRTY_PROG)) {
|
||||
fd6_emit_consts(emit);
|
||||
}
|
||||
|
||||
if (emit->key.key.has_gs || emit->key.key.tessellation)
|
||||
fd6_emit_tess_const(emit);
|
||||
|
||||
/* if driver-params are needed, emit each time: */
|
||||
if (ir3_needs_vs_driver_params(vs)) {
|
||||
struct fd_ringbuffer *dpconstobj = fd_submit_new_ringbuffer(
|
||||
ctx->batch->submit, IR3_DP_VS_COUNT * 4, FD_RINGBUFFER_STREAMING);
|
||||
ir3_emit_vs_driver_params(vs, dpconstobj, ctx, emit->info);
|
||||
fd6_emit_take_group(emit, dpconstobj, FD6_GROUP_VS_DRIVER_PARAMS, ENABLE_ALL);
|
||||
fd6_ctx->has_dp_state = true;
|
||||
} else if (fd6_ctx->has_dp_state) {
|
||||
fd6_emit_take_group(emit, NULL, FD6_GROUP_VS_DRIVER_PARAMS, ENABLE_ALL);
|
||||
fd6_ctx->has_dp_state = false;
|
||||
}
|
||||
fd6_emit_consts(emit);
|
||||
|
||||
struct ir3_stream_output_info *info = &fd6_last_shader(prog)->shader->stream_output;
|
||||
if (info->num_outputs)
|
||||
|
|
@ -1177,10 +945,7 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
|
|||
OUT_PKT4(obj, REG_A6XX_SP_IBO_COUNT, 1);
|
||||
OUT_RING(obj, ir3_shader_nibo(fs));
|
||||
|
||||
ir3_emit_ssbo_sizes(ctx->screen, fs, obj,
|
||||
&ctx->shaderbuf[PIPE_SHADER_FRAGMENT]);
|
||||
ir3_emit_image_dims(ctx->screen, fs, obj,
|
||||
&ctx->shaderimg[PIPE_SHADER_FRAGMENT]);
|
||||
fd6_emit_ibo_consts(emit, fs, PIPE_SHADER_FRAGMENT, ring);
|
||||
|
||||
fd6_emit_take_group(emit, obj, FD6_GROUP_IBO, ENABLE_DRAW);
|
||||
fd_ringbuffer_del(state);
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@
|
|||
#include "freedreno_program.h"
|
||||
|
||||
#include "fd6_program.h"
|
||||
#include "fd6_const.h"
|
||||
#include "fd6_emit.h"
|
||||
#include "fd6_texture.h"
|
||||
#include "fd6_format.h"
|
||||
|
|
@ -425,7 +426,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
|
|||
COND(vs->need_pixlod, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE));
|
||||
|
||||
fd6_emit_shader(ring, vs);
|
||||
ir3_emit_immediates(screen, vs, ring);
|
||||
fd6_emit_immediates(screen, vs, ring);
|
||||
|
||||
struct ir3_shader_linkage l = {0};
|
||||
const struct ir3_shader_variant *last_shader = fd6_last_shader(state);
|
||||
|
|
@ -510,8 +511,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
|
|||
COND(hs->need_pixlod, A6XX_SP_HS_CTRL_REG0_PIXLODENABLE));
|
||||
|
||||
fd6_emit_shader(ring, hs);
|
||||
ir3_emit_immediates(screen, hs, ring);
|
||||
ir3_emit_link_map(screen, vs, hs, ring);
|
||||
fd6_emit_immediates(screen, hs, ring);
|
||||
fd6_emit_link_map(screen, vs, hs, ring);
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_SP_DS_CTRL_REG0, 1);
|
||||
OUT_RING(ring, A6XX_SP_DS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
|
||||
|
|
@ -520,8 +521,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
|
|||
COND(ds->need_pixlod, A6XX_SP_DS_CTRL_REG0_PIXLODENABLE));
|
||||
|
||||
fd6_emit_shader(ring, ds);
|
||||
ir3_emit_immediates(screen, ds, ring);
|
||||
ir3_emit_link_map(screen, hs, ds, ring);
|
||||
fd6_emit_immediates(screen, ds, ring);
|
||||
fd6_emit_link_map(screen, hs, ds, ring);
|
||||
|
||||
shader_info *hs_info = &hs->shader->nir->info;
|
||||
OUT_PKT4(ring, REG_A6XX_PC_TESS_NUM_VERTEX, 1);
|
||||
|
|
@ -701,11 +702,11 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
|
|||
COND(gs->need_pixlod, A6XX_SP_GS_CTRL_REG0_PIXLODENABLE));
|
||||
|
||||
fd6_emit_shader(ring, gs);
|
||||
ir3_emit_immediates(screen, gs, ring);
|
||||
fd6_emit_immediates(screen, gs, ring);
|
||||
if (ds)
|
||||
ir3_emit_link_map(screen, ds, gs, ring);
|
||||
fd6_emit_link_map(screen, ds, gs, ring);
|
||||
else
|
||||
ir3_emit_link_map(screen, vs, gs, ring);
|
||||
fd6_emit_link_map(screen, vs, gs, ring);
|
||||
|
||||
OUT_PKT4(ring, REG_A6XX_VPC_PACK_GS, 1);
|
||||
OUT_RING(ring, A6XX_VPC_PACK_GS_POSITIONLOC(pos_loc) |
|
||||
|
|
@ -818,7 +819,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen,
|
|||
OUT_RING(ring, COND(fragz, A6XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z));
|
||||
|
||||
if (!binning_pass)
|
||||
ir3_emit_immediates(screen, fs, ring);
|
||||
fd6_emit_immediates(screen, fs, ring);
|
||||
}
|
||||
|
||||
static struct fd_ringbuffer *
|
||||
|
|
@ -1026,7 +1027,7 @@ fd6_shader_state_create(struct pipe_context *pctx, const struct pipe_shader_stat
|
|||
unsigned packets, size;
|
||||
|
||||
/* pre-calculate size required for userconst stateobj: */
|
||||
ir3_user_consts_size(&shader->ubo_state, &packets, &size);
|
||||
fd6_user_consts_size(&shader->ubo_state, &packets, &size);
|
||||
|
||||
/* also account for UBO addresses: */
|
||||
packets += 1;
|
||||
|
|
|
|||
|
|
@ -181,6 +181,8 @@ files_libfreedreno = files(
|
|||
'a6xx/fd6_blitter.h',
|
||||
'a6xx/fd6_compute.c',
|
||||
'a6xx/fd6_compute.h',
|
||||
'a6xx/fd6_const.c',
|
||||
'a6xx/fd6_const.h',
|
||||
'a6xx/fd6_context.c',
|
||||
'a6xx/fd6_context.h',
|
||||
'a6xx/fd6_draw.c',
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue