mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-11 05:50:14 +01:00
broadcom: only support v42 and v71
Acked-by: Emma Anholt <emma@anholt.net>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25851>
This commit is contained in:
parent
dcc4e1b4d7
commit
85f26828fe
47 changed files with 210 additions and 1597 deletions
|
|
@ -21,8 +21,6 @@
|
|||
# [version, cle XML version]
|
||||
v3d_versions = [
|
||||
[21, 21],
|
||||
[33, 33],
|
||||
[41, 33],
|
||||
[42, 33],
|
||||
[71, 33]
|
||||
]
|
||||
|
|
|
|||
|
|
@ -31,10 +31,6 @@
|
|||
|
||||
#if (V3D_VERSION == 21)
|
||||
# include "cle/v3d_packet_v21_pack.h"
|
||||
#elif (V3D_VERSION == 33)
|
||||
# include "cle/v3d_packet_v33_pack.h"
|
||||
#elif (V3D_VERSION == 41)
|
||||
# include "cle/v3d_packet_v41_pack.h"
|
||||
#elif (V3D_VERSION == 42)
|
||||
# include "cle/v3d_packet_v42_pack.h"
|
||||
#elif (V3D_VERSION == 71)
|
||||
|
|
|
|||
|
|
@ -106,12 +106,16 @@ static bool
|
|||
clif_dump_packet(struct clif_dump *clif, uint32_t offset, const uint8_t *cl,
|
||||
uint32_t *size, bool reloc_mode)
|
||||
{
|
||||
if (clif->devinfo->ver >= 42)
|
||||
|
||||
switch (clif->devinfo->ver) {
|
||||
case 42:
|
||||
return v3d42_clif_dump_packet(clif, offset, cl, size, reloc_mode);
|
||||
else if (clif->devinfo->ver >= 41)
|
||||
return v3d41_clif_dump_packet(clif, offset, cl, size, reloc_mode);
|
||||
else
|
||||
return v3d33_clif_dump_packet(clif, offset, cl, size, reloc_mode);
|
||||
case 71:
|
||||
return v3d71_clif_dump_packet(clif, offset, cl, size, reloc_mode);
|
||||
default:
|
||||
break;
|
||||
};
|
||||
unreachable("Unknown HW version");
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
|
|
|
|||
|
|
@ -95,10 +95,6 @@ clif_dump_add_address_to_worklist(struct clif_dump *clif,
|
|||
enum reloc_worklist_type type,
|
||||
uint32_t addr);
|
||||
|
||||
bool v3d33_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
|
||||
const uint8_t *cl, uint32_t *size, bool reloc_mode);
|
||||
bool v3d41_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
|
||||
const uint8_t *cl, uint32_t *size, bool reloc_mode);
|
||||
bool v3d42_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
|
||||
const uint8_t *cl, uint32_t *size, bool reloc_mode);
|
||||
bool v3d71_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
|
||||
|
|
|
|||
|
|
@ -68,8 +68,6 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
|
|||
devinfo->has_accumulators = devinfo->ver < 71;
|
||||
|
||||
switch (devinfo->ver) {
|
||||
case 33:
|
||||
case 41:
|
||||
case 42:
|
||||
case 71:
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -32,12 +32,6 @@
|
|||
#if (V3D_VERSION == 21)
|
||||
# define V3DX(x) V3D21_##x
|
||||
# define v3dX(x) v3d21_##x
|
||||
#elif (V3D_VERSION == 33)
|
||||
# define V3DX(x) V3D33_##x
|
||||
# define v3dX(x) v3d33_##x
|
||||
#elif (V3D_VERSION == 41)
|
||||
# define V3DX(x) V3D41_##x
|
||||
# define v3dX(x) v3d41_##x
|
||||
#elif (V3D_VERSION == 42)
|
||||
# define V3DX(x) V3D42_##x
|
||||
# define v3dX(x) v3d42_##x
|
||||
|
|
|
|||
|
|
@ -130,7 +130,7 @@ static const char *v3d_performance_counters[][3] = {
|
|||
{"QPU", "QPU-stalls-other", "[QPU] Stalled qcycles waiting for any other reason (vary/W/Z)"},
|
||||
};
|
||||
|
||||
#elif (V3D_VERSION >= 41)
|
||||
#elif (V3D_VERSION >= 42)
|
||||
|
||||
static const char *v3d_performance_counters[][3] = {
|
||||
{"FEP", "FEP-valid-primitives-no-rendered-pixels", "[FEP] Valid primitives that result in no rendered pixels, for all rendered tiles"},
|
||||
|
|
|
|||
|
|
@ -32,9 +32,7 @@ libbroadcom_compiler_files = files(
|
|||
'vir_to_qpu.c',
|
||||
'qpu_schedule.c',
|
||||
'qpu_validate.c',
|
||||
'v3d33_tex.c',
|
||||
'v3d40_tex.c',
|
||||
'v3d33_vpm_setup.c',
|
||||
'v3d_tex.c',
|
||||
'v3d_compiler.h',
|
||||
'v3d_nir_lower_io.c',
|
||||
'v3d_nir_lower_image_load_store.c',
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@
|
|||
#define __gen_address_type uint32_t
|
||||
#define __gen_address_offset(reloc) (*reloc)
|
||||
#define __gen_emit_reloc(cl, reloc)
|
||||
#include "cle/v3d_packet_v41_pack.h"
|
||||
#include "cle/v3d_packet_v42_pack.h"
|
||||
|
||||
#define GENERAL_TMU_LOOKUP_PER_QUAD (0 << 7)
|
||||
#define GENERAL_TMU_LOOKUP_PER_PIXEL (1 << 7)
|
||||
|
|
@ -963,10 +963,7 @@ ntq_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
|
|||
break;
|
||||
}
|
||||
|
||||
if (c->devinfo->ver >= 40)
|
||||
v3d40_vir_emit_tex(c, instr);
|
||||
else
|
||||
v3d33_vir_emit_tex(c, instr);
|
||||
v3d_vir_emit_tex(c, instr);
|
||||
}
|
||||
|
||||
static struct qreg
|
||||
|
|
@ -1040,15 +1037,10 @@ emit_fragment_varying(struct v3d_compile *c, nir_variable *var,
|
|||
|
||||
struct qinst *ldvary = NULL;
|
||||
struct qreg vary;
|
||||
if (c->devinfo->ver >= 41) {
|
||||
ldvary = vir_add_inst(V3D_QPU_A_NOP, c->undef,
|
||||
c->undef, c->undef);
|
||||
ldvary->qpu.sig.ldvary = true;
|
||||
vary = vir_emit_def(c, ldvary);
|
||||
} else {
|
||||
vir_NOP(c)->qpu.sig.ldvary = true;
|
||||
vary = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R3);
|
||||
}
|
||||
ldvary = vir_add_inst(V3D_QPU_A_NOP, c->undef,
|
||||
c->undef, c->undef);
|
||||
ldvary->qpu.sig.ldvary = true;
|
||||
vary = vir_emit_def(c, ldvary);
|
||||
|
||||
/* Store the input value before interpolation so we can implement
|
||||
* GLSL's interpolateAt functions if the shader uses them.
|
||||
|
|
@ -1904,12 +1896,8 @@ emit_frag_end(struct v3d_compile *c)
|
|||
inst = vir_MOV_dest(c, tlbu_reg,
|
||||
c->outputs[c->output_position_index]);
|
||||
|
||||
if (c->devinfo->ver >= 42) {
|
||||
tlb_specifier |= (TLB_V42_DEPTH_TYPE_PER_PIXEL |
|
||||
TLB_SAMPLE_MODE_PER_PIXEL);
|
||||
} else {
|
||||
tlb_specifier |= TLB_DEPTH_TYPE_PER_PIXEL;
|
||||
}
|
||||
tlb_specifier |= (TLB_V42_DEPTH_TYPE_PER_PIXEL |
|
||||
TLB_SAMPLE_MODE_PER_PIXEL);
|
||||
} else {
|
||||
/* Shader doesn't write to gl_FragDepth, take Z from
|
||||
* FEP.
|
||||
|
|
@ -1917,16 +1905,11 @@ emit_frag_end(struct v3d_compile *c)
|
|||
c->writes_z_from_fep = true;
|
||||
inst = vir_MOV_dest(c, tlbu_reg, vir_nop_reg());
|
||||
|
||||
if (c->devinfo->ver >= 42) {
|
||||
/* The spec says the PER_PIXEL flag is ignored
|
||||
* for invariant writes, but the simulator
|
||||
* demands it.
|
||||
*/
|
||||
tlb_specifier |= (TLB_V42_DEPTH_TYPE_INVARIANT |
|
||||
TLB_SAMPLE_MODE_PER_PIXEL);
|
||||
} else {
|
||||
tlb_specifier |= TLB_DEPTH_TYPE_INVARIANT;
|
||||
}
|
||||
/* The spec says the PER_PIXEL flag is ignored for
|
||||
* invariant writes, but the simulator demands it.
|
||||
*/
|
||||
tlb_specifier |= (TLB_V42_DEPTH_TYPE_INVARIANT |
|
||||
TLB_SAMPLE_MODE_PER_PIXEL);
|
||||
|
||||
/* Since (single-threaded) fragment shaders always need
|
||||
* a TLB write, if we don't have any we emit a
|
||||
|
|
@ -1956,7 +1939,6 @@ vir_VPM_WRITE_indirect(struct v3d_compile *c,
|
|||
struct qreg vpm_index,
|
||||
bool uniform_vpm_index)
|
||||
{
|
||||
assert(c->devinfo->ver >= 40);
|
||||
if (uniform_vpm_index)
|
||||
vir_STVPMV(c, vpm_index, val);
|
||||
else
|
||||
|
|
@ -1966,13 +1948,8 @@ vir_VPM_WRITE_indirect(struct v3d_compile *c,
|
|||
static void
|
||||
vir_VPM_WRITE(struct v3d_compile *c, struct qreg val, uint32_t vpm_index)
|
||||
{
|
||||
if (c->devinfo->ver >= 40) {
|
||||
vir_VPM_WRITE_indirect(c, val,
|
||||
vir_uniform_ui(c, vpm_index), true);
|
||||
} else {
|
||||
/* XXX: v3d33_vir_vpm_write_setup(c); */
|
||||
vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val);
|
||||
}
|
||||
vir_VPM_WRITE_indirect(c, val,
|
||||
vir_uniform_ui(c, vpm_index), true);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -1980,7 +1957,7 @@ emit_vert_end(struct v3d_compile *c)
|
|||
{
|
||||
/* GFXH-1684: VPM writes need to be complete by the end of the shader.
|
||||
*/
|
||||
if (c->devinfo->ver >= 40 && c->devinfo->ver <= 42)
|
||||
if (c->devinfo->ver == 42)
|
||||
vir_VPMWT(c);
|
||||
}
|
||||
|
||||
|
|
@ -1989,7 +1966,7 @@ emit_geom_end(struct v3d_compile *c)
|
|||
{
|
||||
/* GFXH-1684: VPM writes need to be complete by the end of the shader.
|
||||
*/
|
||||
if (c->devinfo->ver >= 40 && c->devinfo->ver <= 42)
|
||||
if (c->devinfo->ver == 42)
|
||||
vir_VPMWT(c);
|
||||
}
|
||||
|
||||
|
|
@ -2174,26 +2151,9 @@ ntq_emit_vpm_read(struct v3d_compile *c,
|
|||
uint32_t *remaining,
|
||||
uint32_t vpm_index)
|
||||
{
|
||||
if (c->devinfo->ver >= 40 ) {
|
||||
return vir_LDVPMV_IN(c,
|
||||
vir_uniform_ui(c,
|
||||
(*num_components_queued)++));
|
||||
}
|
||||
|
||||
struct qreg vpm = vir_reg(QFILE_VPM, vpm_index);
|
||||
if (*num_components_queued != 0) {
|
||||
(*num_components_queued)--;
|
||||
return vir_MOV(c, vpm);
|
||||
}
|
||||
|
||||
uint32_t num_components = MIN2(*remaining, 32);
|
||||
|
||||
v3d33_vir_vpm_read_setup(c, num_components);
|
||||
|
||||
*num_components_queued = num_components - 1;
|
||||
*remaining -= num_components;
|
||||
|
||||
return vir_MOV(c, vpm);
|
||||
return vir_LDVPMV_IN(c,
|
||||
vir_uniform_ui(c,
|
||||
(*num_components_queued)++));
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -2263,31 +2223,8 @@ ntq_setup_vs_inputs(struct v3d_compile *c)
|
|||
}
|
||||
|
||||
/* The actual loads will happen directly in nir_intrinsic_load_input
|
||||
* on newer versions.
|
||||
*/
|
||||
if (c->devinfo->ver >= 40)
|
||||
return;
|
||||
|
||||
for (int loc = 0; loc < ARRAY_SIZE(c->vattr_sizes); loc++) {
|
||||
resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
|
||||
(loc + 1) * 4);
|
||||
|
||||
for (int i = 0; i < c->vattr_sizes[loc]; i++) {
|
||||
c->inputs[loc * 4 + i] =
|
||||
ntq_emit_vpm_read(c,
|
||||
&vpm_components_queued,
|
||||
&num_components,
|
||||
loc * 4 + i);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if (c->devinfo->ver >= 40) {
|
||||
assert(vpm_components_queued == num_components);
|
||||
} else {
|
||||
assert(vpm_components_queued == 0);
|
||||
assert(num_components == 0);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -2533,10 +2470,8 @@ vir_emit_tlb_color_read(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
|||
* switch instead -- see vir_emit_thrsw().
|
||||
*/
|
||||
if (!c->emitted_tlb_load) {
|
||||
if (!c->last_thrsw_at_top_level) {
|
||||
assert(c->devinfo->ver >= 41);
|
||||
if (!c->last_thrsw_at_top_level)
|
||||
vir_emit_thrsw(c);
|
||||
}
|
||||
|
||||
c->emitted_tlb_load = true;
|
||||
}
|
||||
|
|
@ -2744,7 +2679,7 @@ ntq_emit_load_input(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
|||
unsigned offset =
|
||||
nir_intrinsic_base(instr) + nir_src_as_uint(instr->src[0]);
|
||||
|
||||
if (c->s->info.stage != MESA_SHADER_FRAGMENT && c->devinfo->ver >= 40) {
|
||||
if (c->s->info.stage != MESA_SHADER_FRAGMENT) {
|
||||
/* Emit the LDVPM directly now, rather than at the top
|
||||
* of the shader like we did for V3D 3.x (which needs
|
||||
* vpmsetup when not just taking the next offset).
|
||||
|
|
@ -3328,11 +3263,11 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
|||
case nir_intrinsic_image_store:
|
||||
case nir_intrinsic_image_atomic:
|
||||
case nir_intrinsic_image_atomic_swap:
|
||||
v3d40_vir_emit_image_load_store(c, instr);
|
||||
v3d_vir_emit_image_load_store(c, instr);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_image_load:
|
||||
v3d40_vir_emit_image_load_store(c, instr);
|
||||
v3d_vir_emit_image_load_store(c, instr);
|
||||
/* Not really a general TMU load, but we only use this flag
|
||||
* for NIR scheduling and we do schedule these under the same
|
||||
* policy as general TMU.
|
||||
|
|
@ -3502,21 +3437,8 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
|||
* (actually supergroup) to block until the last
|
||||
* invocation reaches the TSY op.
|
||||
*/
|
||||
if (c->devinfo->ver >= 42) {
|
||||
vir_BARRIERID_dest(c, vir_reg(QFILE_MAGIC,
|
||||
V3D_QPU_WADDR_SYNCB));
|
||||
} else {
|
||||
struct qinst *sync =
|
||||
vir_BARRIERID_dest(c,
|
||||
vir_reg(QFILE_MAGIC,
|
||||
V3D_QPU_WADDR_SYNCU));
|
||||
sync->uniform =
|
||||
vir_get_uniform_index(c, QUNIFORM_CONSTANT,
|
||||
0xffffff00 |
|
||||
V3D_TSY_WAIT_INC_CHECK);
|
||||
|
||||
}
|
||||
|
||||
vir_BARRIERID_dest(c, vir_reg(QFILE_MAGIC,
|
||||
V3D_QPU_WADDR_SYNCB));
|
||||
/* The blocking of a TSY op only happens at the next
|
||||
* thread switch. No texturing may be outstanding at the
|
||||
* time of a TSY blocking operation.
|
||||
|
|
@ -4330,14 +4252,12 @@ nir_to_vir(struct v3d_compile *c)
|
|||
emit_fragment_varying(c, NULL, -1, 0, 0);
|
||||
}
|
||||
|
||||
if (c->fs_key->is_points &&
|
||||
(c->devinfo->ver < 40 || program_reads_point_coord(c))) {
|
||||
if (c->fs_key->is_points && program_reads_point_coord(c)) {
|
||||
c->point_x = emit_fragment_varying(c, NULL, -1, 0, 0);
|
||||
c->point_y = emit_fragment_varying(c, NULL, -1, 0, 0);
|
||||
c->uses_implicit_point_line_varyings = true;
|
||||
} else if (c->fs_key->is_lines &&
|
||||
(c->devinfo->ver < 40 ||
|
||||
BITSET_TEST(c->s->info.system_values_read,
|
||||
(BITSET_TEST(c->s->info.system_values_read,
|
||||
SYSTEM_VALUE_LINE_COORD))) {
|
||||
c->line_x = emit_fragment_varying(c, NULL, -1, 0, 0);
|
||||
c->uses_implicit_point_line_varyings = true;
|
||||
|
|
@ -4350,7 +4270,7 @@ nir_to_vir(struct v3d_compile *c)
|
|||
V3D_QPU_WADDR_SYNC));
|
||||
}
|
||||
|
||||
if (c->devinfo->ver <= 42) {
|
||||
if (c->devinfo->ver == 42) {
|
||||
c->cs_payload[0] = vir_MOV(c, vir_reg(QFILE_REG, 0));
|
||||
c->cs_payload[1] = vir_MOV(c, vir_reg(QFILE_REG, 2));
|
||||
} else if (c->devinfo->ver >= 71) {
|
||||
|
|
@ -4461,25 +4381,12 @@ vir_emit_last_thrsw(struct v3d_compile *c,
|
|||
{
|
||||
*restore_last_thrsw = c->last_thrsw;
|
||||
|
||||
/* On V3D before 4.1, we need a TMU op to be outstanding when thread
|
||||
* switching, so disable threads if we didn't do any TMU ops (each of
|
||||
* which would have emitted a THRSW).
|
||||
*/
|
||||
if (!c->last_thrsw_at_top_level && c->devinfo->ver < 41) {
|
||||
c->threads = 1;
|
||||
if (c->last_thrsw)
|
||||
vir_remove_thrsw(c);
|
||||
*restore_last_thrsw = NULL;
|
||||
}
|
||||
|
||||
/* If we're threaded and the last THRSW was in conditional code, then
|
||||
* we need to emit another one so that we can flag it as the last
|
||||
* thrsw.
|
||||
*/
|
||||
if (c->last_thrsw && !c->last_thrsw_at_top_level) {
|
||||
assert(c->devinfo->ver >= 41);
|
||||
if (c->last_thrsw && !c->last_thrsw_at_top_level)
|
||||
vir_emit_thrsw(c);
|
||||
}
|
||||
|
||||
/* If we're threaded, then we need to mark the last THRSW instruction
|
||||
* so we can emit a pair of them at QPU emit time.
|
||||
|
|
@ -4487,10 +4394,8 @@ vir_emit_last_thrsw(struct v3d_compile *c,
|
|||
* For V3D 4.x, we can spawn the non-fragment shaders already in the
|
||||
* post-last-THRSW state, so we can skip this.
|
||||
*/
|
||||
if (!c->last_thrsw && c->s->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
assert(c->devinfo->ver >= 41);
|
||||
if (!c->last_thrsw && c->s->info.stage == MESA_SHADER_FRAGMENT)
|
||||
vir_emit_thrsw(c);
|
||||
}
|
||||
|
||||
/* If we have not inserted a last thread switch yet, do it now to ensure
|
||||
* any potential spilling we do happens before this. If we don't spill
|
||||
|
|
@ -4616,7 +4521,7 @@ v3d_nir_to_vir(struct v3d_compile *c)
|
|||
/* Attempt to allocate registers for the temporaries. If we fail,
|
||||
* reduce thread count and try again.
|
||||
*/
|
||||
int min_threads = (c->devinfo->ver >= 41) ? 2 : 1;
|
||||
int min_threads = 2;
|
||||
struct qpu_reg *temp_registers;
|
||||
while (true) {
|
||||
temp_registers = v3d_register_allocate(c);
|
||||
|
|
|
|||
|
|
@ -202,9 +202,6 @@ tmu_write_is_sequence_terminator(uint32_t waddr)
|
|||
static bool
|
||||
can_reorder_tmu_write(const struct v3d_device_info *devinfo, uint32_t waddr)
|
||||
{
|
||||
if (devinfo->ver < 40)
|
||||
return false;
|
||||
|
||||
if (tmu_write_is_sequence_terminator(waddr))
|
||||
return false;
|
||||
|
||||
|
|
@ -267,8 +264,7 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
|
|||
break;
|
||||
|
||||
case V3D_QPU_WADDR_UNIFA:
|
||||
if (state->devinfo->ver >= 40)
|
||||
add_write_dep(state, &state->last_unifa, n);
|
||||
add_write_dep(state, &state->last_unifa, n);
|
||||
break;
|
||||
|
||||
case V3D_QPU_WADDR_NOP:
|
||||
|
|
@ -660,7 +656,7 @@ writes_too_soon_after_write(const struct v3d_device_info *devinfo,
|
|||
v3d_qpu_writes_r4(devinfo, inst))
|
||||
return true;
|
||||
|
||||
if (devinfo->ver <= 42)
|
||||
if (devinfo->ver == 42)
|
||||
return false;
|
||||
|
||||
/* Don't schedule anything that writes rf0 right after ldvary, since
|
||||
|
|
@ -854,13 +850,10 @@ qpu_compatible_peripheral_access(const struct v3d_device_info *devinfo,
|
|||
if (util_bitcount(a_peripherals) + util_bitcount(b_peripherals) <= 1)
|
||||
return true;
|
||||
|
||||
if (devinfo->ver < 41)
|
||||
return false;
|
||||
|
||||
/* V3D 4.x can't do more than one peripheral access except in a
|
||||
* few cases:
|
||||
*/
|
||||
if (devinfo->ver <= 42) {
|
||||
if (devinfo->ver == 42) {
|
||||
/* WRTMUC signal with TMU register write (other than tmuc). */
|
||||
if (a_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
|
||||
b_peripherals == V3D_PERIPHERAL_TMU_WRITE) {
|
||||
|
|
@ -984,7 +977,7 @@ qpu_merge_raddrs(struct v3d_qpu_instr *result,
|
|||
result->sig.small_imm_d) <= 1;
|
||||
}
|
||||
|
||||
assert(devinfo->ver <= 42);
|
||||
assert(devinfo->ver == 42);
|
||||
|
||||
uint64_t raddrs_used = qpu_raddrs_used(add_instr, mul_instr);
|
||||
int naddrs = util_bitcount64(raddrs_used);
|
||||
|
|
@ -1499,7 +1492,7 @@ retry:
|
|||
* as long as it is not the last delay slot.
|
||||
*/
|
||||
if (inst->sig.ldvary) {
|
||||
if (c->devinfo->ver <= 42 &&
|
||||
if (c->devinfo->ver == 42 &&
|
||||
scoreboard->last_thrsw_tick + 2 >=
|
||||
scoreboard->tick - 1) {
|
||||
continue;
|
||||
|
|
@ -1607,7 +1600,7 @@ update_scoreboard_for_magic_waddr(struct choose_scoreboard *scoreboard,
|
|||
{
|
||||
if (v3d_qpu_magic_waddr_is_sfu(waddr))
|
||||
scoreboard->last_magic_sfu_write_tick = scoreboard->tick;
|
||||
else if (devinfo->ver >= 40 && waddr == V3D_QPU_WADDR_UNIFA)
|
||||
else if (waddr == V3D_QPU_WADDR_UNIFA)
|
||||
scoreboard->last_unifa_write_tick = scoreboard->tick;
|
||||
}
|
||||
|
||||
|
|
@ -1938,7 +1931,7 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
|
|||
if (slot > 0 && qinst->uniform != ~0)
|
||||
return false;
|
||||
|
||||
if (c->devinfo->ver <= 42 && v3d_qpu_waits_vpm(inst))
|
||||
if (c->devinfo->ver == 42 && v3d_qpu_waits_vpm(inst))
|
||||
return false;
|
||||
|
||||
if (inst->sig.ldvary)
|
||||
|
|
@ -1946,12 +1939,12 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
|
|||
|
||||
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
|
||||
/* GFXH-1625: TMUWT not allowed in the final instruction. */
|
||||
if (c->devinfo->ver <= 42 && slot == 2 &&
|
||||
if (c->devinfo->ver == 42 && slot == 2 &&
|
||||
inst->alu.add.op == V3D_QPU_A_TMUWT) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (c->devinfo->ver <= 42) {
|
||||
if (c->devinfo->ver == 42) {
|
||||
/* No writing physical registers at the end. */
|
||||
bool add_is_nop = inst->alu.add.op == V3D_QPU_A_NOP;
|
||||
bool mul_is_nop = inst->alu.mul.op == V3D_QPU_M_NOP;
|
||||
|
|
@ -1977,10 +1970,7 @@ qpu_inst_valid_in_thrend_slot(struct v3d_compile *c,
|
|||
}
|
||||
}
|
||||
|
||||
if (c->devinfo->ver < 40 && inst->alu.add.op == V3D_QPU_A_SETMSF)
|
||||
return false;
|
||||
|
||||
if (c->devinfo->ver <= 42) {
|
||||
if (c->devinfo->ver == 42) {
|
||||
/* RF0-2 might be overwritten during the delay slots by
|
||||
* fragment shader setup.
|
||||
*/
|
||||
|
|
@ -2034,7 +2024,7 @@ qpu_inst_before_thrsw_valid_in_delay_slot(struct v3d_compile *c,
|
|||
return false;
|
||||
|
||||
if (qinst->qpu.sig.ldvary) {
|
||||
if (c->devinfo->ver <= 42 && slot > 0)
|
||||
if (c->devinfo->ver == 42 && slot > 0)
|
||||
return false;
|
||||
if (c->devinfo->ver >= 71 && slot == 2)
|
||||
return false;
|
||||
|
|
@ -2475,7 +2465,7 @@ alu_reads_register(const struct v3d_device_info *devinfo,
|
|||
else
|
||||
num_src = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
|
||||
|
||||
if (devinfo->ver <= 42) {
|
||||
if (devinfo->ver == 42) {
|
||||
enum v3d_qpu_mux mux_a, mux_b;
|
||||
if (add) {
|
||||
mux_a = inst->alu.add.a.mux;
|
||||
|
|
@ -2639,7 +2629,7 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
|
|||
* and flagging it for a fixup. In V3D 7.x this is limited only to the
|
||||
* second delay slot.
|
||||
*/
|
||||
assert((devinfo->ver <= 42 &&
|
||||
assert((devinfo->ver == 42 &&
|
||||
scoreboard->last_thrsw_tick + 2 < scoreboard->tick - 1) ||
|
||||
(devinfo->ver >= 71 &&
|
||||
scoreboard->last_thrsw_tick + 2 != scoreboard->tick - 1));
|
||||
|
|
@ -2672,7 +2662,7 @@ fixup_pipelined_ldvary(struct v3d_compile *c,
|
|||
* ldvary write to r5/rf0 happens in the next instruction).
|
||||
*/
|
||||
assert(!v3d_qpu_writes_r5(devinfo, inst));
|
||||
assert(devinfo->ver <= 42 ||
|
||||
assert(devinfo->ver == 42 ||
|
||||
(!v3d_qpu_writes_rf0_implicitly(devinfo, inst) &&
|
||||
!v3d71_qpu_writes_waddr_explicitly(devinfo, inst, 0)));
|
||||
|
||||
|
|
|
|||
|
|
@ -243,7 +243,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
|
|||
}
|
||||
|
||||
if (inst->sig.ldvary) {
|
||||
if (devinfo->ver <= 42)
|
||||
if (devinfo->ver == 42)
|
||||
fail_instr(state, "LDVARY during THRSW delay slots");
|
||||
if (devinfo->ver >= 71 &&
|
||||
state->ip - state->last_thrsw_ip == 2) {
|
||||
|
|
@ -276,7 +276,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
|
|||
vpm_writes +
|
||||
tlb_writes +
|
||||
tsy_writes +
|
||||
(devinfo->ver <= 42 ? inst->sig.ldtmu : 0) +
|
||||
(devinfo->ver == 42 ? inst->sig.ldtmu : 0) +
|
||||
inst->sig.ldtlb +
|
||||
inst->sig.ldvpm +
|
||||
inst->sig.ldtlbu > 1) {
|
||||
|
|
@ -316,7 +316,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
|
|||
inst->type == V3D_QPU_INSTR_TYPE_ALU) {
|
||||
if ((inst->alu.add.op != V3D_QPU_A_NOP &&
|
||||
!inst->alu.add.magic_write)) {
|
||||
if (devinfo->ver <= 42) {
|
||||
if (devinfo->ver == 42) {
|
||||
fail_instr(state, "RF write after THREND");
|
||||
} else if (devinfo->ver >= 71) {
|
||||
if (state->last_thrsw_ip - state->ip == 0) {
|
||||
|
|
@ -333,7 +333,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
|
|||
|
||||
if ((inst->alu.mul.op != V3D_QPU_M_NOP &&
|
||||
!inst->alu.mul.magic_write)) {
|
||||
if (devinfo->ver <= 42) {
|
||||
if (devinfo->ver == 42) {
|
||||
fail_instr(state, "RF write after THREND");
|
||||
} else if (devinfo->ver >= 71) {
|
||||
if (state->last_thrsw_ip - state->ip == 0) {
|
||||
|
|
@ -351,7 +351,7 @@ qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
|
|||
|
||||
if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
|
||||
!inst->sig_magic) {
|
||||
if (devinfo->ver <= 42) {
|
||||
if (devinfo->ver == 42) {
|
||||
fail_instr(state, "RF write after THREND");
|
||||
} else if (devinfo->ver >= 71 &&
|
||||
(inst->sig_addr == 2 ||
|
||||
|
|
|
|||
|
|
@ -1,193 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2016-2018 Broadcom
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "v3d_compiler.h"
|
||||
|
||||
/* We don't do any address packing. */
|
||||
#define __gen_user_data void
|
||||
#define __gen_address_type uint32_t
|
||||
#define __gen_address_offset(reloc) (*reloc)
|
||||
#define __gen_emit_reloc(cl, reloc)
|
||||
#include "cle/v3d_packet_v33_pack.h"
|
||||
|
||||
void
|
||||
v3d33_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
|
||||
{
|
||||
/* FIXME: We don't bother implementing pipelining for texture reads
|
||||
* for any pre 4.x hardware. It should be straightforward to do but
|
||||
* we are not really testing or even targeting this hardware at
|
||||
* present.
|
||||
*/
|
||||
ntq_flush_tmu(c);
|
||||
|
||||
unsigned unit = instr->texture_index;
|
||||
|
||||
struct V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1 p0_unpacked = {
|
||||
V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_header,
|
||||
|
||||
.fetch_sample_mode = instr->op == nir_texop_txf,
|
||||
};
|
||||
|
||||
struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 p1_unpacked = {
|
||||
};
|
||||
|
||||
switch (instr->sampler_dim) {
|
||||
case GLSL_SAMPLER_DIM_1D:
|
||||
if (instr->is_array)
|
||||
p0_unpacked.lookup_type = TEXTURE_1D_ARRAY;
|
||||
else
|
||||
p0_unpacked.lookup_type = TEXTURE_1D;
|
||||
break;
|
||||
case GLSL_SAMPLER_DIM_2D:
|
||||
case GLSL_SAMPLER_DIM_RECT:
|
||||
if (instr->is_array)
|
||||
p0_unpacked.lookup_type = TEXTURE_2D_ARRAY;
|
||||
else
|
||||
p0_unpacked.lookup_type = TEXTURE_2D;
|
||||
break;
|
||||
case GLSL_SAMPLER_DIM_3D:
|
||||
p0_unpacked.lookup_type = TEXTURE_3D;
|
||||
break;
|
||||
case GLSL_SAMPLER_DIM_CUBE:
|
||||
p0_unpacked.lookup_type = TEXTURE_CUBE_MAP;
|
||||
break;
|
||||
default:
|
||||
unreachable("Bad sampler type");
|
||||
}
|
||||
|
||||
struct qreg coords[5];
|
||||
int next_coord = 0;
|
||||
for (unsigned i = 0; i < instr->num_srcs; i++) {
|
||||
switch (instr->src[i].src_type) {
|
||||
case nir_tex_src_coord:
|
||||
for (int j = 0; j < instr->coord_components; j++) {
|
||||
coords[next_coord++] =
|
||||
ntq_get_src(c, instr->src[i].src, j);
|
||||
}
|
||||
if (instr->coord_components < 2)
|
||||
coords[next_coord++] = vir_uniform_f(c, 0.5);
|
||||
break;
|
||||
case nir_tex_src_bias:
|
||||
coords[next_coord++] =
|
||||
ntq_get_src(c, instr->src[i].src, 0);
|
||||
|
||||
p0_unpacked.bias_supplied = true;
|
||||
break;
|
||||
case nir_tex_src_lod:
|
||||
coords[next_coord++] =
|
||||
vir_FADD(c,
|
||||
ntq_get_src(c, instr->src[i].src, 0),
|
||||
vir_uniform(c, QUNIFORM_TEXTURE_FIRST_LEVEL,
|
||||
unit));
|
||||
|
||||
if (instr->op != nir_texop_txf &&
|
||||
instr->op != nir_texop_tg4) {
|
||||
p0_unpacked.disable_autolod_use_bias_only = true;
|
||||
}
|
||||
break;
|
||||
case nir_tex_src_comparator:
|
||||
coords[next_coord++] =
|
||||
ntq_get_src(c, instr->src[i].src, 0);
|
||||
|
||||
p0_unpacked.shadow = true;
|
||||
break;
|
||||
|
||||
case nir_tex_src_offset: {
|
||||
p0_unpacked.texel_offset_for_s_coordinate =
|
||||
nir_src_comp_as_int(instr->src[i].src, 0);
|
||||
|
||||
if (instr->coord_components >= 2)
|
||||
p0_unpacked.texel_offset_for_t_coordinate =
|
||||
nir_src_comp_as_int(instr->src[i].src, 1);
|
||||
|
||||
if (instr->coord_components >= 3)
|
||||
p0_unpacked.texel_offset_for_r_coordinate =
|
||||
nir_src_comp_as_int(instr->src[i].src, 2);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("unknown texture source");
|
||||
}
|
||||
}
|
||||
|
||||
/* Limit the number of channels returned to both how many the NIR
|
||||
* instruction writes and how many the instruction could produce.
|
||||
*/
|
||||
p1_unpacked.return_words_of_texture_data =
|
||||
nir_def_components_read(&instr->def);
|
||||
|
||||
uint32_t p0_packed;
|
||||
V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_pack(NULL,
|
||||
(uint8_t *)&p0_packed,
|
||||
&p0_unpacked);
|
||||
|
||||
uint32_t p1_packed;
|
||||
V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(NULL,
|
||||
(uint8_t *)&p1_packed,
|
||||
&p1_unpacked);
|
||||
/* Load unit number into the address field, which will be used by
|
||||
* the driver to decide which texture to put in the actual address
|
||||
* field.
|
||||
*/
|
||||
p1_packed |= unit << 5;
|
||||
|
||||
/* There is no native support for GL texture rectangle coordinates, so
|
||||
* we have to rescale from ([0, width], [0, height]) to ([0, 1], [0,
|
||||
* 1]).
|
||||
*/
|
||||
if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
|
||||
coords[0] = vir_FMUL(c, coords[0],
|
||||
vir_uniform(c, QUNIFORM_TEXRECT_SCALE_X,
|
||||
unit));
|
||||
coords[1] = vir_FMUL(c, coords[1],
|
||||
vir_uniform(c, QUNIFORM_TEXRECT_SCALE_Y,
|
||||
unit));
|
||||
}
|
||||
|
||||
int texture_u[] = {
|
||||
vir_get_uniform_index(c, QUNIFORM_TEXTURE_CONFIG_P0_0 + unit, p0_packed),
|
||||
vir_get_uniform_index(c, QUNIFORM_TEXTURE_CONFIG_P1, p1_packed),
|
||||
};
|
||||
|
||||
for (int i = 0; i < next_coord; i++) {
|
||||
struct qreg dst;
|
||||
|
||||
if (i == next_coord - 1)
|
||||
dst = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUL);
|
||||
else
|
||||
dst = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMU);
|
||||
|
||||
struct qinst *tmu = vir_MOV_dest(c, dst, coords[i]);
|
||||
|
||||
if (i < 2)
|
||||
tmu->uniform = texture_u[i];
|
||||
}
|
||||
|
||||
vir_emit_thrsw(c);
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (p1_unpacked.return_words_of_texture_data & (1 << i))
|
||||
ntq_store_def(c, &instr->def, i, vir_LDTMU(c));
|
||||
}
|
||||
}
|
||||
|
|
@ -1,75 +0,0 @@
|
|||
/*
|
||||
* Copyright © 2016-2018 Broadcom
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "v3d_compiler.h"
|
||||
|
||||
/* We don't do any address packing. */
|
||||
#define __gen_user_data void
|
||||
#define __gen_address_type uint32_t
|
||||
#define __gen_address_offset(reloc) (*reloc)
|
||||
#define __gen_emit_reloc(cl, reloc)
|
||||
#include "broadcom/cle/v3d_packet_v33_pack.h"
|
||||
|
||||
void
|
||||
v3d33_vir_vpm_read_setup(struct v3d_compile *c, int num_components)
|
||||
{
|
||||
struct V3D33_VPM_GENERIC_BLOCK_READ_SETUP unpacked = {
|
||||
V3D33_VPM_GENERIC_BLOCK_READ_SETUP_header,
|
||||
|
||||
.horiz = true,
|
||||
.laned = false,
|
||||
/* If the field is 0, that means a read count of 32. */
|
||||
.num = num_components & 31,
|
||||
.segs = true,
|
||||
.stride = 1,
|
||||
.size = VPM_SETUP_SIZE_32_BIT,
|
||||
.addr = c->num_inputs,
|
||||
};
|
||||
|
||||
uint32_t packed;
|
||||
V3D33_VPM_GENERIC_BLOCK_READ_SETUP_pack(NULL,
|
||||
(uint8_t *)&packed,
|
||||
&unpacked);
|
||||
vir_VPMSETUP(c, vir_uniform_ui(c, packed));
|
||||
}
|
||||
|
||||
void
|
||||
v3d33_vir_vpm_write_setup(struct v3d_compile *c)
|
||||
{
|
||||
uint32_t packed;
|
||||
struct V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP unpacked = {
|
||||
V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP_header,
|
||||
|
||||
.horiz = true,
|
||||
.laned = false,
|
||||
.segs = true,
|
||||
.stride = 1,
|
||||
.size = VPM_SETUP_SIZE_32_BIT,
|
||||
.addr = 0,
|
||||
};
|
||||
|
||||
V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP_pack(NULL,
|
||||
(uint8_t *)&packed,
|
||||
&unpacked);
|
||||
vir_VPMSETUP(c, vir_uniform_ui(c, packed));
|
||||
}
|
||||
|
|
@ -96,14 +96,6 @@ enum qfile {
|
|||
*/
|
||||
QFILE_TEMP,
|
||||
|
||||
/**
|
||||
* VPM reads use this with an index value to say what part of the VPM
|
||||
* is being read.
|
||||
*
|
||||
* Used only for ver < 40. For ver >= 40 we use ldvpm.
|
||||
*/
|
||||
QFILE_VPM,
|
||||
|
||||
/**
|
||||
* Stores an immediate value in the index field that will be used
|
||||
* directly by qpu_load_imm().
|
||||
|
|
@ -1150,7 +1142,6 @@ bool vir_is_raw_mov(struct qinst *inst);
|
|||
bool vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst);
|
||||
bool vir_is_add(struct qinst *inst);
|
||||
bool vir_is_mul(struct qinst *inst);
|
||||
bool vir_writes_r3_implicitly(const struct v3d_device_info *devinfo, struct qinst *inst);
|
||||
bool vir_writes_r4_implicitly(const struct v3d_device_info *devinfo, struct qinst *inst);
|
||||
struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg);
|
||||
uint8_t vir_channels_written(struct qinst *inst);
|
||||
|
|
@ -1187,12 +1178,9 @@ bool v3d_nir_lower_txf_ms(nir_shader *s);
|
|||
bool v3d_nir_lower_image_load_store(nir_shader *s);
|
||||
bool v3d_nir_lower_load_store_bitsize(nir_shader *s);
|
||||
|
||||
void v3d33_vir_vpm_read_setup(struct v3d_compile *c, int num_components);
|
||||
void v3d33_vir_vpm_write_setup(struct v3d_compile *c);
|
||||
void v3d33_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr);
|
||||
void v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr);
|
||||
void v3d40_vir_emit_image_load_store(struct v3d_compile *c,
|
||||
nir_intrinsic_instr *instr);
|
||||
void v3d_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr);
|
||||
void v3d_vir_emit_image_load_store(struct v3d_compile *c,
|
||||
nir_intrinsic_instr *instr);
|
||||
|
||||
void v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers);
|
||||
uint32_t v3d_qpu_schedule_instructions(struct v3d_compile *c);
|
||||
|
|
@ -1302,28 +1290,18 @@ vir_##name(struct v3d_compile *c, struct qreg a, struct qreg b) \
|
|||
#define VIR_SFU(name) \
|
||||
static inline struct qreg \
|
||||
vir_##name(struct v3d_compile *c, struct qreg a) \
|
||||
{ \
|
||||
if (c->devinfo->ver >= 41) { \
|
||||
return vir_emit_def(c, vir_add_inst(V3D_QPU_A_##name, \
|
||||
c->undef, \
|
||||
a, c->undef)); \
|
||||
} else { \
|
||||
vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \
|
||||
return vir_FMOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \
|
||||
} \
|
||||
{ \
|
||||
return vir_emit_def(c, vir_add_inst(V3D_QPU_A_##name, \
|
||||
c->undef, \
|
||||
a, c->undef)); \
|
||||
} \
|
||||
static inline struct qinst * \
|
||||
vir_##name##_dest(struct v3d_compile *c, struct qreg dest, \
|
||||
struct qreg a) \
|
||||
{ \
|
||||
if (c->devinfo->ver >= 41) { \
|
||||
return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_##name, \
|
||||
dest, \
|
||||
a, c->undef)); \
|
||||
} else { \
|
||||
vir_FMOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_##name), a); \
|
||||
return vir_FMOV_dest(c, dest, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4)); \
|
||||
} \
|
||||
return vir_emit_nondef(c, vir_add_inst(V3D_QPU_A_##name, \
|
||||
dest, \
|
||||
a, c->undef)); \
|
||||
}
|
||||
|
||||
#define VIR_A_ALU2(name) VIR_ALU2(name, vir_add_inst, V3D_QPU_A_##name)
|
||||
|
|
@ -1454,16 +1432,11 @@ vir_NOP(struct v3d_compile *c)
|
|||
static inline struct qreg
|
||||
vir_LDTMU(struct v3d_compile *c)
|
||||
{
|
||||
if (c->devinfo->ver >= 41) {
|
||||
struct qinst *ldtmu = vir_add_inst(V3D_QPU_A_NOP, c->undef,
|
||||
c->undef, c->undef);
|
||||
ldtmu->qpu.sig.ldtmu = true;
|
||||
struct qinst *ldtmu = vir_add_inst(V3D_QPU_A_NOP, c->undef,
|
||||
c->undef, c->undef);
|
||||
ldtmu->qpu.sig.ldtmu = true;
|
||||
|
||||
return vir_emit_def(c, ldtmu);
|
||||
} else {
|
||||
vir_NOP(c)->qpu.sig.ldtmu = true;
|
||||
return vir_MOV(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_R4));
|
||||
}
|
||||
return vir_emit_def(c, ldtmu);
|
||||
}
|
||||
|
||||
static inline struct qreg
|
||||
|
|
@ -1476,7 +1449,6 @@ vir_UMUL(struct v3d_compile *c, struct qreg src0, struct qreg src1)
|
|||
static inline struct qreg
|
||||
vir_TLBU_COLOR_READ(struct v3d_compile *c, uint32_t config)
|
||||
{
|
||||
assert(c->devinfo->ver >= 41); /* XXX */
|
||||
assert((config & 0xffffff00) == 0xffffff00);
|
||||
|
||||
struct qinst *ldtlb = vir_add_inst(V3D_QPU_A_NOP, c->undef,
|
||||
|
|
@ -1489,8 +1461,6 @@ vir_TLBU_COLOR_READ(struct v3d_compile *c, uint32_t config)
|
|||
static inline struct qreg
|
||||
vir_TLB_COLOR_READ(struct v3d_compile *c)
|
||||
{
|
||||
assert(c->devinfo->ver >= 41); /* XXX */
|
||||
|
||||
struct qinst *ldtlb = vir_add_inst(V3D_QPU_A_NOP, c->undef,
|
||||
c->undef, c->undef);
|
||||
ldtlb->qpu.sig.ldtlb = true;
|
||||
|
|
|
|||
|
|
@ -515,7 +515,7 @@ v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
|
|||
* The correct fix for this as recommended by Broadcom
|
||||
* is to convert to .8 fixed-point with ffloor().
|
||||
*/
|
||||
if (c->devinfo->ver <= 42)
|
||||
if (c->devinfo->ver == 42)
|
||||
pos = nir_f2i32(b, nir_ffloor(b, pos));
|
||||
else
|
||||
pos = nir_f2i32(b, nir_fround_even(b, pos));
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@
|
|||
#define __gen_address_type uint32_t
|
||||
#define __gen_address_offset(reloc) (*reloc)
|
||||
#define __gen_emit_reloc(cl, reloc)
|
||||
#include "cle/v3d_packet_v41_pack.h"
|
||||
#include "cle/v3d_packet_v42_pack.h"
|
||||
|
||||
static inline struct qinst *
|
||||
vir_TMU_WRITE(struct v3d_compile *c, enum v3d_qpu_waddr waddr, struct qreg val)
|
||||
|
|
@ -61,11 +61,11 @@ vir_WRTMUC(struct v3d_compile *c, enum quniform_contents contents, uint32_t data
|
|||
inst->uniform = vir_get_uniform_index(c, contents, data);
|
||||
}
|
||||
|
||||
static const struct V3D41_TMU_CONFIG_PARAMETER_1 p1_unpacked_default = {
|
||||
static const struct V3D42_TMU_CONFIG_PARAMETER_1 p1_unpacked_default = {
|
||||
.per_pixel_mask_enable = true,
|
||||
};
|
||||
|
||||
static const struct V3D41_TMU_CONFIG_PARAMETER_2 p2_unpacked_default = {
|
||||
static const struct V3D42_TMU_CONFIG_PARAMETER_2 p2_unpacked_default = {
|
||||
.op = V3D_TMU_OP_REGULAR,
|
||||
};
|
||||
|
||||
|
|
@ -86,7 +86,7 @@ handle_tex_src(struct v3d_compile *c,
|
|||
nir_tex_instr *instr,
|
||||
unsigned src_idx,
|
||||
unsigned non_array_components,
|
||||
struct V3D41_TMU_CONFIG_PARAMETER_2 *p2_unpacked,
|
||||
struct V3D42_TMU_CONFIG_PARAMETER_2 *p2_unpacked,
|
||||
struct qreg *s_out,
|
||||
unsigned *tmu_writes)
|
||||
{
|
||||
|
|
@ -201,7 +201,7 @@ handle_tex_src(struct v3d_compile *c,
|
|||
static void
|
||||
vir_tex_handle_srcs(struct v3d_compile *c,
|
||||
nir_tex_instr *instr,
|
||||
struct V3D41_TMU_CONFIG_PARAMETER_2 *p2_unpacked,
|
||||
struct V3D42_TMU_CONFIG_PARAMETER_2 *p2_unpacked,
|
||||
struct qreg *s,
|
||||
unsigned *tmu_writes)
|
||||
{
|
||||
|
|
@ -224,10 +224,8 @@ get_required_tex_tmu_writes(struct v3d_compile *c, nir_tex_instr *instr)
|
|||
}
|
||||
|
||||
void
|
||||
v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
|
||||
v3d_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
|
||||
{
|
||||
assert(instr->op != nir_texop_lod || c->devinfo->ver >= 42);
|
||||
|
||||
unsigned texture_idx = instr->texture_index;
|
||||
|
||||
/* For instructions that don't have a sampler (i.e. txf) we bind
|
||||
|
|
@ -244,7 +242,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
|
|||
bool output_type_32_bit =
|
||||
c->key->sampler[sampler_idx].return_size == 32;
|
||||
|
||||
struct V3D41_TMU_CONFIG_PARAMETER_0 p0_unpacked = {
|
||||
struct V3D42_TMU_CONFIG_PARAMETER_0 p0_unpacked = {
|
||||
};
|
||||
|
||||
/* Limit the number of channels returned to both how many the NIR
|
||||
|
|
@ -275,7 +273,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
|
|||
}
|
||||
assert(p0_unpacked.return_words_of_texture_data != 0);
|
||||
|
||||
struct V3D41_TMU_CONFIG_PARAMETER_2 p2_unpacked = {
|
||||
struct V3D42_TMU_CONFIG_PARAMETER_2 p2_unpacked = {
|
||||
.op = V3D_TMU_OP_REGULAR,
|
||||
.gather_mode = instr->op == nir_texop_tg4,
|
||||
.gather_component = instr->component,
|
||||
|
|
@ -304,12 +302,12 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
|
|||
vir_tex_handle_srcs(c, instr, &p2_unpacked, &s, NULL);
|
||||
|
||||
uint32_t p0_packed;
|
||||
V3D41_TMU_CONFIG_PARAMETER_0_pack(NULL,
|
||||
V3D42_TMU_CONFIG_PARAMETER_0_pack(NULL,
|
||||
(uint8_t *)&p0_packed,
|
||||
&p0_unpacked);
|
||||
|
||||
uint32_t p2_packed;
|
||||
V3D41_TMU_CONFIG_PARAMETER_2_pack(NULL,
|
||||
V3D42_TMU_CONFIG_PARAMETER_2_pack(NULL,
|
||||
(uint8_t *)&p2_packed,
|
||||
&p2_unpacked);
|
||||
|
||||
|
|
@ -339,7 +337,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
|
|||
output_type_32_bit;
|
||||
|
||||
if (non_default_p1_config) {
|
||||
struct V3D41_TMU_CONFIG_PARAMETER_1 p1_unpacked = {
|
||||
struct V3D42_TMU_CONFIG_PARAMETER_1 p1_unpacked = {
|
||||
.output_type_32_bit = output_type_32_bit,
|
||||
|
||||
.unnormalized_coordinates = (instr->sampler_dim ==
|
||||
|
|
@ -356,7 +354,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
|
|||
p0_unpacked.return_words_of_texture_data < (1 << 2));
|
||||
|
||||
uint32_t p1_packed;
|
||||
V3D41_TMU_CONFIG_PARAMETER_1_pack(NULL,
|
||||
V3D42_TMU_CONFIG_PARAMETER_1_pack(NULL,
|
||||
(uint8_t *)&p1_packed,
|
||||
&p1_unpacked);
|
||||
|
||||
|
|
@ -384,7 +382,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
|
|||
* address
|
||||
*/
|
||||
uint32_t p1_packed_default;
|
||||
V3D41_TMU_CONFIG_PARAMETER_1_pack(NULL,
|
||||
V3D42_TMU_CONFIG_PARAMETER_1_pack(NULL,
|
||||
(uint8_t *)&p1_packed_default,
|
||||
&p1_unpacked_default);
|
||||
vir_WRTMUC(c, QUNIFORM_CONSTANT, p1_packed_default);
|
||||
|
|
@ -412,7 +410,7 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
|
|||
}
|
||||
|
||||
static uint32_t
|
||||
v3d40_image_atomic_tmu_op(nir_intrinsic_instr *instr)
|
||||
v3d_image_atomic_tmu_op(nir_intrinsic_instr *instr)
|
||||
{
|
||||
nir_atomic_op atomic_op = nir_intrinsic_atomic_op(instr);
|
||||
switch (atomic_op) {
|
||||
|
|
@ -431,7 +429,7 @@ v3d40_image_atomic_tmu_op(nir_intrinsic_instr *instr)
|
|||
}
|
||||
|
||||
static uint32_t
|
||||
v3d40_image_load_store_tmu_op(nir_intrinsic_instr *instr)
|
||||
v3d_image_load_store_tmu_op(nir_intrinsic_instr *instr)
|
||||
{
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_image_load:
|
||||
|
|
@ -440,7 +438,7 @@ v3d40_image_load_store_tmu_op(nir_intrinsic_instr *instr)
|
|||
|
||||
case nir_intrinsic_image_atomic:
|
||||
case nir_intrinsic_image_atomic_swap:
|
||||
return v3d40_image_atomic_tmu_op(instr);
|
||||
return v3d_image_atomic_tmu_op(instr);
|
||||
|
||||
default:
|
||||
unreachable("unknown image intrinsic");
|
||||
|
|
@ -552,21 +550,21 @@ get_required_image_tmu_writes(struct v3d_compile *c,
|
|||
}
|
||||
|
||||
void
|
||||
v3d40_vir_emit_image_load_store(struct v3d_compile *c,
|
||||
nir_intrinsic_instr *instr)
|
||||
v3d_vir_emit_image_load_store(struct v3d_compile *c,
|
||||
nir_intrinsic_instr *instr)
|
||||
{
|
||||
unsigned format = nir_intrinsic_format(instr);
|
||||
unsigned unit = nir_src_as_uint(instr->src[0]);
|
||||
|
||||
struct V3D41_TMU_CONFIG_PARAMETER_0 p0_unpacked = {
|
||||
struct V3D42_TMU_CONFIG_PARAMETER_0 p0_unpacked = {
|
||||
};
|
||||
|
||||
struct V3D41_TMU_CONFIG_PARAMETER_1 p1_unpacked = {
|
||||
struct V3D42_TMU_CONFIG_PARAMETER_1 p1_unpacked = {
|
||||
.per_pixel_mask_enable = true,
|
||||
.output_type_32_bit = v3d_gl_format_is_return_32(format),
|
||||
};
|
||||
|
||||
struct V3D41_TMU_CONFIG_PARAMETER_2 p2_unpacked = { 0 };
|
||||
struct V3D42_TMU_CONFIG_PARAMETER_2 p2_unpacked = { 0 };
|
||||
|
||||
/* Limit the number of channels returned to both how many the NIR
|
||||
* instruction writes and how many the instruction could produce.
|
||||
|
|
@ -578,7 +576,7 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c,
|
|||
p0_unpacked.return_words_of_texture_data =
|
||||
(1 << instr_return_channels) - 1;
|
||||
|
||||
p2_unpacked.op = v3d40_image_load_store_tmu_op(instr);
|
||||
p2_unpacked.op = v3d_image_load_store_tmu_op(instr);
|
||||
|
||||
/* If we were able to replace atomic_add for an inc/dec, then we
|
||||
* need/can to do things slightly different, like not loading the
|
||||
|
|
@ -591,7 +589,7 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c,
|
|||
p2_unpacked.op == V3D_TMU_OP_WRITE_OR_READ_DEC);
|
||||
|
||||
uint32_t p0_packed;
|
||||
V3D41_TMU_CONFIG_PARAMETER_0_pack(NULL,
|
||||
V3D42_TMU_CONFIG_PARAMETER_0_pack(NULL,
|
||||
(uint8_t *)&p0_packed,
|
||||
&p0_unpacked);
|
||||
|
||||
|
|
@ -602,12 +600,12 @@ v3d40_vir_emit_image_load_store(struct v3d_compile *c,
|
|||
p0_packed |= unit << 24;
|
||||
|
||||
uint32_t p1_packed;
|
||||
V3D41_TMU_CONFIG_PARAMETER_1_pack(NULL,
|
||||
V3D42_TMU_CONFIG_PARAMETER_1_pack(NULL,
|
||||
(uint8_t *)&p1_packed,
|
||||
&p1_unpacked);
|
||||
|
||||
uint32_t p2_packed;
|
||||
V3D41_TMU_CONFIG_PARAMETER_2_pack(NULL,
|
||||
V3D42_TMU_CONFIG_PARAMETER_2_pack(NULL,
|
||||
(uint8_t *)&p2_packed,
|
||||
&p2_unpacked);
|
||||
|
||||
|
|
@ -155,32 +155,6 @@ vir_is_tex(const struct v3d_device_info *devinfo, struct qinst *inst)
|
|||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
vir_writes_r3_implicitly(const struct v3d_device_info *devinfo,
|
||||
struct qinst *inst)
|
||||
{
|
||||
if (!devinfo->has_accumulators)
|
||||
return false;
|
||||
|
||||
for (int i = 0; i < vir_get_nsrc(inst); i++) {
|
||||
switch (inst->src[i].file) {
|
||||
case QFILE_VPM:
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (devinfo->ver < 41 && (inst->qpu.sig.ldvary ||
|
||||
inst->qpu.sig.ldtlb ||
|
||||
inst->qpu.sig.ldtlbu ||
|
||||
inst->qpu.sig.ldvpm)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
vir_writes_r4_implicitly(const struct v3d_device_info *devinfo,
|
||||
struct qinst *inst)
|
||||
|
|
@ -203,9 +177,6 @@ vir_writes_r4_implicitly(const struct v3d_device_info *devinfo,
|
|||
break;
|
||||
}
|
||||
|
||||
if (devinfo->ver < 41 && inst->qpu.sig.ldtmu)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -182,11 +182,6 @@ vir_print_reg(struct v3d_compile *c, const struct qinst *inst,
|
|||
break;
|
||||
}
|
||||
|
||||
case QFILE_VPM:
|
||||
fprintf(stderr, "vpm%d.%d",
|
||||
reg.index / 4, reg.index % 4);
|
||||
break;
|
||||
|
||||
case QFILE_TEMP:
|
||||
fprintf(stderr, "t%d", reg.index);
|
||||
break;
|
||||
|
|
@ -197,9 +192,6 @@ static void
|
|||
vir_dump_sig_addr(const struct v3d_device_info *devinfo,
|
||||
const struct v3d_qpu_instr *instr)
|
||||
{
|
||||
if (devinfo->ver < 41)
|
||||
return;
|
||||
|
||||
if (!instr->sig_magic)
|
||||
fprintf(stderr, ".rf%d", instr->sig_addr);
|
||||
else {
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ is_copy_mov(const struct v3d_device_info *devinfo, struct qinst *inst)
|
|||
return false;
|
||||
}
|
||||
|
||||
if (devinfo->ver <= 42) {
|
||||
if (devinfo->ver == 42) {
|
||||
switch (inst->src[0].file) {
|
||||
case QFILE_MAGIC:
|
||||
/* No copy propagating from R3/R4/R5 -- the MOVs from
|
||||
|
|
|
|||
|
|
@ -51,22 +51,11 @@ dce(struct v3d_compile *c, struct qinst *inst)
|
|||
vir_remove_instruction(c, inst);
|
||||
}
|
||||
|
||||
static bool
|
||||
has_nonremovable_reads(struct v3d_compile *c, struct qinst *inst)
|
||||
{
|
||||
for (int i = 0; i < vir_get_nsrc(inst); i++) {
|
||||
if (inst->src[i].file == QFILE_VPM)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
can_write_to_null(struct v3d_compile *c, struct qinst *inst)
|
||||
{
|
||||
/* The SFU instructions must write to a physical register. */
|
||||
if (c->devinfo->ver >= 41 && v3d_qpu_uses_sfu(&inst->qpu))
|
||||
if (v3d_qpu_uses_sfu(&inst->qpu))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
|
@ -241,7 +230,6 @@ vir_opt_dead_code(struct v3d_compile *c)
|
|||
}
|
||||
|
||||
if (v3d_qpu_writes_flags(&inst->qpu) ||
|
||||
has_nonremovable_reads(c, inst) ||
|
||||
(is_ldunifa && !is_first_ldunifa && !is_last_ldunifa)) {
|
||||
/* If we can't remove the instruction, but we
|
||||
* don't need its destination value, just
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ vir_opt_small_immediates(struct v3d_compile *c)
|
|||
*/
|
||||
struct v3d_qpu_sig new_sig = inst->qpu.sig;
|
||||
uint32_t sig_packed;
|
||||
if (c->devinfo->ver <= 42) {
|
||||
if (c->devinfo->ver == 42) {
|
||||
new_sig.small_imm_b = true;
|
||||
} else {
|
||||
if (vir_is_add(inst)) {
|
||||
|
|
|
|||
|
|
@ -942,7 +942,7 @@ v3d_ra_select_rf(struct v3d_ra_select_callback_data *v3d_ra,
|
|||
* avoid allocating these to registers used by the last instructions
|
||||
* in the shader.
|
||||
*/
|
||||
const uint32_t safe_rf_start = v3d_ra->devinfo->ver <= 42 ? 3 : 4;
|
||||
const uint32_t safe_rf_start = v3d_ra->devinfo->ver == 42 ? 3 : 4;
|
||||
if (v3d_ra->nodes->info[node].is_program_end &&
|
||||
v3d_ra->next_phys < safe_rf_start) {
|
||||
v3d_ra->next_phys = safe_rf_start;
|
||||
|
|
@ -1004,7 +1004,7 @@ vir_init_reg_sets(struct v3d_compiler *compiler)
|
|||
/* Allocate up to 3 regfile classes, for the ways the physical
|
||||
* register file can be divided up for fragment shader threading.
|
||||
*/
|
||||
int max_thread_index = (compiler->devinfo->ver >= 40 ? 2 : 3);
|
||||
int max_thread_index = 2;
|
||||
uint8_t phys_index = get_phys_index(compiler->devinfo);
|
||||
|
||||
compiler->regs = ra_alloc_reg_set(compiler, phys_index + PHYS_COUNT,
|
||||
|
|
@ -1070,20 +1070,10 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c,
|
|||
int32_t ip = inst->ip;
|
||||
assert(ip >= 0);
|
||||
|
||||
/* If the instruction writes r3/r4 (and optionally moves its
|
||||
* result to a temp), nothing else can be stored in r3/r4 across
|
||||
/* If the instruction writes r4 (and optionally moves its
|
||||
* result to a temp), nothing else can be stored in r4 across
|
||||
* it.
|
||||
*/
|
||||
if (vir_writes_r3_implicitly(c->devinfo, inst)) {
|
||||
for (int i = 0; i < c->num_temps; i++) {
|
||||
if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
|
||||
ra_add_node_interference(c->g,
|
||||
temp_to_node(c, i),
|
||||
acc_nodes[3]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (vir_writes_r4_implicitly(c->devinfo, inst)) {
|
||||
for (int i = 0; i < c->num_temps; i++) {
|
||||
if (c->temp_start[i] < ip && c->temp_end[i] > ip) {
|
||||
|
|
@ -1207,15 +1197,6 @@ update_graph_and_reg_classes_for_inst(struct v3d_compile *c,
|
|||
set_temp_class_bits(c, inst->dst.index,
|
||||
class_bits);
|
||||
|
||||
} else {
|
||||
/* Until V3D 4.x, we could only load a uniform
|
||||
* to r5, so we'll need to spill if uniform
|
||||
* loads interfere with each other.
|
||||
*/
|
||||
if (c->devinfo->ver < 40) {
|
||||
set_temp_class_bits(c, inst->dst.index,
|
||||
CLASS_BITS_R5);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Make sure we don't allocate the ldvary's
|
||||
|
|
@ -1320,7 +1301,7 @@ v3d_register_allocate(struct v3d_compile *c)
|
|||
* RF0-2. Start at RF4 in 7.x to prevent TLB writes from
|
||||
* using RF2-3.
|
||||
*/
|
||||
.next_phys = c->devinfo->ver <= 42 ? 3 : 4,
|
||||
.next_phys = c->devinfo->ver == 42 ? 3 : 4,
|
||||
.nodes = &c->nodes,
|
||||
.devinfo = c->devinfo,
|
||||
};
|
||||
|
|
@ -1333,10 +1314,8 @@ v3d_register_allocate(struct v3d_compile *c)
|
|||
* are available at both 1x and 2x threading, and 4x has 32.
|
||||
*/
|
||||
c->thread_index = ffs(c->threads) - 1;
|
||||
if (c->devinfo->ver >= 40) {
|
||||
if (c->thread_index >= 1)
|
||||
c->thread_index--;
|
||||
}
|
||||
if (c->thread_index >= 1)
|
||||
c->thread_index--;
|
||||
|
||||
c->g = ra_alloc_interference_graph(c->compiler->regs, num_ra_nodes);
|
||||
ra_set_select_reg_callback(c->g, v3d_ra_select_callback, &callback_data);
|
||||
|
|
|
|||
|
|
@ -108,7 +108,7 @@ v3d71_set_src(struct v3d_qpu_instr *instr, uint8_t *raddr, struct qpu_reg src)
|
|||
* fields of the instruction.
|
||||
*/
|
||||
static void
|
||||
v3d33_set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
|
||||
v3d42_set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
|
||||
{
|
||||
if (src.smimm) {
|
||||
assert(instr->sig.small_imm_b);
|
||||
|
|
@ -158,13 +158,13 @@ set_src(struct v3d_qpu_instr *instr,
|
|||
const struct v3d_device_info *devinfo)
|
||||
{
|
||||
if (devinfo->ver < 71)
|
||||
return v3d33_set_src(instr, mux, src);
|
||||
return v3d42_set_src(instr, mux, src);
|
||||
else
|
||||
return v3d71_set_src(instr, raddr, src);
|
||||
}
|
||||
|
||||
static bool
|
||||
v3d33_mov_src_and_dst_equal(struct qinst *qinst)
|
||||
v3d42_mov_src_and_dst_equal(struct qinst *qinst)
|
||||
{
|
||||
enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;
|
||||
if (qinst->qpu.alu.mul.magic_write) {
|
||||
|
|
@ -216,7 +216,7 @@ mov_src_and_dst_equal(struct qinst *qinst,
|
|||
const struct v3d_device_info *devinfo)
|
||||
{
|
||||
if (devinfo->ver < 71)
|
||||
return v3d33_mov_src_and_dst_equal(qinst);
|
||||
return v3d42_mov_src_and_dst_equal(qinst);
|
||||
else
|
||||
return v3d71_mov_src_and_dst_equal(qinst);
|
||||
}
|
||||
|
|
@ -262,8 +262,6 @@ v3d_generate_code_block(struct v3d_compile *c,
|
|||
struct qblock *block,
|
||||
struct qpu_reg *temp_registers)
|
||||
{
|
||||
int last_vpm_read_index = -1;
|
||||
|
||||
vir_for_each_inst_safe(qinst, block) {
|
||||
#if 0
|
||||
fprintf(stderr, "translating qinst to qpu: ");
|
||||
|
|
@ -271,8 +269,6 @@ v3d_generate_code_block(struct v3d_compile *c,
|
|||
fprintf(stderr, "\n");
|
||||
#endif
|
||||
|
||||
struct qinst *temp;
|
||||
|
||||
if (vir_has_uniform(qinst))
|
||||
c->num_uniforms++;
|
||||
|
||||
|
|
@ -303,19 +299,6 @@ v3d_generate_code_block(struct v3d_compile *c,
|
|||
case QFILE_SMALL_IMM:
|
||||
src[i].smimm = true;
|
||||
break;
|
||||
|
||||
case QFILE_VPM:
|
||||
assert(c->devinfo->ver < 40);
|
||||
assert((int)qinst->src[i].index >=
|
||||
last_vpm_read_index);
|
||||
(void)last_vpm_read_index;
|
||||
last_vpm_read_index = qinst->src[i].index;
|
||||
|
||||
temp = new_qpu_nop_before(qinst);
|
||||
temp->qpu.sig.ldvpm = true;
|
||||
|
||||
src[i] = qpu_magic(V3D_QPU_WADDR_R3);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -337,10 +320,6 @@ v3d_generate_code_block(struct v3d_compile *c,
|
|||
dst = temp_registers[qinst->dst.index];
|
||||
break;
|
||||
|
||||
case QFILE_VPM:
|
||||
dst = qpu_magic(V3D_QPU_WADDR_VPM);
|
||||
break;
|
||||
|
||||
case QFILE_SMALL_IMM:
|
||||
case QFILE_LOAD_IMM:
|
||||
assert(!"not reached");
|
||||
|
|
@ -361,8 +340,6 @@ v3d_generate_code_block(struct v3d_compile *c,
|
|||
}
|
||||
|
||||
if (use_rf) {
|
||||
assert(c->devinfo->ver >= 40);
|
||||
|
||||
if (qinst->qpu.sig.ldunif) {
|
||||
qinst->qpu.sig.ldunif = false;
|
||||
qinst->qpu.sig.ldunifrf = true;
|
||||
|
|
@ -470,11 +447,7 @@ v3d_dump_qpu(struct v3d_compile *c)
|
|||
const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
|
||||
fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str);
|
||||
|
||||
/* We can only do this on 4.x, because we're not tracking TMU
|
||||
* implicit uniforms here on 3.x.
|
||||
*/
|
||||
if (c->devinfo->ver >= 40 &&
|
||||
reads_uniform(c->devinfo, c->qpu_insts[i])) {
|
||||
if (reads_uniform(c->devinfo, c->qpu_insts[i])) {
|
||||
fprintf(stderr, " (");
|
||||
vir_dump_uniform(c->uniform_contents[next_uniform],
|
||||
c->uniform_data[next_uniform]);
|
||||
|
|
@ -486,8 +459,7 @@ v3d_dump_qpu(struct v3d_compile *c)
|
|||
}
|
||||
|
||||
/* Make sure our dumping lined up. */
|
||||
if (c->devinfo->ver >= 40)
|
||||
assert(next_uniform == c->num_uniforms);
|
||||
assert(next_uniform == c->num_uniforms);
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ inc_broadcom = include_directories('.', 'cle')
|
|||
|
||||
subdir('cle')
|
||||
|
||||
v3d_versions = ['33', '41', '42', '71']
|
||||
v3d_versions = ['42', '71']
|
||||
v3d_libs = []
|
||||
|
||||
if with_gallium_v3d or with_broadcom_vk
|
||||
|
|
|
|||
|
|
@ -45,11 +45,7 @@ uint32_t v3d_simulator_get_mem_free(void);
|
|||
#ifdef v3dX
|
||||
# include "v3dx_simulator.h"
|
||||
#else
|
||||
# define v3dX(x) v3d33_##x
|
||||
# include "v3dx_simulator.h"
|
||||
# undef v3dX
|
||||
|
||||
# define v3dX(x) v3d41_##x
|
||||
# define v3dX(x) v3d42_##x
|
||||
# include "v3dx_simulator.h"
|
||||
# undef v3dX
|
||||
|
||||
|
|
@ -61,15 +57,10 @@ uint32_t v3d_simulator_get_mem_free(void);
|
|||
|
||||
/* Helper to call simulator ver specific functions */
|
||||
#define v3d_X_simulator(thing) ({ \
|
||||
__typeof(&v3d33_simulator_##thing) v3d_X_sim_thing;\
|
||||
__typeof(&v3d42_simulator_##thing) v3d_X_sim_thing;\
|
||||
switch (sim_state.ver) { \
|
||||
case 33: \
|
||||
case 40: \
|
||||
v3d_X_sim_thing = &v3d33_simulator_##thing; \
|
||||
break; \
|
||||
case 41: \
|
||||
case 42: \
|
||||
v3d_X_sim_thing = &v3d41_simulator_##thing; \
|
||||
v3d_X_sim_thing = &v3d42_simulator_##thing; \
|
||||
break; \
|
||||
case 71: \
|
||||
v3d_X_sim_thing = &v3d71_simulator_##thing; \
|
||||
|
|
|
|||
|
|
@ -51,27 +51,14 @@
|
|||
#if V3D_VERSION == 71
|
||||
#include "libs/core/v3d/registers/7.1.6.0/v3d.h"
|
||||
#else
|
||||
#if V3D_VERSION == 41 || V3D_VERSION == 42
|
||||
#if V3D_VERSION == 42
|
||||
#include "libs/core/v3d/registers/4.2.14.0/v3d.h"
|
||||
#else
|
||||
#include "libs/core/v3d/registers/3.3.0.0/v3d.h"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val)
|
||||
#define V3D_READ(reg) v3d_hw_read_reg(v3d, reg)
|
||||
|
||||
static void
|
||||
v3d_invalidate_l3(struct v3d_hw *v3d)
|
||||
{
|
||||
#if V3D_VERSION < 40
|
||||
uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL);
|
||||
|
||||
V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET);
|
||||
V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Invalidates the L2C cache. This is a read-only cache for uniforms and instructions. */
|
||||
static void
|
||||
v3d_invalidate_l2c(struct v3d_hw *v3d)
|
||||
|
|
@ -156,7 +143,6 @@ v3d_invalidate_slices(struct v3d_hw *v3d)
|
|||
static void
|
||||
v3d_invalidate_caches(struct v3d_hw *v3d)
|
||||
{
|
||||
v3d_invalidate_l3(v3d);
|
||||
v3d_invalidate_l2c(v3d);
|
||||
v3d_invalidate_l2t(v3d);
|
||||
v3d_invalidate_slices(v3d);
|
||||
|
|
@ -225,7 +211,7 @@ v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
|
|||
struct drm_v3d_submit_csd *args,
|
||||
uint32_t gmp_ofs)
|
||||
{
|
||||
#if V3D_VERSION >= 41
|
||||
#if V3D_VERSION >= 42
|
||||
int last_completed_jobs = (V3D_READ(V3D_CSD_0_STATUS) &
|
||||
V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET);
|
||||
g_gmp_ofs = gmp_ofs;
|
||||
|
|
@ -282,13 +268,13 @@ v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
|
|||
args->value = 1;
|
||||
return 0;
|
||||
case DRM_V3D_PARAM_SUPPORTS_CSD:
|
||||
args->value = V3D_VERSION >= 41;
|
||||
args->value = V3D_VERSION >= 42;
|
||||
return 0;
|
||||
case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
|
||||
args->value = 1;
|
||||
return 0;
|
||||
case DRM_V3D_PARAM_SUPPORTS_PERFMON:
|
||||
args->value = V3D_VERSION >= 41;
|
||||
args->value = V3D_VERSION >= 42;
|
||||
return 0;
|
||||
case DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT:
|
||||
args->value = 1;
|
||||
|
|
@ -359,8 +345,7 @@ handle_mmu_interruptions(struct v3d_hw *v3d,
|
|||
uint32_t axi_id = V3D_READ(V3D_MMU_VIO_ID);
|
||||
uint32_t va_width = 30;
|
||||
|
||||
#if V3D_VERSION >= 41
|
||||
static const char *const v3d41_axi_ids[] = {
|
||||
static const char *const v3d42_axi_ids[] = {
|
||||
"L2T",
|
||||
"PTB",
|
||||
"PSE",
|
||||
|
|
@ -372,14 +357,14 @@ handle_mmu_interruptions(struct v3d_hw *v3d,
|
|||
};
|
||||
|
||||
axi_id = axi_id >> 5;
|
||||
if (axi_id < ARRAY_SIZE(v3d41_axi_ids))
|
||||
client = v3d41_axi_ids[axi_id];
|
||||
if (axi_id < ARRAY_SIZE(v3d42_axi_ids))
|
||||
client = v3d42_axi_ids[axi_id];
|
||||
|
||||
uint32_t mmu_debug = V3D_READ(V3D_MMU_DEBUG_INFO);
|
||||
|
||||
va_width += ((mmu_debug & V3D_MMU_DEBUG_INFO_VA_WIDTH_SET)
|
||||
>> V3D_MMU_DEBUG_INFO_VA_WIDTH_LSB);
|
||||
#endif
|
||||
|
||||
/* Only the top bits (final number depends on the gen) of the virtual
|
||||
* address are reported in the MMU VIO_ADDR register.
|
||||
*/
|
||||
|
|
@ -454,18 +439,6 @@ v3d_isr(uint32_t hub_status)
|
|||
void
|
||||
v3dX(simulator_init_regs)(struct v3d_hw *v3d)
|
||||
{
|
||||
#if V3D_VERSION == 33
|
||||
/* Set OVRTMUOUT to match kernel behavior.
|
||||
*
|
||||
* This means that the texture sampler uniform configuration's tmu
|
||||
* output type field is used, instead of using the hardware default
|
||||
* behavior based on the texture type. If you want the default
|
||||
* behavior, you can still put "2" in the indirect texture state's
|
||||
* output_type field.
|
||||
*/
|
||||
V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET);
|
||||
#endif
|
||||
|
||||
/* FIXME: the kernel captures some additional core interrupts here,
|
||||
* for tracing. Perhaps we should evaluate to do the same here and add
|
||||
* some debug options.
|
||||
|
|
@ -514,13 +487,11 @@ v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
|
|||
V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma);
|
||||
V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms);
|
||||
}
|
||||
#if V3D_VERSION >= 41
|
||||
if (submit->qts) {
|
||||
V3D_WRITE(V3D_CLE_0_CT0QTS,
|
||||
V3D_CLE_0_CT0QTS_CTQTSEN_SET |
|
||||
submit->qts);
|
||||
}
|
||||
#endif
|
||||
V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start);
|
||||
V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end);
|
||||
|
||||
|
|
@ -544,21 +515,18 @@ v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
|
|||
}
|
||||
}
|
||||
|
||||
#if V3D_VERSION >= 41
|
||||
#define V3D_PCTR_0_PCTR_N(x) (V3D_PCTR_0_PCTR0 + 4 * (x))
|
||||
#define V3D_PCTR_0_SRC_N(x) (V3D_PCTR_0_SRC_0_3 + 4 * (x))
|
||||
#define V3D_PCTR_0_SRC_N_SHIFT(x) ((x) * 8)
|
||||
#define V3D_PCTR_0_SRC_N_MASK(x) (BITFIELD_RANGE(V3D_PCTR_0_SRC_N_SHIFT(x), \
|
||||
V3D_PCTR_0_SRC_N_SHIFT(x) + \
|
||||
V3D_PCTR_0_SRC_0_3_PCTRS0_MSB))
|
||||
#endif
|
||||
|
||||
void
|
||||
v3dX(simulator_perfmon_start)(struct v3d_hw *v3d,
|
||||
uint32_t ncounters,
|
||||
uint8_t *events)
|
||||
{
|
||||
#if V3D_VERSION >= 41
|
||||
int i, j;
|
||||
uint32_t source;
|
||||
uint32_t mask = BITFIELD_RANGE(0, ncounters);
|
||||
|
|
@ -573,21 +541,18 @@ v3dX(simulator_perfmon_start)(struct v3d_hw *v3d,
|
|||
V3D_WRITE(V3D_PCTR_0_CLR, mask);
|
||||
V3D_WRITE(V3D_PCTR_0_OVERFLOW, mask);
|
||||
V3D_WRITE(V3D_PCTR_0_EN, mask);
|
||||
#endif
|
||||
}
|
||||
|
||||
void v3dX(simulator_perfmon_stop)(struct v3d_hw *v3d,
|
||||
uint32_t ncounters,
|
||||
uint64_t *values)
|
||||
{
|
||||
#if V3D_VERSION >= 41
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ncounters; i++)
|
||||
values[i] += V3D_READ(V3D_PCTR_0_PCTR_N(i));
|
||||
|
||||
V3D_WRITE(V3D_PCTR_0_EN, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
void v3dX(simulator_get_perfcnt_total)(uint32_t *count)
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@
|
|||
* versions, so we just explicitly set the V3D_VERSION and include v3dx_pack
|
||||
* here
|
||||
*/
|
||||
#define V3D_VERSION 33
|
||||
#define V3D_VERSION 42
|
||||
#include "broadcom/common/v3d_macros.h"
|
||||
#include "broadcom/cle/v3dx_pack.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -618,10 +618,10 @@ struct v3dv_device_memory {
|
|||
|
||||
#define V3DV_MAX_PLANE_COUNT 3
|
||||
struct v3dv_format_plane {
|
||||
/* One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
|
||||
/* One of V3D42_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
|
||||
uint8_t rt_type;
|
||||
|
||||
/* One of V3D33_TEXTURE_DATA_FORMAT_*. */
|
||||
/* One of V3D42_TEXTURE_DATA_FORMAT_*. */
|
||||
uint8_t tex_type;
|
||||
|
||||
/* Swizzle to apply to the RGBA shader output for storing to the tile
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ if dep_v3dv3.found()
|
|||
v3d_args += '-DUSE_V3D_SIMULATOR'
|
||||
endif
|
||||
|
||||
v3d_versions = ['33', '42', '71']
|
||||
v3d_versions = ['42', '71']
|
||||
|
||||
per_version_libs = []
|
||||
foreach ver : v3d_versions
|
||||
|
|
|
|||
|
|
@ -309,7 +309,7 @@ v3d_tlb_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
|
|||
struct v3d_screen *screen = v3d->screen;
|
||||
struct v3d_device_info *devinfo = &screen->devinfo;
|
||||
|
||||
if (devinfo->ver < 40 || !info->mask)
|
||||
if (!info->mask)
|
||||
return;
|
||||
|
||||
bool is_color_blit = info->mask & PIPE_MASK_RGBA;
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@
|
|||
* hw versions, so we just explicitly set the V3D_VERSION and include
|
||||
* v3dx_pack here
|
||||
*/
|
||||
#define V3D_VERSION 33
|
||||
#define V3D_VERSION 42
|
||||
#include "broadcom/common/v3d_macros.h"
|
||||
#include "broadcom/cle/v3dx_pack.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -300,16 +300,11 @@ v3d_get_sample_position(struct pipe_context *pctx,
|
|||
unsigned sample_count, unsigned sample_index,
|
||||
float *xy)
|
||||
{
|
||||
struct v3d_context *v3d = v3d_context(pctx);
|
||||
|
||||
if (sample_count <= 1) {
|
||||
xy[0] = 0.5;
|
||||
xy[1] = 0.5;
|
||||
} else {
|
||||
static const int xoffsets_v33[] = { 1, -3, 3, -1 };
|
||||
static const int xoffsets_v42[] = { -1, 3, -3, 1 };
|
||||
const int *xoffsets = (v3d->screen->devinfo.ver >= 42 ?
|
||||
xoffsets_v42 : xoffsets_v33);
|
||||
static const int xoffsets[] = { -1, 3, -3, 1 };
|
||||
|
||||
xy[0] = 0.5 + xoffsets[sample_index] * .125;
|
||||
xy[1] = .125 + sample_index * .25;
|
||||
|
|
|
|||
|
|
@ -825,12 +825,8 @@ void v3d_disk_cache_store(struct v3d_context *v3d,
|
|||
|
||||
/* Helper to call hw ver specific functions */
|
||||
#define v3d_X(devinfo, thing) ({ \
|
||||
__typeof(&v3d33_##thing) v3d_X_thing; \
|
||||
__typeof(&v3d42_##thing) v3d_X_thing; \
|
||||
switch (devinfo->ver) { \
|
||||
case 33: \
|
||||
case 40: \
|
||||
v3d_X_thing = &v3d33_##thing; \
|
||||
break; \
|
||||
case 42: \
|
||||
v3d_X_thing = &v3d42_##thing; \
|
||||
break; \
|
||||
|
|
@ -846,19 +842,13 @@ void v3d_disk_cache_store(struct v3d_context *v3d,
|
|||
/* FIXME: The same for vulkan/opengl. Common place? define it at the
|
||||
* v3d_packet files?
|
||||
*/
|
||||
#define V3D33_CLIPPER_XY_GRANULARITY 256.0f
|
||||
#define V3D42_CLIPPER_XY_GRANULARITY 256.0f
|
||||
#define V3D71_CLIPPER_XY_GRANULARITY 64.0f
|
||||
|
||||
/* Helper to get hw-specific macro values */
|
||||
#define V3DV_X(devinfo, thing) ({ \
|
||||
__typeof(V3D33_##thing) V3D_X_THING; \
|
||||
__typeof(V3D42_##thing) V3D_X_THING; \
|
||||
switch (devinfo->ver) { \
|
||||
case 33: \
|
||||
case 40: \
|
||||
V3D_X_THING = V3D33_##thing; \
|
||||
break; \
|
||||
case 41: \
|
||||
case 42: \
|
||||
V3D_X_THING = V3D42_##thing; \
|
||||
break; \
|
||||
|
|
@ -874,10 +864,6 @@ void v3d_disk_cache_store(struct v3d_context *v3d,
|
|||
#ifdef v3dX
|
||||
# include "v3dx_context.h"
|
||||
#else
|
||||
# define v3dX(x) v3d33_##x
|
||||
# include "v3dx_context.h"
|
||||
# undef v3dX
|
||||
|
||||
# define v3dX(x) v3d42_##x
|
||||
# include "v3dx_context.h"
|
||||
# undef v3dX
|
||||
|
|
|
|||
|
|
@ -30,10 +30,10 @@ struct v3d_format {
|
|||
/** Set if the pipe format is defined in the table. */
|
||||
bool present;
|
||||
|
||||
/** One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
|
||||
/** One of V3D42_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
|
||||
uint8_t rt_type;
|
||||
|
||||
/** One of V3D33_TEXTURE_DATA_FORMAT_*. */
|
||||
/** One of V3D42_TEXTURE_DATA_FORMAT_*. */
|
||||
uint8_t tex_type;
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@
|
|||
#include "v3d_format_table.h"
|
||||
|
||||
/* The format internal types are the same across V3D versions */
|
||||
#define V3D_VERSION 33
|
||||
#define V3D_VERSION 42
|
||||
#include "broadcom/cle/v3dx_pack.h"
|
||||
|
||||
bool
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@
|
|||
#include <xf86drm.h>
|
||||
#include "v3d_context.h"
|
||||
/* The OQ/semaphore packets are the same across V3D versions. */
|
||||
#define V3D_VERSION 33
|
||||
#define V3D_VERSION 42
|
||||
#include "broadcom/cle/v3dx_pack.h"
|
||||
#include "broadcom/common/v3d_macros.h"
|
||||
#include "util/hash_table.h"
|
||||
|
|
@ -547,7 +547,7 @@ v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job)
|
|||
/* On V3D 4.1, the tile alloc/state setup moved to register writes
|
||||
* instead of binner packets.
|
||||
*/
|
||||
if (devinfo->ver >= 41) {
|
||||
if (devinfo->ver >= 42) {
|
||||
v3d_job_add_bo(job, job->tile_alloc);
|
||||
job->submit.qma = job->tile_alloc->offset;
|
||||
job->submit.qms = job->tile_alloc->size;
|
||||
|
|
|
|||
|
|
@ -35,7 +35,8 @@
|
|||
#include "nir/tgsi_to_nir.h"
|
||||
#include "compiler/v3d_compiler.h"
|
||||
#include "v3d_context.h"
|
||||
#include "broadcom/cle/v3d_packet_v33_pack.h"
|
||||
/* packets here are the same across V3D versions. */
|
||||
#include "broadcom/cle/v3d_packet_v42_pack.h"
|
||||
|
||||
static struct v3d_compiled_shader *
|
||||
v3d_get_compiled_shader(struct v3d_context *v3d,
|
||||
|
|
@ -136,7 +137,7 @@ v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so,
|
|||
while (vpm_size) {
|
||||
uint32_t write_size = MIN2(vpm_size, 1 << 4);
|
||||
|
||||
struct V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = {
|
||||
struct V3D42_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC unpacked = {
|
||||
/* We need the offset from the coordinate shader's VPM
|
||||
* output block, which has the [X, Y, Z, W, Xs, Ys]
|
||||
* values at the start.
|
||||
|
|
@ -151,7 +152,7 @@ v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so,
|
|||
so->num_tf_specs != 0);
|
||||
|
||||
assert(so->num_tf_specs != ARRAY_SIZE(so->tf_specs));
|
||||
V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
|
||||
V3D42_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
|
||||
(void *)&so->tf_specs[so->num_tf_specs],
|
||||
&unpacked);
|
||||
|
||||
|
|
@ -166,7 +167,7 @@ v3d_set_transform_feedback_outputs(struct v3d_uncompiled_shader *so,
|
|||
assert(unpacked.first_shaded_vertex_value_to_output != 8 ||
|
||||
so->num_tf_specs != 0);
|
||||
|
||||
V3D33_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
|
||||
V3D42_TRANSFORM_FEEDBACK_OUTPUT_DATA_SPEC_pack(NULL,
|
||||
(void *)&so->tf_specs_psiz[so->num_tf_specs],
|
||||
&unpacked);
|
||||
so->num_tf_specs++;
|
||||
|
|
@ -559,7 +560,6 @@ v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key,
|
|||
assert(key->num_tex_used == key->num_samplers_used);
|
||||
for (int i = 0; i < texstate->num_textures; i++) {
|
||||
struct pipe_sampler_view *sampler = texstate->textures[i];
|
||||
struct v3d_sampler_view *v3d_sampler = v3d_sampler_view(sampler);
|
||||
|
||||
if (!sampler)
|
||||
continue;
|
||||
|
|
@ -573,27 +573,16 @@ v3d_setup_shared_key(struct v3d_context *v3d, struct v3d_key *key,
|
|||
*/
|
||||
if (key->sampler[i].return_size == 16) {
|
||||
key->sampler[i].return_channels = 2;
|
||||
} else if (devinfo->ver > 40) {
|
||||
key->sampler[i].return_channels = 4;
|
||||
} else {
|
||||
key->sampler[i].return_channels =
|
||||
v3d_get_tex_return_channels(devinfo,
|
||||
sampler->format);
|
||||
key->sampler[i].return_channels = 4;
|
||||
}
|
||||
|
||||
if (key->sampler[i].return_size == 32 && devinfo->ver < 40) {
|
||||
memcpy(key->tex[i].swizzle,
|
||||
v3d_sampler->swizzle,
|
||||
sizeof(v3d_sampler->swizzle));
|
||||
} else {
|
||||
/* For 16-bit returns, we let the sampler state handle
|
||||
* the swizzle.
|
||||
*/
|
||||
key->tex[i].swizzle[0] = PIPE_SWIZZLE_X;
|
||||
key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y;
|
||||
key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z;
|
||||
key->tex[i].swizzle[3] = PIPE_SWIZZLE_W;
|
||||
}
|
||||
/* We let the sampler state handle the swizzle.
|
||||
*/
|
||||
key->tex[i].swizzle[0] = PIPE_SWIZZLE_X;
|
||||
key->tex[i].swizzle[1] = PIPE_SWIZZLE_Y;
|
||||
key->tex[i].swizzle[2] = PIPE_SWIZZLE_Z;
|
||||
key->tex[i].swizzle[3] = PIPE_SWIZZLE_W;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -36,7 +36,8 @@
|
|||
#include "v3d_screen.h"
|
||||
#include "v3d_context.h"
|
||||
#include "v3d_resource.h"
|
||||
#include "broadcom/cle/v3d_packet_v33_pack.h"
|
||||
/* The packets used here are the same across V3D versions. */
|
||||
#include "broadcom/cle/v3d_packet_v42_pack.h"
|
||||
|
||||
static void
|
||||
v3d_debug_resource_layout(struct v3d_resource *rsc, const char *caller)
|
||||
|
|
@ -747,8 +748,6 @@ static struct v3d_resource *
|
|||
v3d_resource_setup(struct pipe_screen *pscreen,
|
||||
const struct pipe_resource *tmpl)
|
||||
{
|
||||
struct v3d_screen *screen = v3d_screen(pscreen);
|
||||
struct v3d_device_info *devinfo = &screen->devinfo;
|
||||
struct v3d_resource *rsc = CALLOC_STRUCT(v3d_resource);
|
||||
|
||||
if (!rsc)
|
||||
|
|
@ -760,34 +759,7 @@ v3d_resource_setup(struct pipe_screen *pscreen,
|
|||
pipe_reference_init(&prsc->reference, 1);
|
||||
prsc->screen = pscreen;
|
||||
|
||||
if (prsc->nr_samples <= 1 ||
|
||||
devinfo->ver >= 40 ||
|
||||
util_format_is_depth_or_stencil(prsc->format)) {
|
||||
rsc->cpp = util_format_get_blocksize(prsc->format);
|
||||
if (devinfo->ver < 40 && prsc->nr_samples > 1)
|
||||
rsc->cpp *= prsc->nr_samples;
|
||||
} else {
|
||||
assert(v3d_rt_format_supported(devinfo, prsc->format));
|
||||
uint32_t output_image_format =
|
||||
v3d_get_rt_format(devinfo, prsc->format);
|
||||
uint32_t internal_type;
|
||||
uint32_t internal_bpp;
|
||||
v3d_X(devinfo, get_internal_type_bpp_for_output_format)
|
||||
(output_image_format, &internal_type, &internal_bpp);
|
||||
|
||||
switch (internal_bpp) {
|
||||
case V3D_INTERNAL_BPP_32:
|
||||
rsc->cpp = 4;
|
||||
break;
|
||||
case V3D_INTERNAL_BPP_64:
|
||||
rsc->cpp = 8;
|
||||
break;
|
||||
case V3D_INTERNAL_BPP_128:
|
||||
rsc->cpp = 16;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
rsc->cpp = util_format_get_blocksize(prsc->format);
|
||||
rsc->serial_id++;
|
||||
|
||||
assert(rsc->cpp);
|
||||
|
|
|
|||
|
|
@ -153,7 +153,7 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
return 1;
|
||||
|
||||
case PIPE_CAP_POLYGON_OFFSET_CLAMP:
|
||||
return screen->devinfo.ver >= 41;
|
||||
return screen->devinfo.ver >= 42;
|
||||
|
||||
|
||||
case PIPE_CAP_TEXTURE_QUERY_LOD:
|
||||
|
|
@ -182,20 +182,18 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
return PIPE_TEXTURE_TRANSFER_BLIT;
|
||||
|
||||
case PIPE_CAP_COMPUTE:
|
||||
return screen->has_csd && screen->devinfo.ver >= 41;
|
||||
return screen->has_csd && screen->devinfo.ver >= 42;
|
||||
|
||||
case PIPE_CAP_GENERATE_MIPMAP:
|
||||
return v3d_has_feature(screen, DRM_V3D_PARAM_SUPPORTS_TFU);
|
||||
|
||||
case PIPE_CAP_INDEP_BLEND_ENABLE:
|
||||
return screen->devinfo.ver >= 40;
|
||||
return 1;
|
||||
|
||||
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
|
||||
return V3D_NON_COHERENT_ATOM_SIZE;
|
||||
|
||||
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
|
||||
if (screen->devinfo.ver < 40)
|
||||
return 0;
|
||||
return 4;
|
||||
|
||||
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
|
||||
|
|
@ -218,15 +216,9 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_FS_COORD_ORIGIN_LOWER_LEFT:
|
||||
return 0;
|
||||
case PIPE_CAP_FS_COORD_PIXEL_CENTER_INTEGER:
|
||||
if (screen->devinfo.ver >= 40)
|
||||
return 0;
|
||||
else
|
||||
return 1;
|
||||
return 0;
|
||||
case PIPE_CAP_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
|
||||
if (screen->devinfo.ver >= 40)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
return 1;
|
||||
|
||||
case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
|
||||
case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
|
||||
|
|
@ -240,18 +232,13 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||
|
||||
/* Texturing. */
|
||||
case PIPE_CAP_MAX_TEXTURE_2D_SIZE:
|
||||
if (screen->devinfo.ver < 40)
|
||||
return 2048;
|
||||
else if (screen->nonmsaa_texture_size_limit)
|
||||
if (screen->nonmsaa_texture_size_limit)
|
||||
return 7680;
|
||||
else
|
||||
return V3D_MAX_IMAGE_DIMENSION;
|
||||
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
|
||||
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
|
||||
if (screen->devinfo.ver < 40)
|
||||
return 12;
|
||||
else
|
||||
return V3D_MAX_MIP_LEVELS;
|
||||
return V3D_MAX_MIP_LEVELS;
|
||||
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
|
||||
return V3D_MAX_ARRAY_LAYERS;
|
||||
|
||||
|
|
@ -361,7 +348,7 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type s
|
|||
return 0;
|
||||
break;
|
||||
case PIPE_SHADER_GEOMETRY:
|
||||
if (screen->devinfo.ver < 41)
|
||||
if (screen->devinfo.ver < 42)
|
||||
return 0;
|
||||
break;
|
||||
default:
|
||||
|
|
@ -454,7 +441,7 @@ v3d_screen_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type s
|
|||
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
|
||||
if (screen->has_cache_flush) {
|
||||
if (screen->devinfo.ver < 41)
|
||||
if (screen->devinfo.ver < 42)
|
||||
return 0;
|
||||
else
|
||||
return PIPE_MAX_SHADER_IMAGES;
|
||||
|
|
|
|||
|
|
@ -28,9 +28,9 @@
|
|||
#include "compiler/v3d_compiler.h"
|
||||
|
||||
/* We don't expect that the packets we use in this file change across
|
||||
* hw versions, so we just include directly the v33 header
|
||||
* hw versions, so we just include directly the v42 header
|
||||
*/
|
||||
#include "broadcom/cle/v3d_packet_v33_pack.h"
|
||||
#include "broadcom/cle/v3d_packet_v42_pack.h"
|
||||
|
||||
static uint32_t
|
||||
get_texrect_scale(struct v3d_texture_stateobj *texstate,
|
||||
|
|
@ -124,54 +124,6 @@ get_image_size(struct v3d_shaderimg_stateobj *shaderimg,
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the V3D 3.x P0 (CFG_MODE=1) texture parameter.
|
||||
*
|
||||
* Some bits of this field are dependent on the type of sample being done by
|
||||
* the shader, while other bits are dependent on the sampler state. We OR the
|
||||
* two together here.
|
||||
*/
|
||||
static void
|
||||
write_texture_p0(struct v3d_job *job,
|
||||
struct v3d_cl_out **uniforms,
|
||||
struct v3d_texture_stateobj *texstate,
|
||||
uint32_t unit,
|
||||
uint32_t shader_data)
|
||||
{
|
||||
struct pipe_sampler_state *psampler = texstate->samplers[unit];
|
||||
struct v3d_sampler_state *sampler = v3d_sampler_state(psampler);
|
||||
|
||||
cl_aligned_u32(uniforms, shader_data | sampler->p0);
|
||||
}
|
||||
|
||||
/** Writes the V3D 3.x P1 (CFG_MODE=1) texture parameter. */
|
||||
static void
|
||||
write_texture_p1(struct v3d_job *job,
|
||||
struct v3d_cl_out **uniforms,
|
||||
struct v3d_texture_stateobj *texstate,
|
||||
uint32_t data)
|
||||
{
|
||||
/* Extract the texture unit from the top bits, and the compiler's
|
||||
* packed p1 from the bottom.
|
||||
*/
|
||||
uint32_t unit = data >> 5;
|
||||
uint32_t p1 = data & 0x1f;
|
||||
|
||||
struct pipe_sampler_view *psview = texstate->textures[unit];
|
||||
struct v3d_sampler_view *sview = v3d_sampler_view(psview);
|
||||
|
||||
struct V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1 unpacked = {
|
||||
.texture_state_record_base_address = texstate->texture_state[unit],
|
||||
};
|
||||
|
||||
uint32_t packed;
|
||||
V3D33_TEXTURE_UNIFORM_PARAMETER_1_CFG_MODE1_pack(&job->indirect,
|
||||
(uint8_t *)&packed,
|
||||
&unpacked);
|
||||
|
||||
cl_aligned_u32(uniforms, p1 | packed | sview->p1);
|
||||
}
|
||||
|
||||
/** Writes the V3D 4.x TMU configuration parameter 0. */
|
||||
static void
|
||||
write_tmu_p0(struct v3d_job *job,
|
||||
|
|
@ -328,11 +280,6 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_job *job,
|
|||
&v3d->shaderimg[stage], data);
|
||||
break;
|
||||
|
||||
case QUNIFORM_TEXTURE_CONFIG_P1:
|
||||
write_texture_p1(job, &uniforms, texstate,
|
||||
data);
|
||||
break;
|
||||
|
||||
case QUNIFORM_TEXRECT_SCALE_X:
|
||||
case QUNIFORM_TEXRECT_SCALE_Y:
|
||||
cl_aligned_u32(&uniforms,
|
||||
|
|
@ -437,13 +384,7 @@ v3d_write_uniforms(struct v3d_context *v3d, struct v3d_job *job,
|
|||
break;
|
||||
|
||||
default:
|
||||
assert(quniform_contents_is_texture_p0(uinfo->contents[i]));
|
||||
|
||||
write_texture_p0(job, &uniforms, texstate,
|
||||
uinfo->contents[i] -
|
||||
QUNIFORM_TEXTURE_CONFIG_P0_0,
|
||||
data);
|
||||
break;
|
||||
unreachable("Unknown QUNIFORM");
|
||||
|
||||
}
|
||||
#if 0
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job)
|
|||
|
||||
job->tile_alloc = v3d_bo_alloc(v3d->screen, tile_alloc_size,
|
||||
"tile_alloc");
|
||||
uint32_t tsda_per_tile_size = v3d->screen->devinfo.ver >= 40 ? 256 : 64;
|
||||
uint32_t tsda_per_tile_size = 256;
|
||||
job->tile_state = v3d_bo_alloc(v3d->screen,
|
||||
MAX2(job->num_layers, 1) *
|
||||
job->draw_tiles_y *
|
||||
|
|
@ -83,7 +83,6 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job)
|
|||
tsda_per_tile_size,
|
||||
"TSDA");
|
||||
|
||||
#if V3D_VERSION >= 41
|
||||
/* This must go before the binning mode configuration. It is
|
||||
* required for layered framebuffers to work.
|
||||
*/
|
||||
|
|
@ -92,7 +91,6 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job)
|
|||
config.number_of_layers = job->num_layers;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(!job->msaa || !job->double_buffer);
|
||||
#if V3D_VERSION >= 71
|
||||
|
|
@ -113,7 +111,7 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job)
|
|||
|
||||
#endif
|
||||
|
||||
#if V3D_VERSION >= 40 && V3D_VERSION <= 42
|
||||
#if V3D_VERSION == 42
|
||||
cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) {
|
||||
config.width_in_pixels = job->draw_width;
|
||||
config.height_in_pixels = job->draw_height;
|
||||
|
|
@ -126,34 +124,6 @@ v3dX(start_binning)(struct v3d_context *v3d, struct v3d_job *job)
|
|||
config.maximum_bpp_of_all_render_targets = job->internal_bpp;
|
||||
}
|
||||
#endif
|
||||
#if V3D_VERSION < 40
|
||||
/* "Binning mode lists start with a Tile Binning Mode Configuration
|
||||
* item (120)"
|
||||
*
|
||||
* Part1 signals the end of binning config setup.
|
||||
*/
|
||||
cl_emit(&job->bcl, TILE_BINNING_MODE_CFG_PART2, config) {
|
||||
config.tile_allocation_memory_address =
|
||||
cl_address(job->tile_alloc, 0);
|
||||
config.tile_allocation_memory_size = job->tile_alloc->size;
|
||||
}
|
||||
|
||||
cl_emit(&job->bcl, TILE_BINNING_MODE_CFG_PART1, config) {
|
||||
config.tile_state_data_array_base_address =
|
||||
cl_address(job->tile_state, 0);
|
||||
|
||||
config.width_in_tiles = job->draw_tiles_x;
|
||||
config.height_in_tiles = job->draw_tiles_y;
|
||||
/* Must be >= 1 */
|
||||
config.number_of_render_targets =
|
||||
MAX2(job->nr_cbufs, 1);
|
||||
|
||||
config.multisample_mode_4x = job->msaa;
|
||||
config.double_buffer_in_non_ms_mode = job->double_buffer;
|
||||
|
||||
config.maximum_bpp_of_all_render_targets = job->internal_bpp;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* There's definitely nothing in the VCD cache we want. */
|
||||
cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin);
|
||||
|
|
@ -380,7 +350,6 @@ v3d_emit_wait_for_tf_if_needed(struct v3d_context *v3d, struct v3d_job *job)
|
|||
}
|
||||
}
|
||||
|
||||
#if V3D_VERSION >= 41
|
||||
static void
|
||||
v3d_emit_gs_state_record(struct v3d_job *job,
|
||||
struct v3d_compiled_shader *gs_bin,
|
||||
|
|
@ -396,7 +365,7 @@ v3d_emit_gs_state_record(struct v3d_job *job,
|
|||
gs_bin->prog_data.gs->base.threads == 4;
|
||||
shader.geometry_bin_mode_shader_start_in_final_thread_section =
|
||||
gs_bin->prog_data.gs->base.single_seg;
|
||||
#if V3D_VERSION <= 42
|
||||
#if V3D_VERSION == 42
|
||||
shader.geometry_bin_mode_shader_propagate_nans = true;
|
||||
#endif
|
||||
shader.geometry_bin_mode_shader_uniforms_address =
|
||||
|
|
@ -408,7 +377,7 @@ v3d_emit_gs_state_record(struct v3d_job *job,
|
|||
gs->prog_data.gs->base.threads == 4;
|
||||
shader.geometry_render_mode_shader_start_in_final_thread_section =
|
||||
gs->prog_data.gs->base.single_seg;
|
||||
#if V3D_VERSION <= 42
|
||||
#if V3D_VERSION == 42
|
||||
shader.geometry_render_mode_shader_propagate_nans = true;
|
||||
#endif
|
||||
shader.geometry_render_mode_shader_uniforms_address =
|
||||
|
|
@ -500,7 +469,6 @@ v3d_emit_tes_gs_shader_params(struct v3d_job *job,
|
|||
shader.gbg_min_gs_output_segments_required_in_play = 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
v3d_emit_gl_shader_state(struct v3d_context *v3d,
|
||||
|
|
@ -559,14 +527,12 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
|
|||
|
||||
uint32_t shader_state_record_length =
|
||||
cl_packet_length(GL_SHADER_STATE_RECORD);
|
||||
#if V3D_VERSION >= 41
|
||||
if (v3d->prog.gs) {
|
||||
shader_state_record_length +=
|
||||
cl_packet_length(GEOMETRY_SHADER_STATE_RECORD) +
|
||||
cl_packet_length(TESSELLATION_GEOMETRY_COMMON_PARAMS) +
|
||||
2 * cl_packet_length(TESSELLATION_GEOMETRY_SHADER_PARAMS);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* See GFXH-930 workaround below */
|
||||
uint32_t shader_rec_offset =
|
||||
|
|
@ -582,8 +548,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
|
|||
*/
|
||||
|
||||
struct vpm_config vpm_cfg_bin, vpm_cfg;
|
||||
|
||||
assert(v3d->screen->devinfo.ver >= 41 || !v3d->prog.gs);
|
||||
v3d_compute_vpm_config(&v3d->screen->devinfo,
|
||||
v3d->prog.cs->prog_data.vs,
|
||||
v3d->prog.vs->prog_data.vs,
|
||||
|
|
@ -593,7 +557,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
|
|||
&vpm_cfg);
|
||||
|
||||
if (v3d->prog.gs) {
|
||||
#if V3D_VERSION >= 41
|
||||
v3d_emit_gs_state_record(v3d->job,
|
||||
v3d->prog.gs_bin, gs_bin_uniforms,
|
||||
v3d->prog.gs, gs_uniforms);
|
||||
|
|
@ -614,9 +577,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
|
|||
vpm_cfg.gs_width,
|
||||
vpm_cfg.Gd,
|
||||
vpm_cfg.Gv);
|
||||
#else
|
||||
unreachable("No GS support pre-4.1");
|
||||
#endif
|
||||
}
|
||||
|
||||
cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) {
|
||||
|
|
@ -643,20 +603,16 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
|
|||
shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
|
||||
v3d->prog.fs->prog_data.fs->uses_center_w;
|
||||
|
||||
#if V3D_VERSION >= 41
|
||||
shader.any_shader_reads_hardware_written_primitive_id =
|
||||
(v3d->prog.gs && v3d->prog.gs->prog_data.gs->uses_pid) ||
|
||||
v3d->prog.fs->prog_data.fs->uses_pid;
|
||||
shader.insert_primitive_id_as_first_varying_to_fragment_shader =
|
||||
!v3d->prog.gs && v3d->prog.fs->prog_data.fs->uses_pid;
|
||||
#endif
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
shader.do_scoreboard_wait_on_first_thread_switch =
|
||||
shader.do_scoreboard_wait_on_first_thread_switch =
|
||||
v3d->prog.fs->prog_data.fs->lock_scoreboard_on_first_thrsw;
|
||||
shader.disable_implicit_point_line_varyings =
|
||||
shader.disable_implicit_point_line_varyings =
|
||||
!v3d->prog.fs->prog_data.fs->uses_implicit_point_line_varyings;
|
||||
#endif
|
||||
|
||||
shader.number_of_varyings_in_fragment_shader =
|
||||
v3d->prog.fs->prog_data.fs->num_inputs;
|
||||
|
|
@ -671,7 +627,7 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
|
|||
cl_address(v3d_resource(v3d->prog.fs->resource)->bo,
|
||||
v3d->prog.fs->offset);
|
||||
|
||||
#if V3D_VERSION <= 42
|
||||
#if V3D_VERSION == 42
|
||||
shader.coordinate_shader_propagate_nans = true;
|
||||
shader.vertex_shader_propagate_nans = true;
|
||||
shader.fragment_shader_propagate_nans = true;
|
||||
|
|
@ -711,7 +667,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
|
|||
shader.vertex_shader_uniforms_address = vs_uniforms;
|
||||
shader.fragment_shader_uniforms_address = fs_uniforms;
|
||||
|
||||
#if V3D_VERSION >= 41
|
||||
shader.min_coord_shader_input_segments_required_in_play =
|
||||
vpm_cfg_bin.As;
|
||||
shader.min_vertex_shader_input_segments_required_in_play =
|
||||
|
|
@ -735,20 +690,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
|
|||
v3d->prog.vs->prog_data.vs->base.single_seg;
|
||||
shader.fragment_shader_start_in_final_thread_section =
|
||||
v3d->prog.fs->prog_data.fs->base.single_seg;
|
||||
#else
|
||||
shader.coordinate_shader_4_way_threadable =
|
||||
v3d->prog.cs->prog_data.vs->base.threads == 4;
|
||||
shader.coordinate_shader_2_way_threadable =
|
||||
v3d->prog.cs->prog_data.vs->base.threads == 2;
|
||||
shader.vertex_shader_4_way_threadable =
|
||||
v3d->prog.vs->prog_data.vs->base.threads == 4;
|
||||
shader.vertex_shader_2_way_threadable =
|
||||
v3d->prog.vs->prog_data.vs->base.threads == 2;
|
||||
shader.fragment_shader_4_way_threadable =
|
||||
v3d->prog.fs->prog_data.fs->base.threads == 4;
|
||||
shader.fragment_shader_2_way_threadable =
|
||||
v3d->prog.fs->prog_data.fs->base.threads == 2;
|
||||
#endif
|
||||
|
||||
shader.vertex_id_read_by_coordinate_shader =
|
||||
v3d->prog.cs->prog_data.vs->uses_vid;
|
||||
|
|
@ -759,7 +700,7 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
|
|||
shader.instance_id_read_by_vertex_shader =
|
||||
v3d->prog.vs->prog_data.vs->uses_iid;
|
||||
|
||||
#if V3D_VERSION <= 42
|
||||
#if V3D_VERSION == 42
|
||||
shader.address_of_default_attribute_values =
|
||||
cl_address(v3d_resource(vtx->defaults)->bo,
|
||||
vtx->defaults_offset);
|
||||
|
|
@ -802,9 +743,7 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
|
|||
if (i == vtx->num_elements - 1 && !cs_loaded_any) {
|
||||
attr.number_of_values_read_by_coordinate_shader = 1;
|
||||
}
|
||||
#if V3D_VERSION >= 41
|
||||
attr.maximum_index = 0xffffff;
|
||||
#endif
|
||||
}
|
||||
STATIC_ASSERT(sizeof(vtx->attrs) >= V3D_MAX_VS_INPUTS / 4 * size);
|
||||
}
|
||||
|
|
@ -833,7 +772,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
|
|||
vcm.number_of_16_vertex_batches_for_rendering = vpm_cfg.Vc;
|
||||
}
|
||||
|
||||
#if V3D_VERSION >= 41
|
||||
if (v3d->prog.gs) {
|
||||
cl_emit(&job->bcl, GL_SHADER_STATE_INCLUDING_GS, state) {
|
||||
state.address = cl_address(job->indirect.bo,
|
||||
|
|
@ -847,13 +785,6 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
|
|||
state.number_of_attribute_arrays = num_elements_to_emit;
|
||||
}
|
||||
}
|
||||
#else
|
||||
assert(!v3d->prog.gs);
|
||||
cl_emit(&job->bcl, GL_SHADER_STATE, state) {
|
||||
state.address = cl_address(job->indirect.bo, shader_rec_offset);
|
||||
state.number_of_attribute_arrays = num_elements_to_emit;
|
||||
}
|
||||
#endif
|
||||
|
||||
v3d_bo_unreference(&cs_uniforms.bo);
|
||||
v3d_bo_unreference(&vs_uniforms.bo);
|
||||
|
|
@ -1164,13 +1095,6 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
|
|||
}
|
||||
|
||||
uint32_t prim_tf_enable = 0;
|
||||
#if V3D_VERSION < 40
|
||||
/* V3D 3.x: The HW only processes transform feedback on primitives
|
||||
* with the flag set.
|
||||
*/
|
||||
if (v3d->streamout.num_targets)
|
||||
prim_tf_enable = (V3D_PRIM_POINTS_TF - V3D_PRIM_POINTS);
|
||||
#endif
|
||||
|
||||
v3d->prim_restart = info->primitive_restart;
|
||||
|
||||
|
|
@ -1194,20 +1118,14 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
|
|||
}
|
||||
struct v3d_resource *rsc = v3d_resource(prsc);
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
cl_emit(&job->bcl, INDEX_BUFFER_SETUP, ib) {
|
||||
ib.address = cl_address(rsc->bo, 0);
|
||||
ib.size = rsc->bo->size;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (indirect && indirect->buffer) {
|
||||
cl_emit(&job->bcl, INDIRECT_INDEXED_INSTANCED_PRIM_LIST, prim) {
|
||||
prim.index_type = ffs(info->index_size) - 1;
|
||||
#if V3D_VERSION < 40
|
||||
prim.address_of_indices_list =
|
||||
cl_address(rsc->bo, offset);
|
||||
#endif /* V3D_VERSION < 40 */
|
||||
prim.mode = hw_prim_type | prim_tf_enable;
|
||||
prim.enable_primitive_restarts = info->primitive_restart;
|
||||
|
||||
|
|
@ -1220,13 +1138,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
|
|||
} else if (info->instance_count > 1) {
|
||||
cl_emit(&job->bcl, INDEXED_INSTANCED_PRIM_LIST, prim) {
|
||||
prim.index_type = ffs(info->index_size) - 1;
|
||||
#if V3D_VERSION >= 40
|
||||
prim.index_offset = offset;
|
||||
#else /* V3D_VERSION < 40 */
|
||||
prim.maximum_index = (1u << 31) - 1; /* XXX */
|
||||
prim.address_of_indices_list =
|
||||
cl_address(rsc->bo, offset);
|
||||
#endif /* V3D_VERSION < 40 */
|
||||
prim.mode = hw_prim_type | prim_tf_enable;
|
||||
prim.enable_primitive_restarts = info->primitive_restart;
|
||||
|
||||
|
|
@ -1237,13 +1149,7 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
|
|||
cl_emit(&job->bcl, INDEXED_PRIM_LIST, prim) {
|
||||
prim.index_type = ffs(info->index_size) - 1;
|
||||
prim.length = draws[0].count;
|
||||
#if V3D_VERSION >= 40
|
||||
prim.index_offset = offset;
|
||||
#else /* V3D_VERSION < 40 */
|
||||
prim.maximum_index = (1u << 31) - 1; /* XXX */
|
||||
prim.address_of_indices_list =
|
||||
cl_address(rsc->bo, offset);
|
||||
#endif /* V3D_VERSION < 40 */
|
||||
prim.mode = hw_prim_type | prim_tf_enable;
|
||||
prim.enable_primitive_restarts = info->primitive_restart;
|
||||
}
|
||||
|
|
@ -1361,7 +1267,6 @@ v3d_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
|
|||
v3d_flush(pctx);
|
||||
}
|
||||
|
||||
#if V3D_VERSION >= 41
|
||||
#define V3D_CSD_CFG012_WG_COUNT_SHIFT 16
|
||||
#define V3D_CSD_CFG012_WG_OFFSET_SHIFT 0
|
||||
/* Allow this dispatch to start while the last one is still running. */
|
||||
|
|
@ -1563,7 +1468,6 @@ v3d_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
|
|||
v3d_bo_unreference(&uniforms.bo);
|
||||
v3d_bo_unreference(&v3d->compute_shared_memory);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Implements gallium's clear() hook (glClear()) by drawing a pair of triangles.
|
||||
|
|
@ -1607,7 +1511,7 @@ v3d_tlb_clear(struct v3d_job *job, unsigned buffers,
|
|||
* if it would be possible to need to emit a load of just one after
|
||||
* we've set up our TLB clears. This issue is fixed since V3D 4.3.18.
|
||||
*/
|
||||
if (v3d->screen->devinfo.ver <= 42 &&
|
||||
if (v3d->screen->devinfo.ver == 42 &&
|
||||
buffers & PIPE_CLEAR_DEPTHSTENCIL &&
|
||||
(buffers & PIPE_CLEAR_DEPTHSTENCIL) != PIPE_CLEAR_DEPTHSTENCIL &&
|
||||
job->zsbuf &&
|
||||
|
|
@ -1762,8 +1666,6 @@ v3dX(draw_init)(struct pipe_context *pctx)
|
|||
pctx->clear = v3d_clear;
|
||||
pctx->clear_render_target = v3d_clear_render_target;
|
||||
pctx->clear_depth_stencil = v3d_clear_depth_stencil;
|
||||
#if V3D_VERSION >= 41
|
||||
if (v3d_context(pctx)->screen->has_csd)
|
||||
pctx->launch_grid = v3d_launch_grid;
|
||||
#endif
|
||||
}
|
||||
|
|
|
|||
|
|
@ -78,172 +78,6 @@ v3d_factor(enum pipe_blendfactor factor, bool dst_alpha_one)
|
|||
}
|
||||
}
|
||||
|
||||
#if V3D_VERSION < 40
|
||||
static inline uint16_t
|
||||
swizzled_border_color(const struct v3d_device_info *devinfo,
|
||||
struct pipe_sampler_state *sampler,
|
||||
struct v3d_sampler_view *sview,
|
||||
int chan)
|
||||
{
|
||||
const struct util_format_description *desc =
|
||||
util_format_description(sview->base.format);
|
||||
uint8_t swiz = chan;
|
||||
|
||||
/* If we're doing swizzling in the sampler, then only rearrange the
|
||||
* border color for the mismatch between the V3D texture format and
|
||||
* the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by
|
||||
* the sampler's swizzle.
|
||||
*
|
||||
* For swizzling in the shader, we don't do any pre-swizzling of the
|
||||
* border color.
|
||||
*/
|
||||
if (v3d_get_tex_return_size(devinfo, sview->base.format) != 32)
|
||||
swiz = desc->swizzle[swiz];
|
||||
|
||||
switch (swiz) {
|
||||
case PIPE_SWIZZLE_0:
|
||||
return _mesa_float_to_half(0.0);
|
||||
case PIPE_SWIZZLE_1:
|
||||
return _mesa_float_to_half(1.0);
|
||||
default:
|
||||
return _mesa_float_to_half(sampler->border_color.f[swiz]);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_one_texture(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex,
|
||||
int i)
|
||||
{
|
||||
struct v3d_job *job = v3d->job;
|
||||
struct pipe_sampler_state *psampler = stage_tex->samplers[i];
|
||||
struct v3d_sampler_state *sampler = v3d_sampler_state(psampler);
|
||||
struct pipe_sampler_view *psview = stage_tex->textures[i];
|
||||
struct v3d_sampler_view *sview = v3d_sampler_view(psview);
|
||||
struct pipe_resource *prsc = psview->texture;
|
||||
struct v3d_resource *rsc = v3d_resource(prsc);
|
||||
const struct v3d_device_info *devinfo = &v3d->screen->devinfo;
|
||||
|
||||
stage_tex->texture_state[i].offset =
|
||||
v3d_cl_ensure_space(&job->indirect,
|
||||
cl_packet_length(TEXTURE_SHADER_STATE),
|
||||
32);
|
||||
v3d_bo_set_reference(&stage_tex->texture_state[i].bo,
|
||||
job->indirect.bo);
|
||||
|
||||
uint32_t return_size = v3d_get_tex_return_size(devinfo, psview->format);
|
||||
|
||||
struct V3D33_TEXTURE_SHADER_STATE unpacked = {
|
||||
/* XXX */
|
||||
.border_color_red = swizzled_border_color(devinfo, psampler,
|
||||
sview, 0),
|
||||
.border_color_green = swizzled_border_color(devinfo, psampler,
|
||||
sview, 1),
|
||||
.border_color_blue = swizzled_border_color(devinfo, psampler,
|
||||
sview, 2),
|
||||
.border_color_alpha = swizzled_border_color(devinfo, psampler,
|
||||
sview, 3),
|
||||
|
||||
/* In the normal texturing path, the LOD gets clamped between
|
||||
* min/max, and the base_level field (set in the sampler view
|
||||
* from first_level) only decides where the min/mag switch
|
||||
* happens, so we need to use the LOD clamps to keep us
|
||||
* between min and max.
|
||||
*
|
||||
* For txf, the LOD clamp is still used, despite GL not
|
||||
* wanting that. We will need to have a separate
|
||||
* TEXTURE_SHADER_STATE that ignores psview->min/max_lod to
|
||||
* support txf properly.
|
||||
*/
|
||||
.min_level_of_detail = MIN2(psview->u.tex.first_level +
|
||||
MAX2(psampler->min_lod, 0),
|
||||
psview->u.tex.last_level),
|
||||
.max_level_of_detail = MIN2(psview->u.tex.first_level +
|
||||
MAX2(psampler->max_lod,
|
||||
psampler->min_lod),
|
||||
psview->u.tex.last_level),
|
||||
|
||||
.texture_base_pointer = cl_address(rsc->bo,
|
||||
rsc->slices[0].offset),
|
||||
|
||||
.output_32_bit = return_size == 32,
|
||||
};
|
||||
|
||||
/* Set up the sampler swizzle if we're doing 16-bit sampling. For
|
||||
* 32-bit, we leave swizzling up to the shader compiler.
|
||||
*
|
||||
* Note: Contrary to the docs, the swizzle still applies even if the
|
||||
* return size is 32. It's just that you probably want to swizzle in
|
||||
* the shader, because you need the Y/Z/W channels to be defined.
|
||||
*/
|
||||
if (return_size == 32) {
|
||||
unpacked.swizzle_r = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_X);
|
||||
unpacked.swizzle_g = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_Y);
|
||||
unpacked.swizzle_b = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_Z);
|
||||
unpacked.swizzle_a = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_W);
|
||||
} else {
|
||||
unpacked.swizzle_r = v3d_translate_pipe_swizzle(sview->swizzle[0]);
|
||||
unpacked.swizzle_g = v3d_translate_pipe_swizzle(sview->swizzle[1]);
|
||||
unpacked.swizzle_b = v3d_translate_pipe_swizzle(sview->swizzle[2]);
|
||||
unpacked.swizzle_a = v3d_translate_pipe_swizzle(sview->swizzle[3]);
|
||||
}
|
||||
|
||||
int min_img_filter = psampler->min_img_filter;
|
||||
int min_mip_filter = psampler->min_mip_filter;
|
||||
int mag_img_filter = psampler->mag_img_filter;
|
||||
|
||||
if (return_size == 32) {
|
||||
min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
|
||||
min_img_filter = PIPE_TEX_FILTER_NEAREST;
|
||||
mag_img_filter = PIPE_TEX_FILTER_NEAREST;
|
||||
}
|
||||
|
||||
bool min_nearest = min_img_filter == PIPE_TEX_FILTER_NEAREST;
|
||||
switch (min_mip_filter) {
|
||||
case PIPE_TEX_MIPFILTER_NONE:
|
||||
unpacked.filter += min_nearest ? 2 : 0;
|
||||
break;
|
||||
case PIPE_TEX_MIPFILTER_NEAREST:
|
||||
unpacked.filter += min_nearest ? 4 : 8;
|
||||
break;
|
||||
case PIPE_TEX_MIPFILTER_LINEAR:
|
||||
unpacked.filter += min_nearest ? 4 : 8;
|
||||
unpacked.filter += 2;
|
||||
break;
|
||||
}
|
||||
|
||||
if (mag_img_filter == PIPE_TEX_FILTER_NEAREST)
|
||||
unpacked.filter++;
|
||||
|
||||
if (psampler->max_anisotropy > 8)
|
||||
unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_16_1;
|
||||
else if (psampler->max_anisotropy > 4)
|
||||
unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_8_1;
|
||||
else if (psampler->max_anisotropy > 2)
|
||||
unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_4_1;
|
||||
else if (psampler->max_anisotropy)
|
||||
unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_2_1;
|
||||
|
||||
uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)];
|
||||
cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked);
|
||||
|
||||
for (int i = 0; i < ARRAY_SIZE(packed); i++)
|
||||
packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i];
|
||||
|
||||
/* TMU indirect structs need to be 32b aligned. */
|
||||
v3d_cl_ensure_space(&job->indirect, ARRAY_SIZE(packed), 32);
|
||||
cl_emit_prepacked(&job->indirect, &packed);
|
||||
}
|
||||
|
||||
static void
|
||||
emit_textures(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex)
|
||||
{
|
||||
for (int i = 0; i < stage_tex->num_textures; i++) {
|
||||
if (stage_tex->textures[i])
|
||||
emit_one_texture(v3d, stage_tex, i);
|
||||
}
|
||||
}
|
||||
#endif /* V3D_VERSION < 40 */
|
||||
|
||||
static uint32_t
|
||||
translate_colormask(struct v3d_context *v3d, uint32_t colormask, int rt)
|
||||
{
|
||||
|
|
@ -263,18 +97,12 @@ emit_rt_blend(struct v3d_context *v3d, struct v3d_job *job,
|
|||
{
|
||||
struct pipe_rt_blend_state *rtblend = &blend->rt[rt];
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
/* We don't need to emit blend state for disabled RTs. */
|
||||
if (!rtblend->blend_enable)
|
||||
return;
|
||||
#endif
|
||||
|
||||
cl_emit(&job->bcl, BLEND_CFG, config) {
|
||||
#if V3D_VERSION >= 40
|
||||
config.render_target_mask = rt_mask;
|
||||
#else
|
||||
assert(rt == 0);
|
||||
#endif
|
||||
|
||||
config.color_blend_mode = rtblend->rgb_func;
|
||||
config.color_blend_dst_factor =
|
||||
|
|
@ -311,7 +139,6 @@ emit_flat_shade_flags(struct v3d_job *job,
|
|||
}
|
||||
}
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
static void
|
||||
emit_noperspective_flags(struct v3d_job *job,
|
||||
int varying_offset,
|
||||
|
|
@ -345,7 +172,6 @@ emit_centroid_flags(struct v3d_job *job,
|
|||
higher;
|
||||
}
|
||||
}
|
||||
#endif /* V3D_VERSION >= 40 */
|
||||
|
||||
static bool
|
||||
emit_varying_flags(struct v3d_job *job, uint32_t *flags,
|
||||
|
|
@ -433,14 +259,6 @@ v3dX(emit_state)(struct pipe_context *pctx)
|
|||
if (maxx > minx && maxy > miny) {
|
||||
clip.clip_window_width_in_pixels = maxx - minx;
|
||||
clip.clip_window_height_in_pixels = maxy - miny;
|
||||
} else if (V3D_VERSION < 41) {
|
||||
/* The HW won't entirely clip out when scissor
|
||||
* w/h is 0. Just treat it the same as
|
||||
* rasterizer discard.
|
||||
*/
|
||||
rasterizer_discard = true;
|
||||
clip.clip_window_width_in_pixels = 1;
|
||||
clip.clip_window_height_in_pixels = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -512,14 +330,14 @@ v3dX(emit_state)(struct pipe_context *pctx)
|
|||
/* Note: EZ state may update based on the compiled FS,
|
||||
* along with ZSA
|
||||
*/
|
||||
#if V3D_VERSION <= 42
|
||||
#if V3D_VERSION == 42
|
||||
config.early_z_updates_enable =
|
||||
(job->ez_state != V3D_EZ_DISABLED);
|
||||
#endif
|
||||
if (v3d->zsa->base.depth_enabled) {
|
||||
config.z_updates_enable =
|
||||
v3d->zsa->base.depth_writemask;
|
||||
#if V3D_VERSION <= 42
|
||||
#if V3D_VERSION == 42
|
||||
config.early_z_enable =
|
||||
config.early_z_updates_enable;
|
||||
#endif
|
||||
|
|
@ -559,7 +377,7 @@ v3dX(emit_state)(struct pipe_context *pctx)
|
|||
|
||||
if (v3d->dirty & V3D_DIRTY_RASTERIZER &&
|
||||
v3d->rasterizer->base.offset_tri) {
|
||||
if (v3d->screen->devinfo.ver <= 42 &&
|
||||
if (v3d->screen->devinfo.ver == 42 &&
|
||||
job->zsbuf &&
|
||||
job->zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
|
||||
cl_emit_prepacked_sized(&job->bcl,
|
||||
|
|
@ -583,7 +401,7 @@ v3dX(emit_state)(struct pipe_context *pctx)
|
|||
}
|
||||
|
||||
if (v3d->dirty & V3D_DIRTY_VIEWPORT) {
|
||||
#if V3D_VERSION <= 42
|
||||
#if V3D_VERSION == 42
|
||||
cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
|
||||
clip.viewport_half_width_in_1_256th_of_pixel =
|
||||
v3d->viewport.scale[0] * 256.0f;
|
||||
|
|
@ -617,12 +435,6 @@ v3dX(emit_state)(struct pipe_context *pctx)
|
|||
}
|
||||
|
||||
cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
|
||||
#if V3D_VERSION < 41
|
||||
vp.viewport_centre_x_coordinate =
|
||||
v3d->viewport.translate[0];
|
||||
vp.viewport_centre_y_coordinate =
|
||||
v3d->viewport.translate[1];
|
||||
#else
|
||||
float vp_fine_x = v3d->viewport.translate[0];
|
||||
float vp_fine_y = v3d->viewport.translate[1];
|
||||
int32_t vp_coarse_x = 0;
|
||||
|
|
@ -649,7 +461,6 @@ v3dX(emit_state)(struct pipe_context *pctx)
|
|||
vp.fine_y = vp_fine_y;
|
||||
vp.coarse_x = vp_coarse_x;
|
||||
vp.coarse_y = vp_coarse_y;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -657,11 +468,9 @@ v3dX(emit_state)(struct pipe_context *pctx)
|
|||
struct v3d_blend_state *blend = v3d->blend;
|
||||
|
||||
if (blend->blend_enables) {
|
||||
#if V3D_VERSION >= 40
|
||||
cl_emit(&job->bcl, BLEND_ENABLES, enables) {
|
||||
enables.mask = blend->blend_enables;
|
||||
}
|
||||
#endif
|
||||
|
||||
const uint32_t max_rts =
|
||||
V3D_MAX_RENDER_TARGETS(v3d->screen->devinfo.ver);
|
||||
|
|
@ -716,8 +525,7 @@ v3dX(emit_state)(struct pipe_context *pctx)
|
|||
/* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant
|
||||
* color.
|
||||
*/
|
||||
if (v3d->dirty & V3D_DIRTY_BLEND_COLOR ||
|
||||
(V3D_VERSION < 41 && (v3d->dirty & V3D_DIRTY_BLEND))) {
|
||||
if (v3d->dirty & V3D_DIRTY_BLEND_COLOR) {
|
||||
cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) {
|
||||
color.red_f16 = (v3d->swap_color_rb ?
|
||||
v3d->blend_color.hf[2] :
|
||||
|
|
@ -751,20 +559,6 @@ v3dX(emit_state)(struct pipe_context *pctx)
|
|||
}
|
||||
}
|
||||
|
||||
#if V3D_VERSION < 40
|
||||
/* Pre-4.x, we have texture state that depends on both the sampler and
|
||||
* the view, so we merge them together at draw time.
|
||||
*/
|
||||
if (v3d->dirty & V3D_DIRTY_FRAGTEX)
|
||||
emit_textures(v3d, &v3d->tex[PIPE_SHADER_FRAGMENT]);
|
||||
|
||||
if (v3d->dirty & V3D_DIRTY_GEOMTEX)
|
||||
emit_textures(v3d, &v3d->tex[PIPE_SHADER_GEOMETRY]);
|
||||
|
||||
if (v3d->dirty & V3D_DIRTY_VERTTEX)
|
||||
emit_textures(v3d, &v3d->tex[PIPE_SHADER_VERTEX]);
|
||||
#endif
|
||||
|
||||
if (v3d->dirty & V3D_DIRTY_FLAT_SHADE_FLAGS) {
|
||||
if (!emit_varying_flags(job,
|
||||
v3d->prog.fs->prog_data.fs->flat_shade_flags,
|
||||
|
|
@ -773,7 +567,6 @@ v3dX(emit_state)(struct pipe_context *pctx)
|
|||
}
|
||||
}
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
if (v3d->dirty & V3D_DIRTY_NOPERSPECTIVE_FLAGS) {
|
||||
if (!emit_varying_flags(job,
|
||||
v3d->prog.fs->prog_data.fs->noperspective_flags,
|
||||
|
|
@ -789,7 +582,6 @@ v3dX(emit_state)(struct pipe_context *pctx)
|
|||
cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Set up the transform feedback data specs (which VPM entries to
|
||||
* output to which buffers).
|
||||
|
|
@ -807,7 +599,6 @@ v3dX(emit_state)(struct pipe_context *pctx)
|
|||
tf_shader->tf_specs_psiz :
|
||||
tf_shader->tf_specs);
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
bool tf_enabled = v3d_transform_feedback_enabled(v3d);
|
||||
job->tf_enabled |= tf_enabled;
|
||||
|
||||
|
|
@ -816,23 +607,13 @@ v3dX(emit_state)(struct pipe_context *pctx)
|
|||
tf_shader->num_tf_specs;
|
||||
tfe.enable = tf_enabled;
|
||||
};
|
||||
#else /* V3D_VERSION < 40 */
|
||||
cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) {
|
||||
tfe.number_of_32_bit_output_buffer_address_following =
|
||||
so->num_targets;
|
||||
tfe.number_of_16_bit_output_data_specs_following =
|
||||
tf_shader->num_tf_specs;
|
||||
};
|
||||
#endif /* V3D_VERSION < 40 */
|
||||
for (int i = 0; i < tf_shader->num_tf_specs; i++) {
|
||||
cl_emit_prepacked(&job->bcl, &tf_specs[i]);
|
||||
}
|
||||
} else {
|
||||
#if V3D_VERSION >= 40
|
||||
cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
|
||||
tfe.enable = false;
|
||||
};
|
||||
#endif /* V3D_VERSION >= 40 */
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -850,7 +631,6 @@ v3dX(emit_state)(struct pipe_context *pctx)
|
|||
uint32_t offset = target ?
|
||||
v3d_stream_output_target(target)->offset * info->stride[i] * 4 : 0;
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
if (!target)
|
||||
continue;
|
||||
|
||||
|
|
@ -863,16 +643,6 @@ v3dX(emit_state)(struct pipe_context *pctx)
|
|||
(target->buffer_size - offset) >> 2;
|
||||
output.buffer_number = i;
|
||||
}
|
||||
#else /* V3D_VERSION < 40 */
|
||||
cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) {
|
||||
if (target) {
|
||||
output.address =
|
||||
cl_address(rsc->bo,
|
||||
target->buffer_offset +
|
||||
offset);
|
||||
}
|
||||
};
|
||||
#endif /* V3D_VERSION < 40 */
|
||||
if (target) {
|
||||
v3d_job_add_tf_write_resource(v3d->job,
|
||||
target->buffer);
|
||||
|
|
@ -889,7 +659,6 @@ v3dX(emit_state)(struct pipe_context *pctx)
|
|||
}
|
||||
}
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
if (v3d->dirty & V3D_DIRTY_SAMPLE_STATE) {
|
||||
cl_emit(&job->bcl, SAMPLE_STATE, state) {
|
||||
/* Note: SampleCoverage was handled at the
|
||||
|
|
@ -899,5 +668,4 @@ v3dX(emit_state)(struct pipe_context *pctx)
|
|||
state.mask = job->msaa ? v3d->sample_mask : 0xf;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
|
|||
|
|
@ -145,7 +145,6 @@ static const struct v3d_format format_table[] = {
|
|||
FORMAT(R11G11B10_FLOAT, R11F_G11F_B10F, R11F_G11F_B10F, SWIZ_XYZ1, 16, 0),
|
||||
FORMAT(R9G9B9E5_FLOAT, NO, RGB9_E5, SWIZ_XYZ1, 16, 0),
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
FORMAT(S8_UINT_Z24_UNORM, D24S8, DEPTH24_X8, SWIZ_XXXX, 32, 1),
|
||||
FORMAT(X8Z24_UNORM, D24S8, DEPTH24_X8, SWIZ_XXXX, 32, 1),
|
||||
FORMAT(S8X24_UINT, S8, RGBA8UI, SWIZ_XXXX, 16, 1),
|
||||
|
|
@ -155,16 +154,6 @@ static const struct v3d_format format_table[] = {
|
|||
/* Pretend we support this, but it'll be separate Z32F depth and S8. */
|
||||
FORMAT(Z32_FLOAT_S8X24_UINT, D32F, DEPTH_COMP32F, SWIZ_XXXX, 32, 1),
|
||||
FORMAT(X32_S8X24_UINT, S8, R8UI, SWIZ_XXXX, 16, 1),
|
||||
#else
|
||||
FORMAT(S8_UINT_Z24_UNORM, ZS_DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_XXXX, 32, 1),
|
||||
FORMAT(X8Z24_UNORM, ZS_DEPTH24_STENCIL8, DEPTH24_X8, SWIZ_XXXX, 32, 1),
|
||||
FORMAT(S8X24_UINT, NO, R32F, SWIZ_XXXX, 32, 1),
|
||||
FORMAT(Z32_FLOAT, ZS_DEPTH_COMPONENT32F, R32F, SWIZ_XXXX, 32, 1),
|
||||
FORMAT(Z16_UNORM, ZS_DEPTH_COMPONENT16, DEPTH_COMP16, SWIZ_XXXX, 32, 1),
|
||||
|
||||
/* Pretend we support this, but it'll be separate Z32F depth and S8. */
|
||||
FORMAT(Z32_FLOAT_S8X24_UINT, ZS_DEPTH_COMPONENT32F, R32F, SWIZ_XXXX, 32, 1),
|
||||
#endif
|
||||
|
||||
FORMAT(ETC2_RGB8, NO, RGB8_ETC2, SWIZ_XYZ1, 16, 0),
|
||||
FORMAT(ETC2_SRGB8, NO, RGB8_ETC2, SWIZ_XYZ1, 16, 0),
|
||||
|
|
@ -233,9 +222,6 @@ v3dX(get_internal_type_bpp_for_output_format)(uint32_t format,
|
|||
{
|
||||
switch (format) {
|
||||
case V3D_OUTPUT_IMAGE_FORMAT_RGBA8:
|
||||
#if V3D_VERSION < 41
|
||||
case V3D_OUTPUT_IMAGE_FORMAT_RGBX8:
|
||||
#endif
|
||||
case V3D_OUTPUT_IMAGE_FORMAT_RGB8:
|
||||
case V3D_OUTPUT_IMAGE_FORMAT_RG8:
|
||||
case V3D_OUTPUT_IMAGE_FORMAT_R8:
|
||||
|
|
@ -264,9 +250,6 @@ v3dX(get_internal_type_bpp_for_output_format)(uint32_t format,
|
|||
case V3D_OUTPUT_IMAGE_FORMAT_SRGB:
|
||||
case V3D_OUTPUT_IMAGE_FORMAT_RGB10_A2:
|
||||
case V3D_OUTPUT_IMAGE_FORMAT_R11F_G11F_B10F:
|
||||
#if V3D_VERSION < 41
|
||||
case V3D_OUTPUT_IMAGE_FORMAT_SRGBX8:
|
||||
#endif
|
||||
case V3D_OUTPUT_IMAGE_FORMAT_RGBA16F:
|
||||
/* Note that sRGB RTs are stored in the tile buffer at 16F,
|
||||
* and the conversion to sRGB happens at tilebuffer
|
||||
|
|
|
|||
|
|
@ -34,9 +34,7 @@ void v3dX(bcl_epilogue)(struct v3d_context *v3d, struct v3d_job *job)
|
|||
{
|
||||
v3d_cl_ensure_space_with_branch(&job->bcl,
|
||||
cl_packet_length(PRIMITIVE_COUNTS_FEEDBACK) +
|
||||
#if V3D_VERSION >= 41
|
||||
cl_packet_length(TRANSFORM_FEEDBACK_SPECS) +
|
||||
#endif
|
||||
cl_packet_length(FLUSH));
|
||||
|
||||
if (job->tf_enabled || job->needs_primitives_generated) {
|
||||
|
|
@ -57,13 +55,11 @@ void v3dX(bcl_epilogue)(struct v3d_context *v3d, struct v3d_job *job)
|
|||
* cleans up and finishes before it gets reset by the next
|
||||
* frame's tile binning mode cfg packet. (SWVC5-718).
|
||||
*/
|
||||
#if V3D_VERSION >= 41
|
||||
if (job->tf_enabled) {
|
||||
cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
|
||||
tfe.enable = false;
|
||||
};
|
||||
}
|
||||
#endif /* V3D_VERSION >= 41 */
|
||||
|
||||
/* We just FLUSH here to tell the HW to cap the bin CLs with a
|
||||
* return. Any remaining state changes won't be flushed to
|
||||
|
|
|
|||
|
|
@ -36,23 +36,6 @@
|
|||
|
||||
#define PIPE_FIRST_COLOR_BUFFER_BIT (ffs(PIPE_CLEAR_COLOR0) - 1)
|
||||
|
||||
/* The HW queues up the load until the tile coordinates show up, but can only
|
||||
* track one at a time. If we need to do more than one load, then we need to
|
||||
* flush out the previous load by emitting the tile coordinates and doing a
|
||||
* dummy store.
|
||||
*/
|
||||
static void
|
||||
flush_last_load(struct v3d_cl *cl)
|
||||
{
|
||||
if (V3D_VERSION >= 40)
|
||||
return;
|
||||
|
||||
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
|
||||
cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
|
||||
store.buffer_to_store = NONE;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
load_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer,
|
||||
int layer, uint32_t pipe_bit, uint32_t *loads_pending)
|
||||
|
|
@ -73,7 +56,6 @@ load_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer,
|
|||
load.buffer_to_load = buffer;
|
||||
load.address = cl_address(rsc->bo, layer_offset);
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
load.memory_format = surf->tiling;
|
||||
if (separate_stencil)
|
||||
load.input_image_format = V3D_OUTPUT_IMAGE_FORMAT_S8;
|
||||
|
|
@ -96,20 +78,9 @@ load_general(struct v3d_cl *cl, struct pipe_surface *psurf, int buffer,
|
|||
else
|
||||
load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
|
||||
|
||||
#else /* V3D_VERSION < 40 */
|
||||
/* Can't do raw ZSTENCIL loads -- need to load/store them to
|
||||
* separate buffers for Z and stencil.
|
||||
*/
|
||||
assert(buffer != ZSTENCIL);
|
||||
load.raw_mode = true;
|
||||
load.padded_height_of_output_image_in_uif_blocks =
|
||||
surf->padded_height_of_output_image_in_uif_blocks;
|
||||
#endif /* V3D_VERSION < 40 */
|
||||
}
|
||||
|
||||
*loads_pending &= ~pipe_bit;
|
||||
if (*loads_pending)
|
||||
flush_last_load(cl);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -127,7 +98,6 @@ store_general(struct v3d_job *job,
|
|||
}
|
||||
|
||||
*stores_pending &= ~pipe_bit;
|
||||
bool last_store = !(*stores_pending);
|
||||
|
||||
struct v3d_resource *rsc = v3d_resource(psurf->texture);
|
||||
|
||||
|
|
@ -140,7 +110,6 @@ store_general(struct v3d_job *job,
|
|||
store.buffer_to_store = buffer;
|
||||
store.address = cl_address(rsc->bo, layer_offset);
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
store.clear_buffer_being_stored = false;
|
||||
|
||||
if (separate_stencil)
|
||||
|
|
@ -168,35 +137,6 @@ store_general(struct v3d_job *job,
|
|||
store.decimate_mode = V3D_DECIMATE_MODE_4X;
|
||||
else
|
||||
store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
|
||||
|
||||
#else /* V3D_VERSION < 40 */
|
||||
/* Can't do raw ZSTENCIL stores -- need to load/store them to
|
||||
* separate buffers for Z and stencil.
|
||||
*/
|
||||
assert(buffer != ZSTENCIL);
|
||||
store.raw_mode = true;
|
||||
if (!last_store) {
|
||||
store.disable_color_buffers_clear_on_write = true;
|
||||
store.disable_z_buffer_clear_on_write = true;
|
||||
store.disable_stencil_buffer_clear_on_write = true;
|
||||
} else {
|
||||
store.disable_color_buffers_clear_on_write =
|
||||
!(((pipe_bit & PIPE_CLEAR_COLOR_BUFFERS) &&
|
||||
general_color_clear &&
|
||||
(job->clear & pipe_bit)));
|
||||
store.disable_z_buffer_clear_on_write =
|
||||
!(job->clear & PIPE_CLEAR_DEPTH);
|
||||
store.disable_stencil_buffer_clear_on_write =
|
||||
!(job->clear & PIPE_CLEAR_STENCIL);
|
||||
}
|
||||
store.padded_height_of_output_image_in_uif_blocks =
|
||||
surf->padded_height_of_output_image_in_uif_blocks;
|
||||
#endif /* V3D_VERSION < 40 */
|
||||
}
|
||||
|
||||
/* There must be a TILE_COORDINATES_IMPLICIT between each store. */
|
||||
if (V3D_VERSION < 40 && !last_store) {
|
||||
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -223,7 +163,6 @@ v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl, int layer)
|
|||
*/
|
||||
assert(!job->bbuf || job->load == 0);
|
||||
assert(!job->bbuf || job->nr_cbufs <= 1);
|
||||
assert(!job->bbuf || V3D_VERSION >= 40);
|
||||
|
||||
uint32_t loads_pending = job->bbuf ? job->store : job->load;
|
||||
|
||||
|
|
@ -235,18 +174,14 @@ v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl, int layer)
|
|||
struct pipe_surface *psurf = job->bbuf ? job->bbuf : job->cbufs[i];
|
||||
assert(!job->bbuf || i == 0);
|
||||
|
||||
if (!psurf || (V3D_VERSION < 40 &&
|
||||
psurf->texture->nr_samples <= 1)) {
|
||||
if (!psurf)
|
||||
continue;
|
||||
}
|
||||
|
||||
load_general(cl, psurf, RENDER_TARGET_0 + i, layer,
|
||||
bit, &loads_pending);
|
||||
}
|
||||
|
||||
if ((loads_pending & PIPE_CLEAR_DEPTHSTENCIL) &&
|
||||
(V3D_VERSION >= 40 ||
|
||||
(job->zsbuf && job->zsbuf->texture->nr_samples > 1))) {
|
||||
if (loads_pending & PIPE_CLEAR_DEPTHSTENCIL) {
|
||||
assert(!job->early_zs_clear);
|
||||
struct pipe_surface *src = job->bbuf ? job->bbuf : job->zsbuf;
|
||||
struct v3d_resource *rsc = v3d_resource(src->texture);
|
||||
|
|
@ -268,57 +203,14 @@ v3d_rcl_emit_loads(struct v3d_job *job, struct v3d_cl *cl, int layer)
|
|||
}
|
||||
}
|
||||
|
||||
#if V3D_VERSION < 40
|
||||
/* The initial reload will be queued until we get the
|
||||
* tile coordinates.
|
||||
*/
|
||||
if (loads_pending) {
|
||||
cl_emit(cl, RELOAD_TILE_COLOR_BUFFER, load) {
|
||||
load.disable_color_buffer_load =
|
||||
(~loads_pending &
|
||||
PIPE_CLEAR_COLOR_BUFFERS) >>
|
||||
PIPE_FIRST_COLOR_BUFFER_BIT;
|
||||
load.enable_z_load =
|
||||
loads_pending & PIPE_CLEAR_DEPTH;
|
||||
load.enable_stencil_load =
|
||||
loads_pending & PIPE_CLEAR_STENCIL;
|
||||
}
|
||||
}
|
||||
#else /* V3D_VERSION >= 40 */
|
||||
assert(!loads_pending);
|
||||
cl_emit(cl, END_OF_LOADS, end);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
|
||||
{
|
||||
#if V3D_VERSION < 40
|
||||
UNUSED bool needs_color_clear = job->clear & PIPE_CLEAR_COLOR_BUFFERS;
|
||||
UNUSED bool needs_z_clear = job->clear & PIPE_CLEAR_DEPTH;
|
||||
UNUSED bool needs_s_clear = job->clear & PIPE_CLEAR_STENCIL;
|
||||
|
||||
/* For clearing color in a TLB general on V3D 3.3:
|
||||
*
|
||||
* - NONE buffer store clears all TLB color buffers.
|
||||
* - color buffer store clears just the TLB color buffer being stored.
|
||||
* - Z/S buffers store may not clear the TLB color buffer.
|
||||
*
|
||||
* And on V3D 4.1, we only have one flag for "clear the buffer being
|
||||
* stored" in the general packet, and a separate packet to clear all
|
||||
* color TLB buffers.
|
||||
*
|
||||
* As a result, we only bother flagging TLB color clears in a general
|
||||
* packet when we don't have to emit a separate packet to clear all
|
||||
* TLB color buffers.
|
||||
*/
|
||||
bool general_color_clear = (needs_color_clear &&
|
||||
(job->clear & PIPE_CLEAR_COLOR_BUFFERS) ==
|
||||
(job->store & PIPE_CLEAR_COLOR_BUFFERS));
|
||||
#else
|
||||
bool general_color_clear = false;
|
||||
#endif
|
||||
|
||||
uint32_t stores_pending = job->store;
|
||||
|
||||
/* For V3D 4.1, use general stores for all TLB stores.
|
||||
|
|
@ -337,17 +229,14 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
|
|||
continue;
|
||||
|
||||
struct pipe_surface *psurf = job->cbufs[i];
|
||||
if (!psurf ||
|
||||
(V3D_VERSION < 40 && psurf->texture->nr_samples <= 1)) {
|
||||
if (!psurf)
|
||||
continue;
|
||||
}
|
||||
|
||||
store_general(job, cl, psurf, layer, RENDER_TARGET_0 + i, bit,
|
||||
&stores_pending, general_color_clear, job->bbuf);
|
||||
}
|
||||
|
||||
if (job->store & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf &&
|
||||
!(V3D_VERSION < 40 && job->zsbuf->texture->nr_samples <= 1)) {
|
||||
if (job->store & PIPE_CLEAR_DEPTHSTENCIL && job->zsbuf) {
|
||||
assert(!job->early_zs_clear);
|
||||
struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
|
||||
if (rsc->separate_stencil) {
|
||||
|
|
@ -375,35 +264,7 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
|
|||
}
|
||||
}
|
||||
|
||||
#if V3D_VERSION < 40
|
||||
if (stores_pending) {
|
||||
cl_emit(cl, STORE_MULTI_SAMPLE_RESOLVED_TILE_COLOR_BUFFER_EXTENDED, store) {
|
||||
|
||||
store.disable_color_buffer_write =
|
||||
(~stores_pending >>
|
||||
PIPE_FIRST_COLOR_BUFFER_BIT) & 0xf;
|
||||
store.enable_z_write = stores_pending & PIPE_CLEAR_DEPTH;
|
||||
store.enable_stencil_write = stores_pending & PIPE_CLEAR_STENCIL;
|
||||
|
||||
/* Note that when set this will clear all of the color
|
||||
* buffers.
|
||||
*/
|
||||
store.disable_color_buffers_clear_on_write =
|
||||
!needs_color_clear;
|
||||
store.disable_z_buffer_clear_on_write =
|
||||
!needs_z_clear;
|
||||
store.disable_stencil_buffer_clear_on_write =
|
||||
!needs_s_clear;
|
||||
};
|
||||
} else if (needs_color_clear && !general_color_clear) {
|
||||
/* If we didn't do our color clears in the general packet,
|
||||
* then emit a packet to clear all the TLB color buffers now.
|
||||
*/
|
||||
cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
|
||||
store.buffer_to_store = NONE;
|
||||
}
|
||||
}
|
||||
#else /* V3D_VERSION >= 40 */
|
||||
/* If we're emitting an RCL with GL_ARB_framebuffer_no_attachments,
|
||||
* we still need to emit some sort of store.
|
||||
*/
|
||||
|
|
@ -421,7 +282,7 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
|
|||
* clearing Z/S.
|
||||
*/
|
||||
if (job->clear) {
|
||||
#if V3D_VERSION <= 42
|
||||
#if V3D_VERSION == 42
|
||||
cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
|
||||
clear.clear_z_stencil_buffer = !job->early_zs_clear;
|
||||
clear.clear_all_render_targets = true;
|
||||
|
|
@ -432,7 +293,6 @@ v3d_rcl_emit_stores(struct v3d_job *job, struct v3d_cl *cl, int layer)
|
|||
#endif
|
||||
|
||||
}
|
||||
#endif /* V3D_VERSION >= 40 */
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -445,22 +305,13 @@ v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer)
|
|||
v3d_cl_ensure_space(cl, 200, 1);
|
||||
struct v3d_cl_reloc tile_list_start = cl_get_address(cl);
|
||||
|
||||
if (V3D_VERSION >= 40) {
|
||||
/* V3D 4.x only requires a single tile coordinates, and
|
||||
* END_OF_LOADS switches us between loading and rendering.
|
||||
*/
|
||||
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
|
||||
}
|
||||
/* V3D 4.x/7.x only requires a single tile coordinates, and
|
||||
* END_OF_LOADS switches us between loading and rendering.
|
||||
*/
|
||||
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
|
||||
|
||||
v3d_rcl_emit_loads(job, cl, layer);
|
||||
|
||||
if (V3D_VERSION < 40) {
|
||||
/* Tile Coordinates triggers the last reload and sets where
|
||||
* the stores go. There must be one per store packet.
|
||||
*/
|
||||
cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
|
||||
}
|
||||
|
||||
/* The binner starts out writing tiles assuming that the initial mode
|
||||
* is triangles, so make sure that's the case.
|
||||
*/
|
||||
|
|
@ -468,20 +319,16 @@ v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer)
|
|||
fmt.primitive_type = LIST_TRIANGLES;
|
||||
}
|
||||
|
||||
#if V3D_VERSION >= 41
|
||||
/* PTB assumes that value to be 0, but hw will not set it. */
|
||||
cl_emit(cl, SET_INSTANCEID, set) {
|
||||
set.instance_id = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
|
||||
|
||||
v3d_rcl_emit_stores(job, cl, layer);
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
cl_emit(cl, END_OF_TILE_MARKER, end);
|
||||
#endif
|
||||
|
||||
cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
|
||||
|
||||
|
|
@ -491,7 +338,6 @@ v3d_rcl_emit_generic_per_tile_list(struct v3d_job *job, int layer)
|
|||
}
|
||||
}
|
||||
|
||||
#if V3D_VERSION > 33
|
||||
/* Note that for v71, render target cfg packets has just one field that
|
||||
* combined the internal type and clamp mode. For simplicity we keep just one
|
||||
* helper.
|
||||
|
|
@ -503,13 +349,11 @@ static uint32_t
|
|||
v3dX(clamp_for_format_and_type)(uint32_t rt_type,
|
||||
enum pipe_format format)
|
||||
{
|
||||
#if V3D_VERSION >= 40 && V3D_VERSION <= 42
|
||||
#if V3D_VERSION == 42
|
||||
if (util_format_is_srgb(format)) {
|
||||
return V3D_RENDER_TARGET_CLAMP_NORM;
|
||||
#if V3D_VERSION >= 42
|
||||
} else if (util_format_is_pure_integer(format)) {
|
||||
return V3D_RENDER_TARGET_CLAMP_INT;
|
||||
#endif
|
||||
} else {
|
||||
return V3D_RENDER_TARGET_CLAMP_NONE;
|
||||
}
|
||||
|
|
@ -541,9 +385,8 @@ v3dX(clamp_for_format_and_type)(uint32_t rt_type,
|
|||
}
|
||||
return V3D_RENDER_TARGET_TYPE_CLAMP_INVALID;
|
||||
#endif
|
||||
return 0;
|
||||
unreachable("Wrong V3D_VERSION");
|
||||
}
|
||||
#endif
|
||||
|
||||
#if V3D_VERSION >= 71
|
||||
static void
|
||||
|
|
@ -566,7 +409,7 @@ v3d_setup_render_target(struct v3d_job *job,
|
|||
}
|
||||
#endif
|
||||
|
||||
#if V3D_VERSION >= 40 && V3D_VERSION <= 42
|
||||
#if V3D_VERSION == 42
|
||||
static void
|
||||
v3d_setup_render_target(struct v3d_job *job,
|
||||
int cbuf,
|
||||
|
|
@ -589,36 +432,6 @@ v3d_setup_render_target(struct v3d_job *job,
|
|||
}
|
||||
#endif
|
||||
|
||||
#if V3D_VERSION < 40
|
||||
static void
|
||||
v3d_emit_z_stencil_config(struct v3d_job *job, struct v3d_surface *surf,
|
||||
struct v3d_resource *rsc, bool is_separate_stencil)
|
||||
{
|
||||
cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_Z_STENCIL, zs) {
|
||||
zs.address = cl_address(rsc->bo, surf->offset);
|
||||
|
||||
if (!is_separate_stencil) {
|
||||
zs.internal_type = surf->internal_type;
|
||||
zs.output_image_format = surf->format;
|
||||
} else {
|
||||
zs.z_stencil_id = 1; /* Separate stencil */
|
||||
}
|
||||
|
||||
zs.padded_height_of_output_image_in_uif_blocks =
|
||||
surf->padded_height_of_output_image_in_uif_blocks;
|
||||
|
||||
assert(surf->tiling != V3D_TILING_RASTER);
|
||||
zs.memory_format = surf->tiling;
|
||||
}
|
||||
|
||||
if (job->store & (is_separate_stencil ?
|
||||
PIPE_CLEAR_STENCIL :
|
||||
PIPE_CLEAR_DEPTHSTENCIL)) {
|
||||
rsc->writes++;
|
||||
}
|
||||
}
|
||||
#endif /* V3D_VERSION < 40 */
|
||||
|
||||
static bool
|
||||
supertile_in_job_scissors(struct v3d_job *job,
|
||||
uint32_t x, uint32_t y, uint32_t w, uint32_t h)
|
||||
|
|
@ -648,7 +461,6 @@ supertile_in_job_scissors(struct v3d_job *job,
|
|||
return false;
|
||||
}
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
static inline bool
|
||||
do_double_initial_tile_clear(const struct v3d_job *job)
|
||||
{
|
||||
|
|
@ -663,7 +475,6 @@ do_double_initial_tile_clear(const struct v3d_job *job)
|
|||
return job->double_buffer &&
|
||||
(job->draw_tiles_x > 1 || job->draw_tiles_y > 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
emit_render_layer(struct v3d_job *job, uint32_t layer)
|
||||
|
|
@ -730,12 +541,6 @@ emit_render_layer(struct v3d_job *job, uint32_t layer)
|
|||
* state, we need 1 dummy store in between internal type/size
|
||||
* changes on V3D 3.x, and 2 dummy stores on 4.x.
|
||||
*/
|
||||
#if V3D_VERSION < 40
|
||||
cl_emit(&job->rcl, STORE_TILE_BUFFER_GENERAL, store) {
|
||||
store.buffer_to_store = NONE;
|
||||
}
|
||||
#endif
|
||||
#if V3D_VERSION >= 40
|
||||
for (int i = 0; i < 2; i++) {
|
||||
if (i > 0)
|
||||
cl_emit(&job->rcl, TILE_COORDINATES, coords);
|
||||
|
|
@ -756,7 +561,6 @@ emit_render_layer(struct v3d_job *job, uint32_t layer)
|
|||
}
|
||||
cl_emit(&job->rcl, END_OF_TILE_MARKER, end);
|
||||
}
|
||||
#endif
|
||||
cl_emit(&job->rcl, FLUSH_VCD_CACHE, flush);
|
||||
|
||||
v3d_rcl_emit_generic_per_tile_list(job, layer);
|
||||
|
|
@ -808,15 +612,10 @@ v3dX(emit_rcl)(struct v3d_job *job)
|
|||
* optional updates to the previous HW state.
|
||||
*/
|
||||
cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
|
||||
#if V3D_VERSION < 40
|
||||
config.enable_z_store = job->store & PIPE_CLEAR_DEPTH;
|
||||
config.enable_stencil_store = job->store & PIPE_CLEAR_STENCIL;
|
||||
#else /* V3D_VERSION >= 40 */
|
||||
if (job->zsbuf) {
|
||||
struct v3d_surface *surf = v3d_surface(job->zsbuf);
|
||||
config.internal_depth_type = surf->internal_type;
|
||||
}
|
||||
#endif /* V3D_VERSION >= 40 */
|
||||
|
||||
if (job->decided_global_ez_enable) {
|
||||
switch (job->first_ez_state) {
|
||||
|
|
@ -839,7 +638,6 @@ v3dX(emit_rcl)(struct v3d_job *job)
|
|||
config.early_z_disable = true;
|
||||
}
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
assert(job->zsbuf || config.early_z_disable);
|
||||
|
||||
job->early_zs_clear = (job->clear & PIPE_CLEAR_DEPTHSTENCIL) &&
|
||||
|
|
@ -847,7 +645,6 @@ v3dX(emit_rcl)(struct v3d_job *job)
|
|||
!(job->store & PIPE_CLEAR_DEPTHSTENCIL);
|
||||
|
||||
config.early_depth_stencil_clear = job->early_zs_clear;
|
||||
#endif /* V3D_VERSION >= 40 */
|
||||
|
||||
config.image_width_pixels = job->draw_width;
|
||||
config.image_height_pixels = job->draw_height;
|
||||
|
|
@ -858,7 +655,7 @@ v3dX(emit_rcl)(struct v3d_job *job)
|
|||
config.multisample_mode_4x = job->msaa;
|
||||
config.double_buffer_in_non_ms_mode = job->double_buffer;
|
||||
|
||||
#if V3D_VERSION <= 42
|
||||
#if V3D_VERSION == 42
|
||||
config.maximum_bpp_of_all_render_targets = job->internal_bpp;
|
||||
#endif
|
||||
#if V3D_VERSION >= 71
|
||||
|
|
@ -921,22 +718,7 @@ v3dX(emit_rcl)(struct v3d_job *job)
|
|||
}
|
||||
}
|
||||
|
||||
#if V3D_VERSION < 40
|
||||
cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
|
||||
rt.address = cl_address(rsc->bo, surf->offset);
|
||||
rt.internal_type = surf->internal_type;
|
||||
rt.output_image_format = surf->format;
|
||||
rt.memory_format = surf->tiling;
|
||||
rt.internal_bpp = surf->internal_bpp;
|
||||
rt.render_target_number = i;
|
||||
rt.pad = config_pad;
|
||||
|
||||
if (job->store & PIPE_CLEAR_COLOR0 << i)
|
||||
rsc->writes++;
|
||||
}
|
||||
#endif /* V3D_VERSION < 40 */
|
||||
|
||||
#if V3D_VERSION <= 42
|
||||
#if V3D_VERSION == 42
|
||||
cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1,
|
||||
clear) {
|
||||
clear.clear_color_low_32_bits = job->clear_color[i][0];
|
||||
|
|
@ -1000,7 +782,7 @@ v3dX(emit_rcl)(struct v3d_job *job)
|
|||
#endif
|
||||
}
|
||||
|
||||
#if V3D_VERSION >= 40 && V3D_VERSION <= 42
|
||||
#if V3D_VERSION == 42
|
||||
cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
|
||||
v3d_setup_render_target(job, 0,
|
||||
&rt.render_target_0_internal_bpp,
|
||||
|
|
@ -1021,27 +803,6 @@ v3dX(emit_rcl)(struct v3d_job *job)
|
|||
}
|
||||
#endif
|
||||
|
||||
#if V3D_VERSION < 40
|
||||
/* FIXME: Don't bother emitting if we don't load/clear Z/S. */
|
||||
if (job->zsbuf) {
|
||||
struct pipe_surface *psurf = job->zsbuf;
|
||||
struct v3d_surface *surf = v3d_surface(psurf);
|
||||
struct v3d_resource *rsc = v3d_resource(psurf->texture);
|
||||
|
||||
v3d_emit_z_stencil_config(job, surf, rsc, false);
|
||||
|
||||
/* Emit the separate stencil packet if we have a resource for
|
||||
* it. The HW will only load/store this buffer if the
|
||||
* Z/Stencil config doesn't have stencil in its format.
|
||||
*/
|
||||
if (surf->separate_stencil) {
|
||||
v3d_emit_z_stencil_config(job,
|
||||
v3d_surface(surf->separate_stencil),
|
||||
rsc->separate_stencil, true);
|
||||
}
|
||||
}
|
||||
#endif /* V3D_VERSION < 40 */
|
||||
|
||||
/* Ends rendering mode config. */
|
||||
cl_emit(&job->rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES,
|
||||
clear) {
|
||||
|
|
|
|||
|
|
@ -106,21 +106,17 @@ v3d_create_rasterizer_state(struct pipe_context *pctx,
|
|||
v3dx_pack(&so->depth_offset, DEPTH_OFFSET, depth) {
|
||||
depth.depth_offset_factor = cso->offset_scale;
|
||||
depth.depth_offset_units = cso->offset_units;
|
||||
#if V3D_VERSION >= 41
|
||||
depth.limit = cso->offset_clamp;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* V3d 4.x treats polygon offset units based on a Z24 buffer, so we
|
||||
* need to scale up offset_units if we're only Z16.
|
||||
*/
|
||||
#if V3D_VERSION <= 42
|
||||
#if V3D_VERSION == 42
|
||||
v3dx_pack(&so->depth_offset_z16, DEPTH_OFFSET, depth) {
|
||||
depth.depth_offset_factor = cso->offset_scale;
|
||||
depth.depth_offset_units = cso->offset_units * 256.0;
|
||||
#if V3D_VERSION >= 41
|
||||
depth.limit = cso->offset_clamp;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
@ -144,10 +140,6 @@ v3d_create_blend_state(struct pipe_context *pctx,
|
|||
if (cso->independent_blend_enable) {
|
||||
for (int i = 0; i < max_rts; i++) {
|
||||
so->blend_enables |= cso->rt[i].blend_enable << i;
|
||||
|
||||
/* V3D 4.x is when we got independent blend enables. */
|
||||
assert(V3D_VERSION >= 40 ||
|
||||
cso->rt[i].blend_enable == cso->rt[0].blend_enable);
|
||||
}
|
||||
} else {
|
||||
if (cso->rt[0].blend_enable)
|
||||
|
|
@ -343,7 +335,7 @@ v3d_zsa_state_bind(struct pipe_context *pctx, void *hwcso)
|
|||
static bool
|
||||
needs_default_attribute_values(void)
|
||||
{
|
||||
#if V3D_VERSION <= 42
|
||||
#if V3D_VERSION == 42
|
||||
/* FIXME: on vulkan we are able to refine even further, as we know in
|
||||
* advance when we create the pipeline if we have an integer vertex
|
||||
* attrib. Pending to check if we could do something similar here.
|
||||
|
|
@ -517,18 +509,10 @@ v3d_set_framebuffer_state(struct pipe_context *pctx,
|
|||
struct pipe_surface *cbuf = v3d->framebuffer.cbufs[i];
|
||||
if (!cbuf)
|
||||
continue;
|
||||
struct v3d_surface *v3d_cbuf = v3d_surface(cbuf);
|
||||
|
||||
const struct util_format_description *desc =
|
||||
util_format_description(cbuf->format);
|
||||
|
||||
/* For BGRA8 formats (DRI window system default format), we
|
||||
* need to swap R and B, since the HW's format is RGBA8. On
|
||||
* V3D 4.1+, the RCL can swap R and B on load/store.
|
||||
*/
|
||||
if (v3d->screen->devinfo.ver < 41 && v3d_cbuf->swap_rb)
|
||||
v3d->swap_color_rb |= 1 << i;
|
||||
|
||||
if (desc->swizzle[3] == PIPE_SWIZZLE_1)
|
||||
v3d->blend_dst_alpha_one |= 1 << i;
|
||||
}
|
||||
|
|
@ -555,7 +539,6 @@ translate_wrap(uint32_t pipe_wrap)
|
|||
}
|
||||
}
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
static void
|
||||
v3d_upload_sampler_state_variant(void *map,
|
||||
const struct pipe_sampler_state *cso,
|
||||
|
|
@ -720,7 +703,7 @@ v3d_upload_sampler_state_variant(void *map,
|
|||
break;
|
||||
}
|
||||
|
||||
#if V3D_VERSION <= 42
|
||||
#if V3D_VERSION == 42
|
||||
/* The TMU in V3D 7.x always takes 32-bit floats and handles conversions
|
||||
* for us. In V3D 4.x we need to manually convert floating point color
|
||||
* values to the expected format.
|
||||
|
|
@ -739,7 +722,6 @@ v3d_upload_sampler_state_variant(void *map,
|
|||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static void *
|
||||
v3d_create_sampler_state(struct pipe_context *pctx,
|
||||
|
|
@ -757,7 +739,6 @@ v3d_create_sampler_state(struct pipe_context *pctx,
|
|||
enum V3DX(Wrap_Mode) wrap_t = translate_wrap(cso->wrap_t);
|
||||
enum V3DX(Wrap_Mode) wrap_r = translate_wrap(cso->wrap_r);
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
bool uses_border_color = (wrap_s == V3D_WRAP_MODE_BORDER ||
|
||||
wrap_t == V3D_WRAP_MODE_BORDER ||
|
||||
wrap_r == V3D_WRAP_MODE_BORDER);
|
||||
|
|
@ -807,20 +788,6 @@ v3d_create_sampler_state(struct pipe_context *pctx,
|
|||
so->border_color_variants ? i : border_variant);
|
||||
}
|
||||
|
||||
#else /* V3D_VERSION < 40 */
|
||||
v3dx_pack(&so->p0, TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1, p0) {
|
||||
p0.s_wrap_mode = wrap_s;
|
||||
p0.t_wrap_mode = wrap_t;
|
||||
p0.r_wrap_mode = wrap_r;
|
||||
}
|
||||
|
||||
v3dx_pack(&so->texture_shader_state, TEXTURE_SHADER_STATE, tex) {
|
||||
tex.depth_compare_function = cso->compare_mode ?
|
||||
cso->compare_func :
|
||||
V3D_COMPARE_FUNC_NEVER;
|
||||
tex.fixed_bias = cso->lod_bias;
|
||||
}
|
||||
#endif /* V3D_VERSION < 40 */
|
||||
return so;
|
||||
}
|
||||
|
||||
|
|
@ -911,8 +878,7 @@ v3d_setup_texture_shader_state(const struct v3d_device_info *devinfo,
|
|||
tex->image_width = prsc->width0 * msaa_scale;
|
||||
tex->image_height = prsc->height0 * msaa_scale;
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
/* On 4.x, the height of a 1D texture is redefined to be the
|
||||
/* On 4.x, the height of a 1D texture is redefined to be the
|
||||
* upper 14 bits of the width (which is only usable with txf).
|
||||
*/
|
||||
if (prsc->target == PIPE_TEXTURE_1D ||
|
||||
|
|
@ -922,7 +888,6 @@ v3d_setup_texture_shader_state(const struct v3d_device_info *devinfo,
|
|||
|
||||
tex->image_width &= (1 << 14) - 1;
|
||||
tex->image_height &= (1 << 14) - 1;
|
||||
#endif
|
||||
|
||||
if (prsc->target == PIPE_TEXTURE_3D) {
|
||||
tex->image_depth = prsc->depth0;
|
||||
|
|
@ -941,7 +906,6 @@ v3d_setup_texture_shader_state(const struct v3d_device_info *devinfo,
|
|||
|
||||
tex->base_level = base_level;
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
tex->max_level = last_level;
|
||||
/* Note that we don't have a job to reference the texture's sBO
|
||||
* at state create time, so any time this sampler view is used
|
||||
|
|
@ -951,8 +915,6 @@ v3d_setup_texture_shader_state(const struct v3d_device_info *devinfo,
|
|||
v3d_layer_offset(prsc, 0, first_layer);
|
||||
|
||||
tex->texture_base_pointer = cl_address(NULL, base_offset);
|
||||
#endif
|
||||
|
||||
tex->array_stride_64_byte_aligned = rsc->cube_map_stride / 64;
|
||||
|
||||
#if V3D_VERSION >= 71
|
||||
|
|
@ -976,12 +938,10 @@ v3d_setup_texture_shader_state(const struct v3d_device_info *devinfo,
|
|||
if (tex->level_0_is_strictly_uif)
|
||||
tex->level_0_ub_pad = rsc->slices[0].ub_pad;
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
if (tex->uif_xor_disable ||
|
||||
tex->level_0_is_strictly_uif) {
|
||||
tex->extended = true;
|
||||
}
|
||||
#endif /* V3D_VERSION >= 40 */
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -997,16 +957,10 @@ v3dX(create_texture_shader_state_bo)(struct v3d_context *v3d,
|
|||
|
||||
assert(so->serial_id != rsc->serial_id);
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
v3d_bo_unreference(&so->bo);
|
||||
so->bo = v3d_bo_alloc(v3d->screen,
|
||||
cl_packet_length(TEXTURE_SHADER_STATE), "sampler");
|
||||
map = v3d_bo_map(so->bo);
|
||||
#else /* V3D_VERSION < 40 */
|
||||
STATIC_ASSERT(sizeof(so->texture_shader_state) >=
|
||||
cl_packet_length(TEXTURE_SHADER_STATE));
|
||||
map = &so->texture_shader_state;
|
||||
#endif
|
||||
|
||||
v3dx_pack(map, TEXTURE_SHADER_STATE, tex) {
|
||||
if (prsc->target != PIPE_BUFFER) {
|
||||
|
|
@ -1025,69 +979,20 @@ v3dX(create_texture_shader_state_bo)(struct v3d_context *v3d,
|
|||
}
|
||||
|
||||
bool is_srgb = util_format_is_srgb(cso->format);
|
||||
#if V3D_VERSION <= 42
|
||||
#if V3D_VERSION == 42
|
||||
tex.srgb = is_srgb;
|
||||
#endif
|
||||
#if V3D_VERSION >= 71
|
||||
tex.transfer_func = is_srgb ? TRANSFER_FUNC_SRGB : TRANSFER_FUNC_NONE;
|
||||
#endif
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
tex.swizzle_r = v3d_translate_pipe_swizzle(so->swizzle[0]);
|
||||
tex.swizzle_g = v3d_translate_pipe_swizzle(so->swizzle[1]);
|
||||
tex.swizzle_b = v3d_translate_pipe_swizzle(so->swizzle[2]);
|
||||
tex.swizzle_a = v3d_translate_pipe_swizzle(so->swizzle[3]);
|
||||
#endif
|
||||
|
||||
if (prsc->nr_samples > 1 && V3D_VERSION < 40) {
|
||||
/* Using texture views to reinterpret formats on our
|
||||
* MSAA textures won't work, because we don't lay out
|
||||
* the bits in memory as it's expected -- for example,
|
||||
* RGBA8 and RGB10_A2 are compatible in the
|
||||
* ARB_texture_view spec, but in HW we lay them out as
|
||||
* 32bpp RGBA8 and 64bpp RGBA16F. Just assert for now
|
||||
* to catch failures.
|
||||
*
|
||||
* We explicitly allow remapping S8Z24 to RGBA8888 for
|
||||
* v3d_blit.c's stencil blits.
|
||||
*/
|
||||
assert((util_format_linear(cso->format) ==
|
||||
util_format_linear(prsc->format)) ||
|
||||
(prsc->format == PIPE_FORMAT_S8_UINT_Z24_UNORM &&
|
||||
cso->format == PIPE_FORMAT_R8G8B8A8_UNORM));
|
||||
uint32_t output_image_format =
|
||||
v3d_get_rt_format(&screen->devinfo, cso->format);
|
||||
uint32_t internal_type;
|
||||
uint32_t internal_bpp;
|
||||
v3dX(get_internal_type_bpp_for_output_format)(output_image_format,
|
||||
&internal_type,
|
||||
&internal_bpp);
|
||||
|
||||
switch (internal_type) {
|
||||
case V3D_INTERNAL_TYPE_8:
|
||||
tex.texture_type = TEXTURE_DATA_FORMAT_RGBA8;
|
||||
break;
|
||||
case V3D_INTERNAL_TYPE_16F:
|
||||
tex.texture_type = TEXTURE_DATA_FORMAT_RGBA16F;
|
||||
break;
|
||||
default:
|
||||
unreachable("Bad MSAA texture type");
|
||||
}
|
||||
|
||||
/* sRGB was stored in the tile buffer as linear and
|
||||
* would have been encoded to sRGB on resolved tile
|
||||
* buffer store. Note that this means we would need
|
||||
* shader code if we wanted to read an MSAA sRGB
|
||||
* texture without sRGB decode.
|
||||
*/
|
||||
#if V3D_VERSION <= 42
|
||||
tex.srgb = false;
|
||||
#endif
|
||||
|
||||
} else {
|
||||
tex.texture_type = v3d_get_tex_format(&screen->devinfo,
|
||||
cso->format);
|
||||
}
|
||||
tex.texture_type = v3d_get_tex_format(&screen->devinfo,
|
||||
cso->format);
|
||||
};
|
||||
|
||||
so->serial_id = rsc->serial_id;
|
||||
|
|
@ -1141,7 +1046,6 @@ v3d_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
|
|||
if (sample_format == PIPE_FORMAT_S8_UINT_Z24_UNORM)
|
||||
sample_format = PIPE_FORMAT_X8Z24_UNORM;
|
||||
|
||||
#if V3D_VERSION >= 40
|
||||
const struct util_format_description *desc =
|
||||
util_format_description(sample_format);
|
||||
|
||||
|
|
@ -1202,7 +1106,6 @@ v3d_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
|
|||
V3D_SAMPLER_STATE_F16);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* V3D still doesn't support sampling from raster textures, so we will
|
||||
* have to copy to a temporary tiled texture.
|
||||
|
|
@ -1433,7 +1336,6 @@ v3d_create_image_view_texture_shader_state(struct v3d_context *v3d,
|
|||
struct v3d_shaderimg_stateobj *so,
|
||||
int img)
|
||||
{
|
||||
#if V3D_VERSION >= 40
|
||||
struct v3d_image_view *iview = &so->si[img];
|
||||
|
||||
void *map;
|
||||
|
|
@ -1469,12 +1371,6 @@ v3d_create_image_view_texture_shader_state(struct v3d_context *v3d,
|
|||
tex.texture_type = v3d_get_tex_format(&v3d->screen->devinfo,
|
||||
iview->base.format);
|
||||
};
|
||||
#else /* V3D_VERSION < 40 */
|
||||
/* V3D 3.x doesn't use support shader image load/store operations on
|
||||
* textures, so it would get lowered in the shader to general memory
|
||||
* accesses.
|
||||
*/
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -118,7 +118,7 @@ v3dX(tfu)(struct pipe_context *pctx,
|
|||
break;
|
||||
}
|
||||
|
||||
#if V3D_VERSION <= 42
|
||||
#if V3D_VERSION == 42
|
||||
if (src_base_slice->tiling == V3D_TILING_RASTER) {
|
||||
tfu.icfg |= (V3D33_TFU_ICFG_FORMAT_RASTER <<
|
||||
V3D33_TFU_ICFG_FORMAT_SHIFT);
|
||||
|
|
@ -152,7 +152,7 @@ v3dX(tfu)(struct pipe_context *pctx,
|
|||
implicit_padded_height) / uif_block_h) <<
|
||||
V3D33_TFU_ICFG_OPAD_SHIFT);
|
||||
}
|
||||
#endif /* V3D_VERSION <= 42 */
|
||||
#endif /* V3D_VERSION == 42 */
|
||||
|
||||
#if V3D_VERSION >= 71
|
||||
if (src_base_slice->tiling == V3D_TILING_RASTER) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue