mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 13:00:09 +01:00
r600: add ARB_shader_storage_buffer_object support (v3)
This builds on the image support. Evergreen only has SSBOs for the
fragment and compute stages, not for any other stages.
v2: handle images and ssbo in the same shader properly (Ilia)
v3: fix RESQ on buffers,
fix missing atom emit,
fix first element offset,
use R32 format,
write a separate buffer RAT store path
(from running deqp gles3.1 tests)
Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
parent
c758fd05d8
commit
4e7f6437b5
8 changed files with 372 additions and 24 deletions
|
|
@ -179,7 +179,7 @@ GL 4.3, GLSL 4.30 -- all DONE: i965/gen8+, nvc0, radeonsi
|
|||
GL_ARB_program_interface_query DONE (all drivers)
|
||||
GL_ARB_robust_buffer_access_behavior DONE (i965)
|
||||
GL_ARB_shader_image_size DONE (freedreno/a5xx, i965, r600, softpipe)
|
||||
GL_ARB_shader_storage_buffer_object DONE (freedreno/a5xx, i965, softpipe)
|
||||
GL_ARB_shader_storage_buffer_object DONE (freedreno/a5xx, i965, r600, softpipe)
|
||||
GL_ARB_stencil_texturing DONE (freedreno, i965/hsw+, nv50, r600, llvmpipe, softpipe, swr)
|
||||
GL_ARB_texture_buffer_range DONE (freedreno, nv50, i965, r600, llvmpipe)
|
||||
GL_ARB_texture_query_levels DONE (all drivers that support GLSL 1.30)
|
||||
|
|
@ -249,7 +249,7 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, radeonsi
|
|||
GL_ARB_shader_atomic_counters DONE (freedreno/a5xx, i965/gen7+, r600, softpipe)
|
||||
GL_ARB_shader_image_load_store DONE (freedreno/a5xx, i965/gen7+, r600, softpipe)
|
||||
GL_ARB_shader_image_size DONE (freedreno/a5xx, i965/gen7+, r600, softpipe)
|
||||
GL_ARB_shader_storage_buffer_object DONE (freedreno/a5xx, i965/gen7+, softpipe)
|
||||
GL_ARB_shader_storage_buffer_object DONE (freedreno/a5xx, i965/gen7+, r600, softpipe)
|
||||
GL_ARB_shading_language_packing DONE (all drivers)
|
||||
GL_ARB_separate_shader_objects DONE (all drivers)
|
||||
GL_ARB_stencil_texturing DONE (freedreno, nv50, r600, llvmpipe, softpipe, swr)
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ Note: some of the new features are only available with certain drivers.
|
|||
<li>Disk shader cache support for i965 when MESA_GLSL_CACHE_DISABLE environment variable is set to "0" or "false"</li>
|
||||
<li>GL_ARB_shader_atomic_counters and GL_ARB_shader_atomic_counter_ops on r600/evergreen+</li>
|
||||
<li>GL_ARB_shader_image_load_store and GL_ARB_shader_image_size on r600/evergreen+</li>
|
||||
<li>GL_ARB_shader_storage_buffer_object on r600/evergreen+</li>
|
||||
<li>GL_ARB_cull_distance on r600/evergreen+</li>
|
||||
<li>OpenGL 4.2 on r600/evergreen with hw fp64 support</li>
|
||||
</ul>
|
||||
|
|
|
|||
|
|
@ -614,6 +614,7 @@ struct eg_buf_res_params {
|
|||
unsigned size;
|
||||
unsigned char swizzle[4];
|
||||
bool uncached;
|
||||
bool force_swizzle;
|
||||
};
|
||||
|
||||
static void evergreen_fill_buffer_resource_words(struct r600_context *rctx,
|
||||
|
|
@ -635,7 +636,10 @@ static void evergreen_fill_buffer_resource_words(struct r600_context *rctx,
|
|||
|
||||
desc = util_format_description(params->pipe_format);
|
||||
|
||||
swizzle_res = r600_get_swizzle_combined(desc->swizzle, params->swizzle, TRUE);
|
||||
if (params->force_swizzle)
|
||||
swizzle_res = r600_get_swizzle_combined(params->swizzle, NULL, TRUE);
|
||||
else
|
||||
swizzle_res = r600_get_swizzle_combined(desc->swizzle, params->swizzle, TRUE);
|
||||
|
||||
va = tmp->resource.gpu_address + params->offset;
|
||||
*skip_mip_address_reloc = true;
|
||||
|
|
@ -1029,7 +1033,7 @@ static void evergreen_set_color_surface_buffer(struct r600_context *rctx,
|
|||
{
|
||||
unsigned format, swap, ntype, endian;
|
||||
const struct util_format_description *desc;
|
||||
unsigned block_size = align(util_format_get_blocksize(res->b.b.format), 4);
|
||||
unsigned block_size = util_format_get_blocksize(res->b.b.format);
|
||||
unsigned pitch_alignment =
|
||||
MAX2(64, rctx->screen->b.info.pipe_interleave_bytes / block_size);
|
||||
unsigned pitch = align(res->b.b.width0, pitch_alignment);
|
||||
|
|
@ -1082,7 +1086,7 @@ static void evergreen_set_color_surface_buffer(struct r600_context *rctx,
|
|||
color->dim = width_elements - 1;
|
||||
color->slice = 0; /* (width_elements / 64) - 1;*/
|
||||
color->view = 0;
|
||||
color->offset = res->gpu_address >> 8;
|
||||
color->offset = (res->gpu_address + first_element) >> 8;
|
||||
|
||||
color->fmask = color->offset;
|
||||
color->fmask_slice = 0;
|
||||
|
|
@ -1679,7 +1683,7 @@ static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples,
|
|||
}
|
||||
|
||||
static void evergreen_emit_image_state(struct r600_context *rctx, struct r600_atom *atom,
|
||||
int immed_id_base, int res_id_base)
|
||||
int immed_id_base, int res_id_base, int offset)
|
||||
{
|
||||
struct r600_image_state *state = (struct r600_image_state *)atom;
|
||||
struct pipe_framebuffer_state *fb_state = &rctx->framebuffer.state;
|
||||
|
|
@ -1692,7 +1696,7 @@ static void evergreen_emit_image_state(struct r600_context *rctx, struct r600_at
|
|||
for (i = 0; i < R600_MAX_IMAGES; i++) {
|
||||
struct r600_image_view *image = &state->views[i];
|
||||
unsigned reloc, immed_reloc;
|
||||
int idx = i;
|
||||
int idx = i + offset;
|
||||
|
||||
idx += fb_state->nr_cbufs + (rctx->dual_src_blend ? 1 : 0);
|
||||
if (!image->base.resource)
|
||||
|
|
@ -1749,14 +1753,14 @@ static void evergreen_emit_image_state(struct r600_context *rctx, struct r600_at
|
|||
radeon_emit(cs, immed_reloc);
|
||||
|
||||
radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
|
||||
radeon_emit(cs, (immed_id_base + i) * 8);
|
||||
radeon_emit(cs, (immed_id_base + i + offset) * 8);
|
||||
radeon_emit_array(cs, image->immed_resource_words, 8);
|
||||
|
||||
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
|
||||
radeon_emit(cs, immed_reloc);
|
||||
|
||||
radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
|
||||
radeon_emit(cs, (res_id_base + i) * 8);
|
||||
radeon_emit(cs, (res_id_base + i + offset) * 8);
|
||||
radeon_emit_array(cs, image->resource_words, 8);
|
||||
|
||||
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0) | pkt_flags);
|
||||
|
|
@ -1773,7 +1777,15 @@ static void evergreen_emit_fragment_image_state(struct r600_context *rctx, struc
|
|||
{
|
||||
evergreen_emit_image_state(rctx, atom,
|
||||
R600_IMAGE_IMMED_RESOURCE_OFFSET,
|
||||
R600_IMAGE_REAL_RESOURCE_OFFSET);
|
||||
R600_IMAGE_REAL_RESOURCE_OFFSET, 0);
|
||||
}
|
||||
|
||||
static void evergreen_emit_fragment_buffer_state(struct r600_context *rctx, struct r600_atom *atom)
|
||||
{
|
||||
int offset = util_bitcount(rctx->fragment_images.enabled_mask);
|
||||
evergreen_emit_image_state(rctx, atom,
|
||||
R600_IMAGE_IMMED_RESOURCE_OFFSET,
|
||||
R600_IMAGE_REAL_RESOURCE_OFFSET, offset);
|
||||
}
|
||||
|
||||
static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r600_atom *atom)
|
||||
|
|
@ -1852,6 +1864,7 @@ static void evergreen_emit_framebuffer_state(struct r600_context *rctx, struct r
|
|||
i++;
|
||||
}
|
||||
i += util_bitcount(rctx->fragment_images.enabled_mask);
|
||||
i += util_bitcount(rctx->fragment_buffers.enabled_mask);
|
||||
for (; i < 8 ; i++)
|
||||
radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
|
||||
for (; i < 12; i++)
|
||||
|
|
@ -1966,7 +1979,7 @@ static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_
|
|||
struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
|
||||
unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1;
|
||||
unsigned ps_colormask = (1ULL << ((unsigned)a->nr_ps_color_outputs * 4)) - 1;
|
||||
unsigned rat_colormask = ((1ULL << ((unsigned)a->nr_image_rats * 4)) - 1) << (a->nr_cbufs * 4);
|
||||
unsigned rat_colormask = ((1ULL << ((unsigned)(a->nr_image_rats + a->nr_buffer_rats) * 4)) - 1) << (a->nr_cbufs * 4);
|
||||
radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
|
||||
radeon_emit(cs, (a->blend_colormask & fb_colormask) | rat_colormask); /* R_028238_CB_TARGET_MASK */
|
||||
/* This must match the used export instructions exactly.
|
||||
|
|
@ -3871,6 +3884,116 @@ static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
|
|||
}
|
||||
}
|
||||
|
||||
static void evergreen_set_shader_buffers(struct pipe_context *ctx,
|
||||
enum pipe_shader_type shader, unsigned start_slot,
|
||||
unsigned count,
|
||||
const struct pipe_shader_buffer *buffers)
|
||||
{
|
||||
struct r600_context *rctx = (struct r600_context *)ctx;
|
||||
struct r600_screen *rscreen = (struct r600_screen *)ctx->screen;
|
||||
struct r600_image_state *istate = NULL;
|
||||
struct r600_image_view *rview;
|
||||
struct r600_tex_color_info color;
|
||||
struct eg_buf_res_params buf_params;
|
||||
struct r600_resource *resource;
|
||||
int i, idx;
|
||||
unsigned old_mask;
|
||||
bool skip_reloc = false;
|
||||
|
||||
if (shader != PIPE_SHADER_FRAGMENT && count == 0)
|
||||
return;
|
||||
|
||||
assert(shader == PIPE_SHADER_FRAGMENT);
|
||||
istate = &rctx->fragment_buffers;
|
||||
|
||||
old_mask = istate->enabled_mask;
|
||||
for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) {
|
||||
const struct pipe_shader_buffer *buf;
|
||||
unsigned res_type;
|
||||
|
||||
rview = &istate->views[i];
|
||||
|
||||
if (!buffers || !buffers[idx].buffer) {
|
||||
pipe_resource_reference((struct pipe_resource **)&rview->base.resource, NULL);
|
||||
istate->enabled_mask &= ~(1 << i);
|
||||
continue;
|
||||
}
|
||||
|
||||
buf = &buffers[idx];
|
||||
pipe_resource_reference((struct pipe_resource **)&rview->base.resource, buf->buffer);
|
||||
|
||||
resource = (struct r600_resource *)rview->base.resource;
|
||||
if (!resource->immed_buffer) {
|
||||
int immed_size = (rscreen->b.info.max_se * 256 * 64) * util_format_get_blocksize(resource->b.b.format);
|
||||
|
||||
eg_resource_alloc_immed(&rscreen->b, resource, immed_size);
|
||||
}
|
||||
|
||||
color.offset = 0;
|
||||
color.view = 0;
|
||||
evergreen_set_color_surface_buffer(rctx, resource,
|
||||
PIPE_FORMAT_R32_FLOAT,
|
||||
buf->buffer_offset,
|
||||
buf->buffer_offset + buf->buffer_size,
|
||||
&color);
|
||||
|
||||
res_type = V_028C70_BUFFER;
|
||||
|
||||
rview->cb_color_base = color.offset;
|
||||
rview->cb_color_dim = color.dim;
|
||||
rview->cb_color_info = color.info |
|
||||
S_028C70_RAT(1) |
|
||||
S_028C70_RESOURCE_TYPE(res_type);
|
||||
rview->cb_color_pitch = color.pitch;
|
||||
rview->cb_color_slice = color.slice;
|
||||
rview->cb_color_view = color.view;
|
||||
rview->cb_color_attrib = color.attrib;
|
||||
rview->cb_color_fmask = color.fmask;
|
||||
rview->cb_color_fmask_slice = color.fmask_slice;
|
||||
|
||||
memset(&buf_params, 0, sizeof(buf_params));
|
||||
buf_params.pipe_format = resource->b.b.format;
|
||||
buf_params.size = resource->immed_buffer->b.b.width0;
|
||||
buf_params.swizzle[0] = PIPE_SWIZZLE_X;
|
||||
buf_params.swizzle[1] = PIPE_SWIZZLE_Y;
|
||||
buf_params.swizzle[2] = PIPE_SWIZZLE_Z;
|
||||
buf_params.swizzle[3] = PIPE_SWIZZLE_W;
|
||||
buf_params.uncached = 1;
|
||||
evergreen_fill_buffer_resource_words(rctx, &resource->immed_buffer->b.b,
|
||||
&buf_params, &skip_reloc,
|
||||
rview->immed_resource_words);
|
||||
|
||||
memset(&buf_params, 0, sizeof(buf_params));
|
||||
buf_params.pipe_format = PIPE_FORMAT_R32_FLOAT;
|
||||
buf_params.offset = buf->buffer_offset;
|
||||
buf_params.size = buf->buffer_size;
|
||||
buf_params.swizzle[0] = PIPE_SWIZZLE_X;
|
||||
buf_params.swizzle[1] = PIPE_SWIZZLE_Y;
|
||||
buf_params.swizzle[2] = PIPE_SWIZZLE_Z;
|
||||
buf_params.swizzle[3] = PIPE_SWIZZLE_W;
|
||||
buf_params.force_swizzle = true;
|
||||
buf_params.uncached = 1;
|
||||
evergreen_fill_buffer_resource_words(rctx, &resource->b.b,
|
||||
&buf_params,
|
||||
&rview->skip_mip_address_reloc,
|
||||
rview->resource_words);
|
||||
|
||||
istate->enabled_mask |= (1 << i);
|
||||
}
|
||||
|
||||
istate->atom.num_dw = util_bitcount(istate->enabled_mask) * 46;
|
||||
|
||||
if (old_mask != istate->enabled_mask)
|
||||
r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);
|
||||
|
||||
if (rctx->cb_misc_state.nr_buffer_rats != util_bitcount(istate->enabled_mask)) {
|
||||
rctx->cb_misc_state.nr_buffer_rats = util_bitcount(istate->enabled_mask);
|
||||
r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
|
||||
}
|
||||
|
||||
r600_mark_atom_dirty(rctx, &istate->atom);
|
||||
}
|
||||
|
||||
static void evergreen_set_shader_images(struct pipe_context *ctx,
|
||||
enum pipe_shader_type shader, unsigned start_slot,
|
||||
unsigned count,
|
||||
|
|
@ -4079,6 +4202,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
|
|||
}
|
||||
r600_init_atom(rctx, &rctx->framebuffer.atom, id++, evergreen_emit_framebuffer_state, 0);
|
||||
r600_init_atom(rctx, &rctx->fragment_images.atom, id++, evergreen_emit_fragment_image_state, 0);
|
||||
r600_init_atom(rctx, &rctx->fragment_buffers.atom, id++, evergreen_emit_fragment_buffer_state, 0);
|
||||
/* shader const */
|
||||
r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_VERTEX].atom, id++, evergreen_emit_vs_constant_buffers, 0);
|
||||
r600_init_atom(rctx, &rctx->constbuf_state[PIPE_SHADER_GEOMETRY].atom, id++, evergreen_emit_gs_constant_buffers, 0);
|
||||
|
|
@ -4148,6 +4272,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
|
|||
rctx->b.b.set_tess_state = evergreen_set_tess_state;
|
||||
rctx->b.b.set_hw_atomic_buffers = evergreen_set_hw_atomic_buffers;
|
||||
rctx->b.b.set_shader_images = evergreen_set_shader_images;
|
||||
rctx->b.b.set_shader_buffers = evergreen_set_shader_buffers;
|
||||
if (rctx->b.chip_class == EVERGREEN)
|
||||
rctx->b.b.get_sample_position = evergreen_get_sample_position;
|
||||
else
|
||||
|
|
|
|||
|
|
@ -348,8 +348,10 @@ void r600_begin_new_cs(struct r600_context *ctx)
|
|||
r600_mark_atom_dirty(ctx, &ctx->db_misc_state.atom);
|
||||
r600_mark_atom_dirty(ctx, &ctx->db_state.atom);
|
||||
r600_mark_atom_dirty(ctx, &ctx->framebuffer.atom);
|
||||
if (ctx->b.chip_class >= EVERGREEN)
|
||||
if (ctx->b.chip_class >= EVERGREEN) {
|
||||
r600_mark_atom_dirty(ctx, &ctx->fragment_images.atom);
|
||||
r600_mark_atom_dirty(ctx, &ctx->fragment_buffers.atom);
|
||||
}
|
||||
r600_mark_atom_dirty(ctx, &ctx->hw_shader_stages[R600_HW_STAGE_PS].atom);
|
||||
r600_mark_atom_dirty(ctx, &ctx->poly_offset_state.atom);
|
||||
r600_mark_atom_dirty(ctx, &ctx->vgt_state.atom);
|
||||
|
|
|
|||
|
|
@ -382,7 +382,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
|
||||
case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
|
||||
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
|
||||
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
|
||||
case PIPE_CAP_GENERATE_MIPMAP:
|
||||
case PIPE_CAP_STRING_MARKER:
|
||||
case PIPE_CAP_QUERY_BUFFER_OBJECT:
|
||||
|
|
@ -424,6 +423,11 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
|||
case PIPE_CAP_CULL_DISTANCE:
|
||||
return 1;
|
||||
|
||||
case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
|
||||
if (family >= CHIP_CEDAR)
|
||||
return 256;
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
|
||||
if (family >= CHIP_CEDAR)
|
||||
return 30;
|
||||
|
|
@ -609,10 +613,10 @@ static int r600_get_shader_param(struct pipe_screen* pscreen,
|
|||
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
|
||||
case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
|
||||
case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
|
||||
case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
|
||||
if (rscreen->b.family >= CHIP_CEDAR &&
|
||||
(shader == PIPE_SHADER_FRAGMENT))
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@
|
|||
|
||||
#include "tgsi/tgsi_scan.h"
|
||||
|
||||
#define R600_NUM_ATOMS 53
|
||||
#define R600_NUM_ATOMS 54
|
||||
|
||||
#define R600_MAX_IMAGES 8
|
||||
/*
|
||||
|
|
@ -145,6 +145,7 @@ struct r600_cb_misc_state {
|
|||
unsigned nr_cbufs;
|
||||
unsigned nr_ps_color_outputs;
|
||||
unsigned nr_image_rats;
|
||||
unsigned nr_buffer_rats;
|
||||
bool multiwrite;
|
||||
bool dual_src_blend;
|
||||
};
|
||||
|
|
@ -521,6 +522,7 @@ struct r600_context {
|
|||
struct r600_atomic_buffer_state atomic_buffer_state;
|
||||
/* only have images on fragment shader */
|
||||
struct r600_image_state fragment_images;
|
||||
struct r600_image_state fragment_buffers;
|
||||
/* Shaders and shader resources. */
|
||||
struct r600_cso_state vertex_fetch_shader;
|
||||
struct r600_shader_state hw_shader_stages[EG_NUM_HW_STAGES];
|
||||
|
|
|
|||
|
|
@ -968,6 +968,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
|
|||
case TGSI_FILE_SAMPLER:
|
||||
case TGSI_FILE_SAMPLER_VIEW:
|
||||
case TGSI_FILE_ADDRESS:
|
||||
case TGSI_FILE_BUFFER:
|
||||
case TGSI_FILE_IMAGE:
|
||||
break;
|
||||
|
||||
|
|
@ -3064,7 +3065,8 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
|||
shader->uses_atomics = ctx.info.file_mask[TGSI_FILE_HW_ATOMIC];
|
||||
shader->nsys_inputs = 0;
|
||||
|
||||
shader->uses_images = ctx.info.file_count[TGSI_FILE_IMAGE] > 0;
|
||||
shader->uses_images = ctx.info.file_count[TGSI_FILE_IMAGE] > 0 ||
|
||||
ctx.info.file_count[TGSI_FILE_BUFFER] > 0;
|
||||
indirect_gprs = ctx.info.indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER));
|
||||
tgsi_parse_init(&ctx.parse, tokens);
|
||||
ctx.type = ctx.info.processor;
|
||||
|
|
@ -7902,6 +7904,79 @@ static int load_index_src(struct r600_shader_ctx *ctx, int src_index, int *idx_g
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int tgsi_load_buffer(struct r600_shader_ctx *ctx)
|
||||
{
|
||||
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
|
||||
/* have to work out the offset into the RAT immediate return buffer */
|
||||
struct r600_bytecode_vtx vtx;
|
||||
struct r600_bytecode_cf *cf;
|
||||
int r;
|
||||
int temp_reg = r600_get_temp(ctx);
|
||||
unsigned rat_index_mode;
|
||||
unsigned base;
|
||||
|
||||
rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
|
||||
base = R600_IMAGE_REAL_RESOURCE_OFFSET + ctx->info.file_count[TGSI_FILE_IMAGE];
|
||||
|
||||
if (inst->Src[1].Register.File == TGSI_FILE_IMMEDIATE) {
|
||||
int value = (ctx->literals[4 * inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX]);
|
||||
r = single_alu_op2(ctx, ALU_OP1_MOV,
|
||||
temp_reg, 0,
|
||||
V_SQ_ALU_SRC_LITERAL, value >> 2,
|
||||
0, 0);
|
||||
if (r)
|
||||
return r;
|
||||
} else {
|
||||
struct r600_bytecode_alu alu;
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ALU_OP2_LSHR_INT;
|
||||
r600_bytecode_src(&alu.src[0], &ctx->src[1], 0);
|
||||
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
|
||||
alu.src[1].value = 2;
|
||||
alu.dst.sel = temp_reg;
|
||||
alu.dst.write = 1;
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
ctx->bc->cf_last->barrier = 1;
|
||||
memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
|
||||
vtx.op = FETCH_OP_VFETCH;
|
||||
vtx.buffer_id = inst->Src[0].Register.Index + base;
|
||||
vtx.buffer_index_mode = rat_index_mode;
|
||||
vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
|
||||
vtx.src_gpr = temp_reg;
|
||||
vtx.src_sel_x = 0;
|
||||
vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
|
||||
vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; /* SEL_X */
|
||||
vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; /* SEL_Y */
|
||||
vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; /* SEL_Z */
|
||||
vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; /* SEL_W */
|
||||
vtx.num_format_all = 1;
|
||||
vtx.format_comp_all = 1;
|
||||
vtx.srf_mode_all = 0;
|
||||
|
||||
if (inst->Dst[0].Register.WriteMask == 0xf) {
|
||||
vtx.data_format = FMT_32_32_32_32;
|
||||
vtx.use_const_fields = 0;
|
||||
} else if (inst->Dst[0].Register.WriteMask == 0x7) {
|
||||
vtx.data_format = FMT_32_32_32;
|
||||
vtx.use_const_fields = 0;
|
||||
} else if (inst->Dst[0].Register.WriteMask == 0x3) {
|
||||
vtx.data_format = FMT_32_32;
|
||||
vtx.use_const_fields = 0;
|
||||
} else
|
||||
vtx.use_const_fields = 1;
|
||||
|
||||
r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx);
|
||||
if (r)
|
||||
return r;
|
||||
cf = ctx->bc->cf_last;
|
||||
cf->barrier = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int tgsi_load_rat(struct r600_shader_ctx *ctx)
|
||||
{
|
||||
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
|
||||
|
|
@ -7982,6 +8057,95 @@ static int tgsi_load(struct r600_shader_ctx *ctx)
|
|||
return tgsi_load_rat(ctx);
|
||||
if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC)
|
||||
return tgsi_load_gds(ctx);
|
||||
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
|
||||
return tgsi_load_buffer(ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int tgsi_store_buffer_rat(struct r600_shader_ctx *ctx)
|
||||
{
|
||||
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
|
||||
struct r600_bytecode_cf *cf;
|
||||
int r, i;
|
||||
unsigned rat_index_mode;
|
||||
int lasti;
|
||||
int temp_reg = r600_get_temp(ctx), treg2 = r600_get_temp(ctx);
|
||||
|
||||
if (inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE) {
|
||||
int value = (ctx->literals[4 * inst->Src[0].Register.Index + inst->Src[0].Register.SwizzleX]);
|
||||
r = single_alu_op2(ctx, ALU_OP1_MOV,
|
||||
treg2, 0,
|
||||
V_SQ_ALU_SRC_LITERAL, value >> 2,
|
||||
0, 0);
|
||||
if (r)
|
||||
return r;
|
||||
} else {
|
||||
r = single_alu_op2(ctx, ALU_OP2_LSHR_INT,
|
||||
treg2, 0,
|
||||
ctx->src[0].sel, ctx->src[0].swizzle[0],
|
||||
V_SQ_ALU_SRC_LITERAL, 2);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
rat_index_mode = inst->Dst[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
|
||||
if (rat_index_mode)
|
||||
egcm_load_index_reg(ctx->bc, 1, false);
|
||||
|
||||
for (i = 0; i <= 3; i++) {
|
||||
struct r600_bytecode_alu alu;
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ALU_OP1_MOV;
|
||||
alu.dst.sel = temp_reg;
|
||||
alu.dst.chan = i;
|
||||
alu.src[0].sel = V_SQ_ALU_SRC_0;
|
||||
alu.last = (i == 3);
|
||||
alu.dst.write = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
|
||||
for (i = 0; i <= lasti; i++) {
|
||||
struct r600_bytecode_alu alu;
|
||||
if (!((1 << i) & inst->Dst[0].Register.WriteMask))
|
||||
continue;
|
||||
|
||||
r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
|
||||
temp_reg, 0,
|
||||
treg2, 0,
|
||||
V_SQ_ALU_SRC_LITERAL, i);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ALU_OP1_MOV;
|
||||
alu.dst.sel = ctx->temp_reg;
|
||||
alu.dst.chan = 0;
|
||||
|
||||
r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
|
||||
alu.last = 1;
|
||||
alu.dst.write = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
|
||||
cf = ctx->bc->cf_last;
|
||||
|
||||
cf->rat.id = ctx->shader->rat_base + inst->Dst[0].Register.Index + ctx->info.file_count[TGSI_FILE_IMAGE];
|
||||
cf->rat.inst = V_RAT_INST_STORE_TYPED;
|
||||
cf->rat.index_mode = rat_index_mode;
|
||||
cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
|
||||
cf->output.gpr = ctx->temp_reg;
|
||||
cf->output.index_gpr = temp_reg;
|
||||
cf->output.comp_mask = 1;
|
||||
cf->output.burst_count = 1;
|
||||
cf->vpm = 1;
|
||||
cf->barrier = 1;
|
||||
cf->output.elem_size = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -8044,7 +8208,11 @@ static int tgsi_store_rat(struct r600_shader_ctx *ctx)
|
|||
|
||||
static int tgsi_store(struct r600_shader_ctx *ctx)
|
||||
{
|
||||
return tgsi_store_rat(ctx);
|
||||
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
|
||||
if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER)
|
||||
return tgsi_store_buffer_rat(ctx);
|
||||
else
|
||||
return tgsi_store_rat(ctx);
|
||||
}
|
||||
|
||||
static int tgsi_atomic_op_rat(struct r600_shader_ctx *ctx)
|
||||
|
|
@ -8060,10 +8228,16 @@ static int tgsi_atomic_op_rat(struct r600_shader_ctx *ctx)
|
|||
const struct util_format_description *desc;
|
||||
unsigned rat_index_mode;
|
||||
unsigned immed_base;
|
||||
unsigned rat_base;
|
||||
|
||||
immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET;
|
||||
rat_base = ctx->shader->rat_base;
|
||||
|
||||
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
|
||||
immed_base += ctx->info.file_count[TGSI_FILE_IMAGE];
|
||||
rat_base += ctx->info.file_count[TGSI_FILE_IMAGE];
|
||||
}
|
||||
|
||||
assert (inst->Src[0].Register.File == TGSI_FILE_IMAGE);
|
||||
rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
|
||||
|
||||
r = load_index_src(ctx, 1, &idx_gpr);
|
||||
|
|
@ -8113,7 +8287,7 @@ static int tgsi_atomic_op_rat(struct r600_shader_ctx *ctx)
|
|||
r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
|
||||
cf = ctx->bc->cf_last;
|
||||
|
||||
cf->rat.id = ctx->shader->rat_base + inst->Src[0].Register.Index;
|
||||
cf->rat.id = rat_base + inst->Src[0].Register.Index;
|
||||
cf->rat.inst = ctx->inst_info->op;
|
||||
cf->rat.index_mode = rat_index_mode;
|
||||
cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND;
|
||||
|
|
@ -8264,6 +8438,8 @@ static int tgsi_atomic_op(struct r600_shader_ctx *ctx)
|
|||
return tgsi_atomic_op_rat(ctx);
|
||||
if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC)
|
||||
return tgsi_atomic_op_gds(ctx);
|
||||
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
|
||||
return tgsi_atomic_op_rat(ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -8275,7 +8451,8 @@ static int tgsi_resq(struct r600_shader_ctx *ctx)
|
|||
int r;
|
||||
boolean has_txq_cube_array_z = false;
|
||||
|
||||
if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
|
||||
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
|
||||
(inst->Src[0].Register.File == TGSI_FILE_IMAGE && inst->Memory.Texture == TGSI_TEXTURE_BUFFER)) {
|
||||
ctx->shader->uses_tex_buffers = true;
|
||||
return r600_do_buffer_txq(ctx, 0, ctx->shader->image_size_const_offset);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1333,26 +1333,35 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type
|
|||
{
|
||||
struct r600_textures_info *samplers = &rctx->samplers[shader_type];
|
||||
struct r600_image_state *images = NULL;
|
||||
int bits, sview_bits;
|
||||
struct r600_image_state *buffers = NULL;
|
||||
int bits, sview_bits, img_bits;
|
||||
uint32_t array_size;
|
||||
int i;
|
||||
uint32_t *constants;
|
||||
uint32_t base_offset;
|
||||
|
||||
if (shader_type == PIPE_SHADER_FRAGMENT)
|
||||
if (shader_type == PIPE_SHADER_FRAGMENT) {
|
||||
images = &rctx->fragment_images;
|
||||
buffers = &rctx->fragment_buffers;
|
||||
}
|
||||
|
||||
if (!samplers->views.dirty_buffer_constants &&
|
||||
(images && !images->dirty_buffer_constants))
|
||||
(images && !images->dirty_buffer_constants) &&
|
||||
(buffers && !buffers->dirty_buffer_constants))
|
||||
return;
|
||||
|
||||
if (images)
|
||||
images->dirty_buffer_constants = FALSE;
|
||||
if (buffers)
|
||||
buffers->dirty_buffer_constants = FALSE;
|
||||
samplers->views.dirty_buffer_constants = FALSE;
|
||||
|
||||
bits = sview_bits = util_last_bit(samplers->views.enabled_mask);
|
||||
if (images)
|
||||
bits += util_last_bit(images->enabled_mask);
|
||||
img_bits = bits;
|
||||
if (buffers)
|
||||
bits += util_last_bit(buffers->enabled_mask);
|
||||
array_size = bits * 2 * sizeof(uint32_t) * 4;
|
||||
|
||||
constants = r600_alloc_buf_consts(rctx, shader_type, array_size,
|
||||
|
|
@ -1366,7 +1375,7 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type
|
|||
}
|
||||
}
|
||||
if (images) {
|
||||
for (i = sview_bits; i < bits; i++) {
|
||||
for (i = sview_bits; i < img_bits; i++) {
|
||||
int idx = i - sview_bits;
|
||||
if (images->enabled_mask & (1 << idx)) {
|
||||
uint32_t offset = (base_offset / 4) + i * 2;
|
||||
|
|
@ -1375,6 +1384,16 @@ static void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type
|
|||
}
|
||||
}
|
||||
}
|
||||
if (buffers) {
|
||||
for (i = img_bits; i < bits; i++) {
|
||||
int idx = i - img_bits;
|
||||
if (buffers->enabled_mask & (1 << idx)) {
|
||||
uint32_t offset = (base_offset / 4) + i * 2;
|
||||
constants[offset] = buffers->views[i].base.resource->width0 / util_format_get_blocksize(buffers->views[i].base.format);
|
||||
constants[offset + 1] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* set sample xy locations as array of fragment shader constants */
|
||||
|
|
@ -3027,6 +3046,24 @@ static void r600_invalidate_buffer(struct pipe_context *ctx, struct pipe_resourc
|
|||
r600_sampler_views_dirty(rctx, state);
|
||||
}
|
||||
}
|
||||
|
||||
/* SSBOs */
|
||||
struct r600_image_state *istate = &rctx->fragment_buffers;
|
||||
{
|
||||
uint32_t mask = istate->enabled_mask;
|
||||
bool found = false;
|
||||
while (mask) {
|
||||
unsigned i = u_bit_scan(&mask);
|
||||
if (istate->views[i].base.resource == &rbuffer->b.b) {
|
||||
found = true;
|
||||
istate->dirty_mask |= 1 << i;
|
||||
}
|
||||
}
|
||||
if (found) {
|
||||
r600_mark_atom_dirty(rctx, &istate->atom);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void r600_set_active_query_state(struct pipe_context *ctx, boolean enable)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue