mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-22 11:50:36 +02:00
gallium: improve the pipe_stream_output_info struct (v2)
There are 3 changes:
1) stride is specified for each buffer, not just one, so that drivers don't have to derive it from the outputs
2) new per-output property dst_offset, which specifies the offset into the buffer in dwords where the output should be stored, so that drivers don't have to compute the offsets manually; this will also be useful for gl_SkipComponents from ARB_transform_feedback3
3) register_mask is removed; instead, there are start_component and num_components. A register_mask with non-consecutive 1s doesn't make much sense (some hardware cannot do packing of components)

Christoph Bumiller: fixed nvc0.

v2: resolve merge conflicts in Draw and clean it up
This commit is contained in:
parent
faa90abfe0
commit
2449695e82
13 changed files with 57 additions and 151 deletions
|
|
@ -79,72 +79,6 @@ void draw_pt_so_emit_prepare(struct pt_so_emit *emit)
|
|||
draw_do_flush( draw, DRAW_FLUSH_BACKEND );
|
||||
}
|
||||
|
||||
static boolean
|
||||
is_component_writable(unsigned mask,
|
||||
unsigned compo)
|
||||
{
|
||||
switch (mask) {
|
||||
case TGSI_WRITEMASK_NONE:
|
||||
return FALSE;
|
||||
case TGSI_WRITEMASK_X:
|
||||
return compo == 0;
|
||||
case TGSI_WRITEMASK_Y:
|
||||
return compo == 1;
|
||||
case TGSI_WRITEMASK_XY:
|
||||
return compo == 0 || compo == 1;
|
||||
case TGSI_WRITEMASK_Z:
|
||||
return compo == 2;
|
||||
case TGSI_WRITEMASK_XZ:
|
||||
return compo == 0 || compo == 2;
|
||||
case TGSI_WRITEMASK_YZ:
|
||||
return compo == 1 || compo == 2;
|
||||
case TGSI_WRITEMASK_XYZ:
|
||||
return compo == 0 || compo == 1 || compo == 2;
|
||||
case TGSI_WRITEMASK_W:
|
||||
return compo == 3;
|
||||
case TGSI_WRITEMASK_XW:
|
||||
return compo == 0 || compo == 3;
|
||||
case TGSI_WRITEMASK_YW:
|
||||
return compo == 1 || compo == 3;
|
||||
case TGSI_WRITEMASK_XYW:
|
||||
return compo == 0 || compo == 1 || compo == 3;
|
||||
case TGSI_WRITEMASK_ZW:
|
||||
return compo == 2 || compo == 3;
|
||||
case TGSI_WRITEMASK_XZW:
|
||||
return compo == 0 || compo == 1 || compo == 3;
|
||||
case TGSI_WRITEMASK_YZW:
|
||||
return compo == 1 || compo == 2 || compo == 4;
|
||||
case TGSI_WRITEMASK_XYZW:
|
||||
return compo < 4;
|
||||
default:
|
||||
debug_assert(!"Unknown writemask in stream out");
|
||||
return compo < 4;
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE int mask_num_comps(int register_mask)
|
||||
{
|
||||
int comps = 0;
|
||||
switch (register_mask) {
|
||||
case TGSI_WRITEMASK_XYZW:
|
||||
comps = 4;
|
||||
break;
|
||||
case TGSI_WRITEMASK_XYZ:
|
||||
comps = 3;
|
||||
break;
|
||||
case TGSI_WRITEMASK_XY:
|
||||
comps = 2;
|
||||
break;
|
||||
case TGSI_WRITEMASK_X:
|
||||
comps = 1;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
break;
|
||||
}
|
||||
return comps;
|
||||
}
|
||||
|
||||
static void so_emit_prim(struct pt_so_emit *so,
|
||||
unsigned *indices,
|
||||
unsigned num_vertices)
|
||||
|
|
@ -170,14 +104,14 @@ static void so_emit_prim(struct pt_so_emit *so,
|
|||
/* check have we space to emit prim first - if not don't do anything */
|
||||
for (i = 0; i < num_vertices; ++i) {
|
||||
for (slot = 0; slot < state->num_outputs; ++slot) {
|
||||
unsigned writemask = state->output[slot].register_mask;
|
||||
unsigned num_comps = state->output[slot].num_components;
|
||||
int ob = state->output[slot].output_buffer;
|
||||
|
||||
if ((buffer_total_bytes[ob] + mask_num_comps(writemask) * sizeof(float)) >
|
||||
if ((buffer_total_bytes[ob] + num_comps * sizeof(float)) >
|
||||
draw->so.targets[ob]->target.buffer_size) {
|
||||
return;
|
||||
}
|
||||
buffer_total_bytes[ob] += mask_num_comps(writemask) * sizeof(float);
|
||||
buffer_total_bytes[ob] += num_comps * sizeof(float);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -190,21 +124,16 @@ static void so_emit_prim(struct pt_so_emit *so,
|
|||
|
||||
for (slot = 0; slot < state->num_outputs; ++slot) {
|
||||
unsigned idx = state->output[slot].register_index;
|
||||
unsigned writemask = state->output[slot].register_mask;
|
||||
unsigned written_compos = 0;
|
||||
unsigned compo;
|
||||
unsigned start_comp = state->output[slot].start_component;
|
||||
unsigned num_comps = state->output[slot].num_components;
|
||||
int ob = state->output[slot].output_buffer;
|
||||
|
||||
buffer = (float *)((char *)draw->so.targets[ob]->mapping +
|
||||
draw->so.targets[ob]->target.buffer_offset +
|
||||
draw->so.targets[ob]->internal_offset);
|
||||
for (compo = 0; compo < 4; ++compo) {
|
||||
if (is_component_writable(writemask, compo)) {
|
||||
buffer[written_compos++] = input[idx][compo];
|
||||
}
|
||||
}
|
||||
draw->so.targets[ob]->internal_offset += written_compos * sizeof(float);
|
||||
total_written_compos += written_compos;
|
||||
memcpy(buffer, &input[idx][start_comp], num_comps * sizeof(float));
|
||||
draw->so.targets[ob]->internal_offset += num_comps * sizeof(float);
|
||||
total_written_compos += num_comps;
|
||||
}
|
||||
}
|
||||
so->emitted_vertices += num_vertices;
|
||||
|
|
|
|||
|
|
@ -263,8 +263,8 @@ struct blitter_context *util_blitter_create(struct pipe_context *pipe)
|
|||
|
||||
memset(&so, 0, sizeof(so));
|
||||
so.num_outputs = 1;
|
||||
so.output[0].register_mask = TGSI_WRITEMASK_XYZW;
|
||||
so.stride = 4;
|
||||
so.output[0].num_components = 4;
|
||||
so.stride[0] = 4;
|
||||
|
||||
ctx->vs_pos_only =
|
||||
util_make_vertex_passthrough_shader_with_so(pipe, 1, semantic_names,
|
||||
|
|
|
|||
|
|
@ -444,13 +444,15 @@ util_dump_shader_state(FILE *stream, const struct pipe_shader_state *state)
|
|||
util_dump_member_begin(stream, "stream_output");
|
||||
util_dump_struct_begin(stream, "pipe_stream_output_info");
|
||||
util_dump_member(stream, uint, &state->stream_output, num_outputs);
|
||||
util_dump_member(stream, uint, &state->stream_output, stride);
|
||||
util_dump_array(stream, uint, state->stream_output.stride,
|
||||
Elements(state->stream_output.stride));
|
||||
util_dump_array_begin(stream);
|
||||
for(i = 0; i < state->stream_output.num_outputs; ++i) {
|
||||
util_dump_elem_begin(stream);
|
||||
util_dump_struct_begin(stream, ""); /* anonymous */
|
||||
util_dump_member(stream, uint, &state->stream_output.output[i], register_index);
|
||||
util_dump_member(stream, uint, &state->stream_output.output[i], register_mask);
|
||||
util_dump_member(stream, uint, &state->stream_output.output[i], start_component);
|
||||
util_dump_member(stream, uint, &state->stream_output.output[i], num_components);
|
||||
util_dump_member(stream, uint, &state->stream_output.output[i], output_buffer);
|
||||
util_dump_struct_end(stream);
|
||||
util_dump_elem_end(stream);
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ llvmpipe_create_stream_output_state(struct pipe_context *pipe,
|
|||
|
||||
if (so) {
|
||||
so->base.num_outputs = templ->num_outputs;
|
||||
so->base.stride = templ->stride;
|
||||
memcpy(so->base.stride, templ->stride, sizeof(templ->stride));
|
||||
memcpy(so->base.output, templ->output,
|
||||
templ->num_outputs * sizeof(templ->output[0]));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -503,20 +503,17 @@ nvc0_program_create_tfb_state(const struct nv50_ir_prog_info *info,
|
|||
tfb->varying_count[b] = 0;
|
||||
|
||||
for (i = 0; i < pso->num_outputs; ++i) {
|
||||
unsigned startc = pso->output[i].start_component;
|
||||
if (pso->output[i].output_buffer != b)
|
||||
continue;
|
||||
for (c = 0; c < 4; ++c) {
|
||||
if (!(pso->output[i].register_mask & (1 << c)))
|
||||
continue;
|
||||
for (c = 0; c < pso->output[i].num_components; ++c) {
|
||||
tfb->varying_count[b]++;
|
||||
tfb->varying_index[n++] =
|
||||
info->out[pso->output[i].register_index].slot[c];
|
||||
info->out[pso->output[i].register_index].slot[startc + c];
|
||||
}
|
||||
}
|
||||
tfb->stride[b] = tfb->varying_count[b] * 4;
|
||||
tfb->stride[b] = pso->stride[b] * 4;
|
||||
}
|
||||
if (pso->stride)
|
||||
tfb->stride[0] = pso->stride;
|
||||
|
||||
return tfb;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -196,7 +196,7 @@ struct r600_so_target {
|
|||
|
||||
/* The buffer where BUFFER_FILLED_SIZE is stored. */
|
||||
struct r600_resource *filled_size;
|
||||
unsigned stride;
|
||||
unsigned stride_in_dw;
|
||||
unsigned so_index;
|
||||
};
|
||||
|
||||
|
|
@ -248,7 +248,7 @@ struct r600_context {
|
|||
struct r600_so_target *so_targets[PIPE_MAX_SO_BUFFERS];
|
||||
boolean streamout_start;
|
||||
unsigned streamout_append_bitmask;
|
||||
unsigned *vs_shader_so_strides;
|
||||
unsigned *vs_so_stride_in_dw;
|
||||
};
|
||||
|
||||
struct r600_draw {
|
||||
|
|
|
|||
|
|
@ -2033,7 +2033,7 @@ static void r600_set_streamout_enable(struct r600_context *ctx, unsigned buffer_
|
|||
void r600_context_streamout_begin(struct r600_context *ctx)
|
||||
{
|
||||
struct r600_so_target **t = ctx->so_targets;
|
||||
unsigned *strides = ctx->vs_shader_so_strides;
|
||||
unsigned *stride_in_dw = ctx->vs_so_stride_in_dw;
|
||||
unsigned buffer_en, i, update_flags = 0;
|
||||
|
||||
buffer_en = (ctx->num_so_targets >= 1 && t[0] ? 1 : 0) |
|
||||
|
|
@ -2064,7 +2064,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
|
|||
|
||||
for (i = 0; i < ctx->num_so_targets; i++) {
|
||||
if (t[i]) {
|
||||
t[i]->stride = strides[i];
|
||||
t[i]->stride_in_dw = stride_in_dw[i];
|
||||
t[i]->so_index = i;
|
||||
|
||||
update_flags |= SURFACE_BASE_UPDATE_STRMOUT(i);
|
||||
|
|
@ -2074,7 +2074,7 @@ void r600_context_streamout_begin(struct r600_context *ctx)
|
|||
16*i - R600_CONTEXT_REG_OFFSET) >> 2;
|
||||
ctx->pm4[ctx->pm4_cdwords++] = (t[i]->b.buffer_offset +
|
||||
t[i]->b.buffer_size) >> 2; /* BUFFER_SIZE (in DW) */
|
||||
ctx->pm4[ctx->pm4_cdwords++] = strides[i] >> 2; /* VTX_STRIDE (in DW) */
|
||||
ctx->pm4[ctx->pm4_cdwords++] = stride_in_dw[i]; /* VTX_STRIDE (in DW) */
|
||||
ctx->pm4[ctx->pm4_cdwords++] = 0; /* BUFFER_BASE */
|
||||
|
||||
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
|
||||
|
|
@ -2186,7 +2186,7 @@ void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_tar
|
|||
|
||||
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
|
||||
ctx->pm4[ctx->pm4_cdwords++] = (R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE - R600_CONTEXT_REG_OFFSET) >> 2;
|
||||
ctx->pm4[ctx->pm4_cdwords++] = t->stride >> 2;
|
||||
ctx->pm4[ctx->pm4_cdwords++] = t->stride_in_dw;
|
||||
|
||||
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_COPY_DW, 4, 0);
|
||||
ctx->pm4[ctx->pm4_cdwords++] = COPY_DW_SRC_IS_MEM | COPY_DW_DST_IS_REG;
|
||||
|
|
|
|||
|
|
@ -147,7 +147,6 @@ struct r600_pipe_shader {
|
|||
struct tgsi_token *tokens;
|
||||
unsigned sprite_coord_enable;
|
||||
struct pipe_stream_output_info so;
|
||||
unsigned so_strides[4];
|
||||
};
|
||||
|
||||
struct r600_pipe_sampler_state {
|
||||
|
|
|
|||
|
|
@ -124,12 +124,14 @@ int r600_pipe_shader_create(struct pipe_context *ctx, struct r600_pipe_shader *s
|
|||
unsigned i;
|
||||
fprintf(stderr, "STREAMOUT\n");
|
||||
for (i = 0; i < shader->so.num_outputs; i++) {
|
||||
unsigned mask = ((1 << shader->so.output[i].num_components) - 1) <<
|
||||
shader->so.output[i].start_component;
|
||||
fprintf(stderr, " %i: MEM_STREAM0_BUF%i OUT[%i].%s%s%s%s\n", i,
|
||||
shader->so.output[i].output_buffer, shader->so.output[i].register_index,
|
||||
shader->so.output[i].register_mask & 1 ? "x" : "_",
|
||||
(shader->so.output[i].register_mask >> 1) & 1 ? "y" : "_",
|
||||
(shader->so.output[i].register_mask >> 2) & 1 ? "z" : "_",
|
||||
(shader->so.output[i].register_mask >> 3) & 1 ? "w" : "_");
|
||||
mask & 1 ? "x" : "_",
|
||||
(mask >> 1) & 1 ? "y" : "_",
|
||||
(mask >> 2) & 1 ? "z" : "_",
|
||||
(mask >> 3) & 1 ? "w" : "_");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -863,11 +865,8 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
|
|||
|
||||
/* Add stream outputs. */
|
||||
if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) {
|
||||
unsigned buffer_offset[PIPE_MAX_SO_BUFFERS] = {0};
|
||||
|
||||
for (i = 0; i < so.num_outputs; i++) {
|
||||
struct r600_bytecode_output output;
|
||||
unsigned comps;
|
||||
|
||||
if (so.output[i].output_buffer >= 4) {
|
||||
R600_ERR("exceeded the max number of stream output buffers, got: %d\n",
|
||||
|
|
@ -875,36 +874,21 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
|
|||
r = -EINVAL;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
switch (so.output[i].register_mask) {
|
||||
case TGSI_WRITEMASK_XYZW:
|
||||
comps = 4;
|
||||
break;
|
||||
case TGSI_WRITEMASK_XYZ:
|
||||
comps = 3;
|
||||
break;
|
||||
case TGSI_WRITEMASK_XY:
|
||||
comps = 2;
|
||||
break;
|
||||
case TGSI_WRITEMASK_X:
|
||||
comps = 1;
|
||||
break;
|
||||
default:
|
||||
R600_ERR("streamout: invalid register_mask, got: %x\n",
|
||||
so.output[i].register_mask);
|
||||
r = -EINVAL;
|
||||
goto out_err;
|
||||
if (so.output[i].start_component) {
|
||||
R600_ERR("stream_output - start_component cannot be non-zero\n");
|
||||
r = -EINVAL;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
memset(&output, 0, sizeof(struct r600_bytecode_output));
|
||||
output.gpr = shader->output[so.output[i].register_index].gpr;
|
||||
output.elem_size = 0;
|
||||
output.array_base = buffer_offset[so.output[i].output_buffer];
|
||||
output.array_base = so.output[i].dst_offset;
|
||||
output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
|
||||
output.burst_count = 1;
|
||||
output.barrier = 1;
|
||||
output.array_size = 0;
|
||||
output.comp_mask = so.output[i].register_mask;
|
||||
output.comp_mask = (1 << so.output[i].num_components) - 1;
|
||||
if (ctx.bc->chip_class >= EVERGREEN) {
|
||||
switch (so.output[i].output_buffer) {
|
||||
case 0:
|
||||
|
|
@ -939,12 +923,6 @@ static int r600_shader_from_tgsi(struct r600_pipe_context * rctx, struct r600_pi
|
|||
r = r600_bytecode_add_output(ctx.bc, &output);
|
||||
if (r)
|
||||
goto out_err;
|
||||
|
||||
buffer_offset[so.output[i].output_buffer] += comps;
|
||||
}
|
||||
|
||||
for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
|
||||
pipeshader->so_strides[i] = buffer_offset[i] * 4;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -649,7 +649,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
|
|||
}
|
||||
}
|
||||
|
||||
rctx->ctx.vs_shader_so_strides = rctx->vs_shader->so_strides;
|
||||
rctx->ctx.vs_so_stride_in_dw = rctx->vs_shader->so.stride;
|
||||
|
||||
mask = (1ULL << ((unsigned)rctx->framebuffer.nr_cbufs * 4)) - 1;
|
||||
|
||||
|
|
|
|||
|
|
@ -274,14 +274,16 @@ void trace_dump_shader_state(const struct pipe_shader_state *state)
|
|||
trace_dump_member_begin("stream_output");
|
||||
trace_dump_struct_begin("pipe_stream_output_info");
|
||||
trace_dump_member(uint, &state->stream_output, num_outputs);
|
||||
trace_dump_member(uint, &state->stream_output, stride);
|
||||
trace_dump_array(uint, state->stream_output.stride, PIPE_MAX_SO_BUFFERS);
|
||||
trace_dump_array_begin();
|
||||
for(i = 0; i < state->stream_output.num_outputs; ++i) {
|
||||
trace_dump_elem_begin();
|
||||
trace_dump_struct_begin(""); /* anonymous */
|
||||
trace_dump_member(uint, &state->stream_output.output[i], register_index);
|
||||
trace_dump_member(uint, &state->stream_output.output[i], register_mask);
|
||||
trace_dump_member(uint, &state->stream_output.output[i], start_component);
|
||||
trace_dump_member(uint, &state->stream_output.output[i], num_components);
|
||||
trace_dump_member(uint, &state->stream_output.output[i], output_buffer);
|
||||
trace_dump_member(uint, &state->stream_output.output[i], dst_offset);
|
||||
trace_dump_struct_end();
|
||||
trace_dump_elem_end();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -190,16 +190,19 @@ struct pipe_clip_state
|
|||
struct pipe_stream_output_info
|
||||
{
|
||||
unsigned num_outputs;
|
||||
/** stride for an entire vertex, only used if all output_buffers are 0 */
|
||||
unsigned stride;
|
||||
/** stride for an entire vertex for each buffer in dwords */
|
||||
unsigned stride[PIPE_MAX_SO_BUFFERS];
|
||||
|
||||
/**
|
||||
* Array of stream outputs, in the order they are to be written in.
|
||||
* Selected components are tightly packed into the output buffer.
|
||||
*/
|
||||
struct {
|
||||
unsigned register_index:8; /**< 0 to PIPE_MAX_SHADER_OUTPUTS */
|
||||
unsigned register_mask:4; /**< TGSI_WRITEMASK_x */
|
||||
unsigned output_buffer:4; /**< 0 to PIPE_MAX_SO_BUFFERS */
|
||||
unsigned register_index:8; /**< 0 to PIPE_MAX_SHADER_OUTPUTS */
|
||||
unsigned start_component:2; /** 0 to 3 */
|
||||
unsigned num_components:3; /** 1 to 4 */
|
||||
unsigned output_buffer:3; /**< 0 to PIPE_MAX_SO_BUFFERS */
|
||||
unsigned dst_offset:16; /**< offset into the buffer in dwords */
|
||||
} output[PIPE_MAX_SHADER_OUTPUTS];
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -5097,25 +5097,21 @@ st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
|
|||
const GLuint outputMapping[],
|
||||
struct pipe_stream_output_info *so)
|
||||
{
|
||||
static unsigned comps_to_mask[] = {
|
||||
0,
|
||||
TGSI_WRITEMASK_X,
|
||||
TGSI_WRITEMASK_XY,
|
||||
TGSI_WRITEMASK_XYZ,
|
||||
TGSI_WRITEMASK_XYZW
|
||||
};
|
||||
unsigned i;
|
||||
struct gl_transform_feedback_info *info =
|
||||
&glsl_to_tgsi->shader_program->LinkedTransformFeedback;
|
||||
|
||||
for (i = 0; i < info->NumOutputs; i++) {
|
||||
assert(info->Outputs[i].NumComponents < Elements(comps_to_mask));
|
||||
so->output[i].register_index =
|
||||
outputMapping[info->Outputs[i].OutputRegister];
|
||||
so->output[i].register_mask =
|
||||
comps_to_mask[info->Outputs[i].NumComponents]
|
||||
<< info->Outputs[i].ComponentOffset;
|
||||
so->output[i].start_component = info->Outputs[i].ComponentOffset;
|
||||
so->output[i].num_components = info->Outputs[i].NumComponents;
|
||||
so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
|
||||
so->output[i].dst_offset = info->Outputs[i].DstOffset;
|
||||
}
|
||||
|
||||
for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
|
||||
so->stride[i] = info->BufferStride[i];
|
||||
}
|
||||
so->num_outputs = info->NumOutputs;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue