vc4: Start using the pack header.

This slightly inflates the size of the generated code, in exchange for
getting us some convenient tools.

before (`size` output: text, data, bss, dec, hex, filename):
   4389	      0	      0	   4389	   1125	src/gallium/drivers/vc4/.libs/vc4_draw.o
    808	      0	      0	    808	    328	src/gallium/drivers/vc4/.libs/vc4_emit.o

after:
   4449	      0	      0	   4449	   1161	src/gallium/drivers/vc4/.libs/vc4_draw.o
    988	      0	      0	    988	    3dc	src/gallium/drivers/vc4/.libs/vc4_emit.o
This commit is contained in:
Eric Anholt 2016-10-26 12:46:58 -07:00
parent 7f80a9ff13
commit 4cef255872
4 changed files with 130 additions and 51 deletions

View file

@ -40,6 +40,27 @@ struct vc4_job;
*/
struct vc4_cl_out;
/**
 * A reference to a BO used in the CL packing functions.
 *
 * This is the type handed to the pack header as its address type (see
 * __gen_address_type).
 */
struct vc4_cl_reloc {
/* The buffer object being referenced. */
struct vc4_bo *bo;
/* Value returned by __gen_address_offset(); presumably a byte offset
 * into bo -- TODO confirm against the pack header's users.
 */
uint32_t offset;
};
/* Relocation hook wired into the pack header through __gen_emit_reloc.
 *
 * Nothing we pack so far carries a reloc, so instead of implementing it we
 * abort() if it is ever reached.
 */
static inline void
cl_pack_emit_reloc(void *cl, const struct vc4_cl_reloc *reloc)
{
        /* Neither argument is looked at until relocs are implemented. */
        (void)cl;
        (void)reloc;

        abort();
}
/* Configuration for the pack header included below: these __gen_* names
 * are presumably what the header expects its includer to provide, so they
 * need to be defined before the #include -- TODO confirm against
 * v3d_packet_v21_pack.h.
 */
/* We don't use the data arg yet */
#define __gen_user_data void
/* Addresses are described to the pack code as struct vc4_cl_reloc. */
#define __gen_address_type struct vc4_cl_reloc
/* Extracts the offset member from a pointer to a reloc. */
#define __gen_address_offset(reloc) ((reloc)->offset)
/* Reloc emission goes to cl_pack_emit_reloc(), which currently abort()s. */
#define __gen_emit_reloc cl_pack_emit_reloc
#include "kernel/vc4_packet.h"
#include "broadcom/cle/v3d_packet_v21_pack.h"
struct vc4_cl {
void *base;
struct vc4_cl_out *next;
@ -205,4 +226,46 @@ cl_aligned_reloc(struct vc4_job *job, struct vc4_cl *cl,
void cl_ensure_space(struct vc4_cl *cl, uint32_t size);
/* Helpers that build the V3D21_-prefixed identifier for a packet by token
 * pasting.  Because "packet" is an operand of ##, it is pasted as written
 * and is not macro-expanded first.
 */
/* V3D21_<packet>_header: used by cl_emit() as the struct's initializer. */
#define cl_packet_header(packet) V3D21_ ## packet ## _header
/* V3D21_<packet>_length: size passed to cl_get_emit_space() by cl_emit(). */
#define cl_packet_length(packet) V3D21_ ## packet ## _length
/* V3D21_<packet>_pack: called by cl_emit() as (data, dst, &unpacked). */
#define cl_packet_pack(packet) V3D21_ ## packet ## _pack
/* V3D21_<packet>: struct tag of the unpacked form of the packet. */
#define cl_packet_struct(packet) V3D21_ ## packet
/* Reserves "size" bytes at the current position of the CL.
 *
 * Returns the position *cl pointed at on entry; *cl itself is moved on by
 * cl_advance().
 */
static inline void *
cl_get_emit_space(struct vc4_cl_out **cl, size_t size)
{
        struct vc4_cl_out *start = *cl;

        cl_advance(cl, size);

        return start;
}
/* Macro for setting up an emit of a CL struct.  A temporary unpacked struct
 * called "name" is created and initialized from the packet's header
 * defaults; the statement following the macro gets to set fields in it:
 *
 *     cl_emit(&bcl, FLAT_SHADE_FLAGS, flags) {
 *             flags.flat_shading_flags = 1 << 2;
 *     }
 *
 * or default values only can be emitted with just:
 *
 *     cl_emit(&bcl, FLAT_SHADE_FLAGS, flags);
 *
 * cl_out is the struct vc4_cl_out ** handed to cl_get_emit_space(), which
 * reserves cl_packet_length(packet) bytes for the packed result.
 *
 * The trick here is that we make a for loop that will execute the body
 * (either the block or the ';' after the macro invocation) exactly once:
 * _dst is non-NULL going in, and the loop's increment expression packs the
 * struct into the CL and then clears _dst so the condition fails.  Since
 * the packing happens in the increment expression, leaving the body with
 * "break" skips it even though the space has already been reserved.
 *
 * Also, *_dst is actually of the wrong type, it's the
 * uint8_t[cl_packet_length()] in the CL, not a cl_packet_struct(packet).
 *
 * Note that the last line of the definition must not end in a backslash,
 * or whatever line follows it gets spliced into the macro.
 */
#define cl_emit(cl_out, packet, name)                                    \
        for (struct cl_packet_struct(packet) name = {                    \
                     cl_packet_header(packet)                            \
             },                                                          \
             *_dst = cl_get_emit_space(cl_out, cl_packet_length(packet)); \
             __builtin_expect(_dst != NULL, 1);                          \
             ({                                                          \
                     cl_packet_pack(packet)(NULL, (uint8_t *)_dst, &name); \
                     VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst,              \
                                                      cl_packet_length(packet))); \
                     _dst = NULL;                                        \
             }))
#endif /* VC4_CL_H */

View file

@ -82,32 +82,28 @@ vc4_start_draw(struct vc4_context *vc4)
vc4_get_draw_cl_space(job, 0);
struct vc4_cl_out *bcl = cl_start(&job->bcl);
// Tile state data is 48 bytes per tile, I think it can be thrown away
// as soon as binning is finished.
cl_u8(&bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
cl_u32(&bcl, 0); /* tile alloc addr, filled by kernel */
cl_u32(&bcl, 0); /* tile alloc size, filled by kernel */
cl_u32(&bcl, 0); /* tile state addr, filled by kernel */
cl_u8(&bcl, job->draw_tiles_x);
cl_u8(&bcl, job->draw_tiles_y);
/* Other flags are filled by kernel. */
cl_u8(&bcl, job->msaa ? VC4_BIN_CONFIG_MS_MODE_4X : 0);
cl_emit(&bcl, TILE_BINNING_MODE_CONFIGURATION, bin) {
bin.width_in_tiles = job->draw_tiles_x;
bin.height_in_tiles = job->draw_tiles_y;
bin.multisample_mode_4x = job->msaa;
}
/* START_TILE_BINNING resets the statechange counters in the hardware,
* which are what is used when a primitive is binned to a tile to
* figure out what new state packets need to be written to that tile's
* command list.
*/
cl_u8(&bcl, VC4_PACKET_START_TILE_BINNING);
cl_emit(&bcl, START_TILE_BINNING, start);
/* Reset the current compressed primitives format. This gets modified
* by VC4_PACKET_GL_INDEXED_PRIMITIVE and
* VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
* of every tile.
*/
cl_u8(&bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
cl_u8(&bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
cl_emit(&bcl, PRIMITIVE_LIST_FORMAT, list) {
list.data_type = _16_BIT_INDEX;
list.primitive_type = TRIANGLES_LIST;
}
job->needs_flush = true;
job->draw_width = vc4->framebuffer.width;
@ -221,13 +217,15 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4,
cl_end(&job->shader_rec, shader_rec);
struct vc4_cl_out *bcl = cl_start(&job->bcl);
/* the actual draw call. */
cl_u8(&bcl, VC4_PACKET_GL_SHADER_STATE);
assert(vtx->num_elements <= 8);
/* Note that number of attributes == 0 in the packet means 8
* attributes. This field also contains the offset into shader_rec.
*/
cl_u32(&bcl, num_elements_emit & 0x7);
cl_emit(&bcl, GL_SHADER_STATE, shader_state) {
/* Note that number of attributes == 0 in the packet means 8
* attributes. This field also contains the offset into
* shader_rec.
*/
assert(vtx->num_elements <= 8);
shader_state.number_of_attribute_arrays =
num_elements_emit & 0x7;
}
cl_end(&job->bcl, bcl);
vc4_write_uniforms(vc4, vc4->prog.fs,
@ -436,10 +434,11 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
}
}
cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
cl_u8(&bcl, info->mode);
cl_u32(&bcl, this_count);
cl_u32(&bcl, start);
cl_emit(&bcl, VERTEX_ARRAY_PRIMITIVES, array) {
array.primitive_mode = info->mode;
array.length = this_count;
array.index_of_first_vertex = start;
}
job->draw_calls_queued++;
count -= step;

View file

@ -60,11 +60,12 @@ vc4_emit_state(struct pipe_context *pctx)
maxy = MIN2(vp_maxy, vc4->scissor.maxy);
}
cl_u8(&bcl, VC4_PACKET_CLIP_WINDOW);
cl_u16(&bcl, minx);
cl_u16(&bcl, miny);
cl_u16(&bcl, maxx - minx);
cl_u16(&bcl, maxy - miny);
cl_emit(&bcl, CLIP_WINDOW, clip) {
clip.clip_window_left_pixel_coordinate = minx;
clip.clip_window_bottom_pixel_coordinate = miny;
clip.clip_window_height_in_pixels = maxy - miny;
clip.clip_window_width_in_pixels = maxx - minx;
}
job->draw_min_x = MIN2(job->draw_min_x, minx);
job->draw_min_y = MIN2(job->draw_min_y, miny);
@ -113,35 +114,51 @@ vc4_emit_state(struct pipe_context *pctx)
}
if (vc4->dirty & VC4_DIRTY_RASTERIZER) {
cl_u8(&bcl, VC4_PACKET_DEPTH_OFFSET);
cl_u16(&bcl, vc4->rasterizer->offset_factor);
cl_u16(&bcl, vc4->rasterizer->offset_units);
cl_emit(&bcl, DEPTH_OFFSET, depth) {
depth.depth_offset_units =
vc4->rasterizer->offset_units;
depth.depth_offset_factor =
vc4->rasterizer->offset_factor;
}
cl_u8(&bcl, VC4_PACKET_POINT_SIZE);
cl_f(&bcl, vc4->rasterizer->point_size);
cl_emit(&bcl, POINT_SIZE, points) {
points.point_size = vc4->rasterizer->point_size;
}
cl_u8(&bcl, VC4_PACKET_LINE_WIDTH);
cl_f(&bcl, vc4->rasterizer->base.line_width);
cl_emit(&bcl, LINE_WIDTH, points) {
points.line_width = vc4->rasterizer->base.line_width;
}
}
if (vc4->dirty & VC4_DIRTY_VIEWPORT) {
cl_u8(&bcl, VC4_PACKET_CLIPPER_XY_SCALING);
cl_f(&bcl, vc4->viewport.scale[0] * 16.0f);
cl_f(&bcl, vc4->viewport.scale[1] * 16.0f);
cl_emit(&bcl, CLIPPER_XY_SCALING, clip) {
clip.viewport_half_width_in_1_16th_of_pixel =
vc4->viewport.scale[0] * 16.0f;
clip.viewport_half_height_in_1_16th_of_pixel =
vc4->viewport.scale[1] * 16.0f;
}
cl_u8(&bcl, VC4_PACKET_CLIPPER_Z_SCALING);
cl_f(&bcl, vc4->viewport.translate[2]);
cl_f(&bcl, vc4->viewport.scale[2]);
cl_emit(&bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
clip.viewport_z_offset_zc_to_zs =
vc4->viewport.translate[2];
clip.viewport_z_scale_zc_to_zs =
vc4->viewport.scale[2];
}
cl_u8(&bcl, VC4_PACKET_VIEWPORT_OFFSET);
cl_u16(&bcl, 16 * vc4->viewport.translate[0]);
cl_u16(&bcl, 16 * vc4->viewport.translate[1]);
cl_emit(&bcl, VIEWPORT_OFFSET, vp) {
vp.viewport_centre_x_coordinate =
16 * vc4->viewport.translate[0];
vp.viewport_centre_y_coordinate =
16 * vc4->viewport.translate[1];
}
}
if (vc4->dirty & VC4_DIRTY_FLAT_SHADE_FLAGS) {
cl_u8(&bcl, VC4_PACKET_FLAT_SHADE_FLAGS);
cl_u32(&bcl, vc4->rasterizer->base.flatshade ?
vc4->prog.fs->color_inputs : 0);
cl_emit(&bcl, FLAT_SHADE_FLAGS, flags) {
if (vc4->rasterizer->base.flatshade)
flags.flat_shading_flags =
vc4->prog.fs->color_inputs;
}
}
cl_end(&job->bcl, bcl);

View file

@ -378,11 +378,11 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
*/
cl_ensure_space(&job->bcl, 8);
struct vc4_cl_out *bcl = cl_start(&job->bcl);
cl_u8(&bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
cl_emit(&bcl, INCREMENT_SEMAPHORE, incr);
/* The FLUSH caps all of our bin lists with a
* VC4_PACKET_RETURN.
*/
cl_u8(&bcl, VC4_PACKET_FLUSH);
cl_emit(&bcl, FLUSH, flush);
cl_end(&job->bcl, bcl);
}
struct drm_vc4_submit_cl submit = {