mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 06:48:06 +02:00
vc4: Start using the pack header.
This slightly inflates the size of the generated code, in exchange for
getting us some convenient tools.
before (`size` output; columns are text, data, bss, dec, hex, filename):
4389 0 0 4389 1125 src/gallium/drivers/vc4/.libs/vc4_draw.o
808 0 0 808 328 src/gallium/drivers/vc4/.libs/vc4_emit.o
after:
4449 0 0 4449 1161 src/gallium/drivers/vc4/.libs/vc4_draw.o
988 0 0 988 3dc src/gallium/drivers/vc4/.libs/vc4_emit.o
This commit is contained in:
parent
7f80a9ff13
commit
4cef255872
4 changed files with 130 additions and 51 deletions
|
|
@ -40,6 +40,27 @@ struct vc4_job;
|
|||
*/
|
||||
struct vc4_cl_out;
|
||||
|
||||
/**
 * A reference to a BO used in the CL packing functions.
 *
 * This struct is what __gen_address_type is defined to, and
 * __gen_address_offset() reads its offset member.
 */
|
||||
struct vc4_cl_reloc {
|
||||
struct vc4_bo *bo; /* The BO being referenced. */
|
||||
uint32_t offset; /* Value yielded by __gen_address_offset(); presumably a byte offset into bo -- TODO confirm. */
|
||||
};
|
||||
|
||||
/**
 * Relocation hook, installed as __gen_emit_reloc ahead of the pack header
 * include.
 *
 * We don't call anything that packs a reloc yet, so this is deliberately left
 * unimplemented: reaching it terminates the process through abort().
 *
 * @param cl     Unused.
 * @param reloc  Unused.
 */
static inline void
cl_pack_emit_reloc(void *cl, const struct vc4_cl_reloc *reloc)
{
        /* Neither argument is consumed until relocs are actually packed. */
        (void)cl;
        (void)reloc;

        abort();
}
|
||||
|
||||
/* Configuration macros defined ahead of the pack header include below;
 * presumably consumed by the generated V3D21_*_pack() functions -- confirm
 * against broadcom/cle/v3d_packet_v21_pack.h.
 *
 * We don't use the data arg yet, so its type is plain void.
 */
|
||||
#define __gen_user_data void
|
||||
/* Addresses are represented by a struct vc4_cl_reloc. */
#define __gen_address_type struct vc4_cl_reloc
|
||||
/* Reads the offset member out of a pointer to the address type. */
#define __gen_address_offset(reloc) ((reloc)->offset)
|
||||
/* Relocation emission is routed to cl_pack_emit_reloc(), which currently
 * just calls abort().
 */
#define __gen_emit_reloc cl_pack_emit_reloc
|
||||
|
||||
#include "kernel/vc4_packet.h"
|
||||
#include "broadcom/cle/v3d_packet_v21_pack.h"
|
||||
|
||||
struct vc4_cl {
|
||||
void *base;
|
||||
struct vc4_cl_out *next;
|
||||
|
|
@ -205,4 +226,46 @@ cl_aligned_reloc(struct vc4_job *job, struct vc4_cl *cl,
|
|||
|
||||
void cl_ensure_space(struct vc4_cl *cl, uint32_t size);
|
||||
|
||||
/* Map a bare packet name onto the V3D21_-prefixed identifiers by token
 * pasting.  For example, cl_packet_length(FLUSH) expands to
 * V3D21_FLUSH_length and cl_packet_struct(FLUSH) expands to V3D21_FLUSH.
 */
#define cl_packet_header(packet) V3D21_ ## packet ## _header
|
||||
#define cl_packet_length(packet) V3D21_ ## packet ## _length
|
||||
#define cl_packet_pack(packet) V3D21_ ## packet ## _pack
|
||||
#define cl_packet_struct(packet) V3D21_ ## packet
|
||||
|
||||
/**
 * Hands out the current position of a CL cursor and then moves the cursor on.
 *
 * @param cl    Cursor to read and advance; forwarded to cl_advance().
 * @param size  Amount forwarded to cl_advance(); cl_emit() passes a packet
 *              length here, so presumably bytes -- confirm against
 *              cl_advance().
 * @return      The value *cl held on entry, i.e. the start of the space the
 *              caller may now fill in.
 */
static inline void *
cl_get_emit_space(struct vc4_cl_out **cl, size_t size)
{
        /* Remember where the cursor was before cl_advance() is called. */
        struct vc4_cl_out *const start = *cl;

        cl_advance(cl, size);

        return start;
}
|
||||
|
||||
/* Macro for setting up an emit of a CL struct. A temporary unpacked struct
|
||||
* called `name` is created, whose fields you get to set in a block:
|
||||
*
|
||||
* cl_emit(&bcl, FLAT_SHADE_FLAGS, flags) {
|
||||
* flags.flat_shading_flags = 1 << 2;
|
||||
* }
|
||||
*
|
||||
* or default values only can be emitted with just:
|
||||
*
|
||||
* cl_emit(&bcl, FLAT_SHADE_FLAGS, flags);
|
||||
*
|
||||
* The trick here is that we make a for loop that will execute the body
|
||||
* (either the block or the ';' after the macro invocation) exactly once:
|
||||
*
|
||||
* - the init clause declares `name`, initialized from
|
||||
* cl_packet_header(packet), and `_dst`, which receives
|
||||
* cl_packet_length(packet) worth of space from cl_get_emit_space();
|
||||
*
|
||||
* - the condition is true for as long as _dst is non-NULL;
|
||||
*
|
||||
* - the iteration expression, which runs after the body, packs `name`
|
||||
* into _dst (passing NULL as the packer's first argument) and then sets
|
||||
* _dst to NULL, so the condition fails and the loop ends.
|
||||
*
|
||||
* Both `name` and `_dst` are declared in the for-init, so they are only in
|
||||
* scope for the cl_emit() statement itself.
|
||||
*
|
||||
* cl_out is handed straight to cl_get_emit_space(), so it must be a
|
||||
* struct vc4_cl_out **.
|
||||
*
|
||||
* Also, *_dst is actually of the wrong type, it's the
|
||||
* uint8_t[cl_packet_length()] in the CL, not a cl_packet_struct(packet).
|
||||
*/
|
||||
#define cl_emit(cl_out, packet, name) \
|
||||
for (struct cl_packet_struct(packet) name = { \
|
||||
cl_packet_header(packet) \
|
||||
}, \
|
||||
*_dst = cl_get_emit_space(cl_out, cl_packet_length(packet)); \
|
||||
__builtin_expect(_dst != NULL, 1); \
|
||||
({ \
|
||||
cl_packet_pack(packet)(NULL, (uint8_t *)_dst, &name); \
|
||||
VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, \
|
||||
cl_packet_length(packet))); \
|
||||
_dst = NULL; \
|
||||
})) \
|
||||
|
||||
#endif /* VC4_CL_H */
|
||||
|
|
|
|||
|
|
@ -82,32 +82,28 @@ vc4_start_draw(struct vc4_context *vc4)
|
|||
vc4_get_draw_cl_space(job, 0);
|
||||
|
||||
struct vc4_cl_out *bcl = cl_start(&job->bcl);
|
||||
// Tile state data is 48 bytes per tile, I think it can be thrown away
|
||||
// as soon as binning is finished.
|
||||
cl_u8(&bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
|
||||
cl_u32(&bcl, 0); /* tile alloc addr, filled by kernel */
|
||||
cl_u32(&bcl, 0); /* tile alloc size, filled by kernel */
|
||||
cl_u32(&bcl, 0); /* tile state addr, filled by kernel */
|
||||
cl_u8(&bcl, job->draw_tiles_x);
|
||||
cl_u8(&bcl, job->draw_tiles_y);
|
||||
/* Other flags are filled by kernel. */
|
||||
cl_u8(&bcl, job->msaa ? VC4_BIN_CONFIG_MS_MODE_4X : 0);
|
||||
cl_emit(&bcl, TILE_BINNING_MODE_CONFIGURATION, bin) {
|
||||
bin.width_in_tiles = job->draw_tiles_x;
|
||||
bin.height_in_tiles = job->draw_tiles_y;
|
||||
bin.multisample_mode_4x = job->msaa;
|
||||
}
|
||||
|
||||
/* START_TILE_BINNING resets the statechange counters in the hardware,
|
||||
* which are what is used when a primitive is binned to a tile to
|
||||
* figure out what new state packets need to be written to that tile's
|
||||
* command list.
|
||||
*/
|
||||
cl_u8(&bcl, VC4_PACKET_START_TILE_BINNING);
|
||||
cl_emit(&bcl, START_TILE_BINNING, start);
|
||||
|
||||
/* Reset the current compressed primitives format. This gets modified
|
||||
* by VC4_PACKET_GL_INDEXED_PRIMITIVE and
|
||||
* VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
|
||||
* of every tile.
|
||||
*/
|
||||
cl_u8(&bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
|
||||
cl_u8(&bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
|
||||
VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
|
||||
cl_emit(&bcl, PRIMITIVE_LIST_FORMAT, list) {
|
||||
list.data_type = _16_BIT_INDEX;
|
||||
list.primitive_type = TRIANGLES_LIST;
|
||||
}
|
||||
|
||||
job->needs_flush = true;
|
||||
job->draw_width = vc4->framebuffer.width;
|
||||
|
|
@ -221,13 +217,15 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4,
|
|||
cl_end(&job->shader_rec, shader_rec);
|
||||
|
||||
struct vc4_cl_out *bcl = cl_start(&job->bcl);
|
||||
/* the actual draw call. */
|
||||
cl_u8(&bcl, VC4_PACKET_GL_SHADER_STATE);
|
||||
assert(vtx->num_elements <= 8);
|
||||
/* Note that number of attributes == 0 in the packet means 8
|
||||
* attributes. This field also contains the offset into shader_rec.
|
||||
*/
|
||||
cl_u32(&bcl, num_elements_emit & 0x7);
|
||||
cl_emit(&bcl, GL_SHADER_STATE, shader_state) {
|
||||
/* Note that number of attributes == 0 in the packet means 8
|
||||
* attributes. This field also contains the offset into
|
||||
* shader_rec.
|
||||
*/
|
||||
assert(vtx->num_elements <= 8);
|
||||
shader_state.number_of_attribute_arrays =
|
||||
num_elements_emit & 0x7;
|
||||
}
|
||||
cl_end(&job->bcl, bcl);
|
||||
|
||||
vc4_write_uniforms(vc4, vc4->prog.fs,
|
||||
|
|
@ -436,10 +434,11 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
|
|||
}
|
||||
}
|
||||
|
||||
cl_u8(&bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
|
||||
cl_u8(&bcl, info->mode);
|
||||
cl_u32(&bcl, this_count);
|
||||
cl_u32(&bcl, start);
|
||||
cl_emit(&bcl, VERTEX_ARRAY_PRIMITIVES, array) {
|
||||
array.primitive_mode = info->mode;
|
||||
array.length = this_count;
|
||||
array.index_of_first_vertex = start;
|
||||
}
|
||||
job->draw_calls_queued++;
|
||||
|
||||
count -= step;
|
||||
|
|
|
|||
|
|
@ -60,11 +60,12 @@ vc4_emit_state(struct pipe_context *pctx)
|
|||
maxy = MIN2(vp_maxy, vc4->scissor.maxy);
|
||||
}
|
||||
|
||||
cl_u8(&bcl, VC4_PACKET_CLIP_WINDOW);
|
||||
cl_u16(&bcl, minx);
|
||||
cl_u16(&bcl, miny);
|
||||
cl_u16(&bcl, maxx - minx);
|
||||
cl_u16(&bcl, maxy - miny);
|
||||
cl_emit(&bcl, CLIP_WINDOW, clip) {
|
||||
clip.clip_window_left_pixel_coordinate = minx;
|
||||
clip.clip_window_bottom_pixel_coordinate = miny;
|
||||
clip.clip_window_height_in_pixels = maxy - miny;
|
||||
clip.clip_window_width_in_pixels = maxx - minx;
|
||||
}
|
||||
|
||||
job->draw_min_x = MIN2(job->draw_min_x, minx);
|
||||
job->draw_min_y = MIN2(job->draw_min_y, miny);
|
||||
|
|
@ -113,35 +114,51 @@ vc4_emit_state(struct pipe_context *pctx)
|
|||
}
|
||||
|
||||
if (vc4->dirty & VC4_DIRTY_RASTERIZER) {
|
||||
cl_u8(&bcl, VC4_PACKET_DEPTH_OFFSET);
|
||||
cl_u16(&bcl, vc4->rasterizer->offset_factor);
|
||||
cl_u16(&bcl, vc4->rasterizer->offset_units);
|
||||
cl_emit(&bcl, DEPTH_OFFSET, depth) {
|
||||
depth.depth_offset_units =
|
||||
vc4->rasterizer->offset_units;
|
||||
depth.depth_offset_factor =
|
||||
vc4->rasterizer->offset_factor;
|
||||
}
|
||||
|
||||
cl_u8(&bcl, VC4_PACKET_POINT_SIZE);
|
||||
cl_f(&bcl, vc4->rasterizer->point_size);
|
||||
cl_emit(&bcl, POINT_SIZE, points) {
|
||||
points.point_size = vc4->rasterizer->point_size;
|
||||
}
|
||||
|
||||
cl_u8(&bcl, VC4_PACKET_LINE_WIDTH);
|
||||
cl_f(&bcl, vc4->rasterizer->base.line_width);
|
||||
cl_emit(&bcl, LINE_WIDTH, points) {
|
||||
points.line_width = vc4->rasterizer->base.line_width;
|
||||
}
|
||||
}
|
||||
|
||||
if (vc4->dirty & VC4_DIRTY_VIEWPORT) {
|
||||
cl_u8(&bcl, VC4_PACKET_CLIPPER_XY_SCALING);
|
||||
cl_f(&bcl, vc4->viewport.scale[0] * 16.0f);
|
||||
cl_f(&bcl, vc4->viewport.scale[1] * 16.0f);
|
||||
cl_emit(&bcl, CLIPPER_XY_SCALING, clip) {
|
||||
clip.viewport_half_width_in_1_16th_of_pixel =
|
||||
vc4->viewport.scale[0] * 16.0f;
|
||||
clip.viewport_half_height_in_1_16th_of_pixel =
|
||||
vc4->viewport.scale[1] * 16.0f;
|
||||
}
|
||||
|
||||
cl_u8(&bcl, VC4_PACKET_CLIPPER_Z_SCALING);
|
||||
cl_f(&bcl, vc4->viewport.translate[2]);
|
||||
cl_f(&bcl, vc4->viewport.scale[2]);
|
||||
cl_emit(&bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
|
||||
clip.viewport_z_offset_zc_to_zs =
|
||||
vc4->viewport.translate[2];
|
||||
clip.viewport_z_scale_zc_to_zs =
|
||||
vc4->viewport.scale[2];
|
||||
}
|
||||
|
||||
cl_u8(&bcl, VC4_PACKET_VIEWPORT_OFFSET);
|
||||
cl_u16(&bcl, 16 * vc4->viewport.translate[0]);
|
||||
cl_u16(&bcl, 16 * vc4->viewport.translate[1]);
|
||||
cl_emit(&bcl, VIEWPORT_OFFSET, vp) {
|
||||
vp.viewport_centre_x_coordinate =
|
||||
16 * vc4->viewport.translate[0];
|
||||
vp.viewport_centre_y_coordinate =
|
||||
16 * vc4->viewport.translate[1];
|
||||
}
|
||||
}
|
||||
|
||||
if (vc4->dirty & VC4_DIRTY_FLAT_SHADE_FLAGS) {
|
||||
cl_u8(&bcl, VC4_PACKET_FLAT_SHADE_FLAGS);
|
||||
cl_u32(&bcl, vc4->rasterizer->base.flatshade ?
|
||||
vc4->prog.fs->color_inputs : 0);
|
||||
cl_emit(&bcl, FLAT_SHADE_FLAGS, flags) {
|
||||
if (vc4->rasterizer->base.flatshade)
|
||||
flags.flat_shading_flags =
|
||||
vc4->prog.fs->color_inputs;
|
||||
}
|
||||
}
|
||||
|
||||
cl_end(&job->bcl, bcl);
|
||||
|
|
|
|||
|
|
@ -378,11 +378,11 @@ vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
|
|||
*/
|
||||
cl_ensure_space(&job->bcl, 8);
|
||||
struct vc4_cl_out *bcl = cl_start(&job->bcl);
|
||||
cl_u8(&bcl, VC4_PACKET_INCREMENT_SEMAPHORE);
|
||||
cl_emit(&bcl, INCREMENT_SEMAPHORE, incr);
|
||||
/* The FLUSH caps all of our bin lists with a
|
||||
* VC4_PACKET_RETURN.
|
||||
*/
|
||||
cl_u8(&bcl, VC4_PACKET_FLUSH);
|
||||
cl_emit(&bcl, FLUSH, flush);
|
||||
cl_end(&job->bcl, bcl);
|
||||
}
|
||||
struct drm_vc4_submit_cl submit = {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue