panfrost: clang-format the tree

This switches us over to Mesa's code style [1], normalizing us within the tree.
The results aren't perfect, but they bring us a hell of a lot closer to the rest
of the tree. With this, Panfrost no longer feels so foreign relative to the rest
of Mesa, which (in retrospect, after a bunch of years of being "different") I
think is the right call.

I skipped PanVK because that's paused right now.

  find panfrost/ -type f -name '*.h' | grep -v vulkan | xargs clang-format -i;
  find panfrost/ -type f -name '*.c' | grep -v vulkan | xargs clang-format -i;
  clang-format -i gallium/drivers/panfrost/*.c gallium/drivers/panfrost/*.h;
  find panfrost/ -type f -name '*.cpp' | grep -v vulkan | xargs clang-format -i

[1] https://docs.mesa3d.org/codingstyle.html
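
For reference, the same formatting can be spot-checked without rewriting any
files: clang-format has a check-only mode that reports differences from the
nearest .clang-format instead of applying them. The paths in the commands above
appear to be relative to src/; the flags and the file picked below are only an
illustration, not part of this commit.

  # report formatting differences without modifying the file (clang-format >= 10)
  clang-format --dry-run --Werror gallium/drivers/panfrost/pan_blit.c

  # re-apply formatting in place, as the tree-wide commands above do
  clang-format -i gallium/drivers/panfrost/pan_blit.c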

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20425>
Alyssa Rosenzweig, 2022-12-23 16:58:38 -05:00, committed by Marge Bot
parent a4705afe63
commit 0afd691f29
182 changed files with 36570 additions and 36355 deletions


@@ -36,26 +36,26 @@
struct panfrost_bo;
struct pan_blend_info {
unsigned constant_mask : 4;
bool fixed_function : 1;
bool enabled : 1;
bool load_dest : 1;
bool opaque : 1;
bool alpha_zero_nop : 1;
bool alpha_one_store : 1;
unsigned constant_mask : 4;
bool fixed_function : 1;
bool enabled : 1;
bool load_dest : 1;
bool opaque : 1;
bool alpha_zero_nop : 1;
bool alpha_one_store : 1;
};
struct panfrost_blend_state {
struct pipe_blend_state base;
struct pan_blend_state pan;
struct pan_blend_info info[PIPE_MAX_COLOR_BUFS];
uint32_t equation[PIPE_MAX_COLOR_BUFS];
struct pipe_blend_state base;
struct pan_blend_state pan;
struct pan_blend_info info[PIPE_MAX_COLOR_BUFS];
uint32_t equation[PIPE_MAX_COLOR_BUFS];
/* info.load presented as a bitfield for draw call hot paths */
unsigned load_dest_mask : PIPE_MAX_COLOR_BUFS;
/* info.load presented as a bitfield for draw call hot paths */
unsigned load_dest_mask : PIPE_MAX_COLOR_BUFS;
};
mali_ptr
panfrost_get_blend(struct panfrost_batch *batch, unsigned rt, struct panfrost_bo **bo, unsigned *shader_offset);
mali_ptr panfrost_get_blend(struct panfrost_batch *batch, unsigned rt,
struct panfrost_bo **bo, unsigned *shader_offset);
#endif


@@ -27,59 +27,58 @@
*
*/
#include "util/format/u_format.h"
#include "pan_context.h"
#include "pan_util.h"
#include "util/format/u_format.h"
void
panfrost_blitter_save(struct panfrost_context *ctx, bool render_cond)
{
struct blitter_context *blitter = ctx->blitter;
struct blitter_context *blitter = ctx->blitter;
util_blitter_save_vertex_buffer_slot(blitter, ctx->vertex_buffers);
util_blitter_save_vertex_elements(blitter, ctx->vertex);
util_blitter_save_vertex_shader(blitter, ctx->uncompiled[PIPE_SHADER_VERTEX]);
util_blitter_save_rasterizer(blitter, ctx->rasterizer);
util_blitter_save_viewport(blitter, &ctx->pipe_viewport);
util_blitter_save_scissor(blitter, &ctx->scissor);
util_blitter_save_fragment_shader(blitter, ctx->uncompiled[PIPE_SHADER_FRAGMENT]);
util_blitter_save_blend(blitter, ctx->blend);
util_blitter_save_depth_stencil_alpha(blitter, ctx->depth_stencil);
util_blitter_save_stencil_ref(blitter, &ctx->stencil_ref);
util_blitter_save_so_targets(blitter, 0, NULL);
util_blitter_save_sample_mask(blitter, ctx->sample_mask, ctx->min_samples);
util_blitter_save_vertex_buffer_slot(blitter, ctx->vertex_buffers);
util_blitter_save_vertex_elements(blitter, ctx->vertex);
util_blitter_save_vertex_shader(blitter,
ctx->uncompiled[PIPE_SHADER_VERTEX]);
util_blitter_save_rasterizer(blitter, ctx->rasterizer);
util_blitter_save_viewport(blitter, &ctx->pipe_viewport);
util_blitter_save_scissor(blitter, &ctx->scissor);
util_blitter_save_fragment_shader(blitter,
ctx->uncompiled[PIPE_SHADER_FRAGMENT]);
util_blitter_save_blend(blitter, ctx->blend);
util_blitter_save_depth_stencil_alpha(blitter, ctx->depth_stencil);
util_blitter_save_stencil_ref(blitter, &ctx->stencil_ref);
util_blitter_save_so_targets(blitter, 0, NULL);
util_blitter_save_sample_mask(blitter, ctx->sample_mask, ctx->min_samples);
util_blitter_save_framebuffer(blitter, &ctx->pipe_framebuffer);
util_blitter_save_fragment_sampler_states(blitter,
ctx->sampler_count[PIPE_SHADER_FRAGMENT],
(void **)(&ctx->samplers[PIPE_SHADER_FRAGMENT]));
util_blitter_save_fragment_sampler_views(blitter,
ctx->sampler_view_count[PIPE_SHADER_FRAGMENT],
(struct pipe_sampler_view **)&ctx->sampler_views[PIPE_SHADER_FRAGMENT]);
util_blitter_save_fragment_constant_buffer_slot(blitter,
ctx->constant_buffer[PIPE_SHADER_FRAGMENT].cb);
if (!render_cond) {
util_blitter_save_render_condition(blitter,
(struct pipe_query *) ctx->cond_query,
ctx->cond_cond, ctx->cond_mode);
}
util_blitter_save_framebuffer(blitter, &ctx->pipe_framebuffer);
util_blitter_save_fragment_sampler_states(
blitter, ctx->sampler_count[PIPE_SHADER_FRAGMENT],
(void **)(&ctx->samplers[PIPE_SHADER_FRAGMENT]));
util_blitter_save_fragment_sampler_views(
blitter, ctx->sampler_view_count[PIPE_SHADER_FRAGMENT],
(struct pipe_sampler_view **)&ctx->sampler_views[PIPE_SHADER_FRAGMENT]);
util_blitter_save_fragment_constant_buffer_slot(
blitter, ctx->constant_buffer[PIPE_SHADER_FRAGMENT].cb);
if (!render_cond) {
util_blitter_save_render_condition(blitter,
(struct pipe_query *)ctx->cond_query,
ctx->cond_cond, ctx->cond_mode);
}
}
void
panfrost_blit(struct pipe_context *pipe,
const struct pipe_blit_info *info)
panfrost_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
{
struct panfrost_context *ctx = pan_context(pipe);
struct panfrost_context *ctx = pan_context(pipe);
if (info->render_condition_enable &&
!panfrost_render_condition_check(ctx))
return;
if (info->render_condition_enable && !panfrost_render_condition_check(ctx))
return;
if (!util_blitter_is_blit_supported(ctx->blitter, info))
unreachable("Unsupported blit\n");
if (!util_blitter_is_blit_supported(ctx->blitter, info))
unreachable("Unsupported blit\n");
panfrost_blitter_save(ctx, info->render_condition_enable);
util_blitter_blit(ctx->blitter, info);
panfrost_blitter_save(ctx, info->render_condition_enable);
util_blitter_blit(ctx->blitter, info);
}

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -26,206 +26,207 @@
#define __BUILDER_H__
#define _LARGEFILE64_SOURCE 1
#include <sys/mman.h>
#include <assert.h>
#include "pan_resource.h"
#include "pan_job.h"
#include <sys/mman.h>
#include "pan_blend_cso.h"
#include "pan_encoder.h"
#include "pan_texture.h"
#include "pan_earlyzs.h"
#include "pan_encoder.h"
#include "pan_job.h"
#include "pan_resource.h"
#include "pan_texture.h"
#include "pipe/p_compiler.h"
#include "util/detect.h"
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "util/format/u_formats.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "util/u_blitter.h"
#include "util/detect.h"
#include "util/format/u_formats.h"
#include "util/hash_table.h"
#include "util/simple_mtx.h"
#include "util/u_blitter.h"
#include "midgard/midgard_compile.h"
#include "compiler/shader_enums.h"
#include "midgard/midgard_compile.h"
#define SET_BIT(lval, bit, cond) \
if (cond) \
lval |= (bit); \
else \
lval &= ~(bit);
#define SET_BIT(lval, bit, cond) \
if (cond) \
lval |= (bit); \
else \
lval &= ~(bit);
/* Dirty tracking flags. 3D is for general 3D state. Shader flags are
* per-stage. Renderer refers to Renderer State Descriptors. Vertex refers to
* vertex attributes/elements. */
enum pan_dirty_3d {
PAN_DIRTY_VIEWPORT = BITFIELD_BIT(0),
PAN_DIRTY_SCISSOR = BITFIELD_BIT(1),
PAN_DIRTY_VERTEX = BITFIELD_BIT(2),
PAN_DIRTY_PARAMS = BITFIELD_BIT(3),
PAN_DIRTY_DRAWID = BITFIELD_BIT(4),
PAN_DIRTY_TLS_SIZE = BITFIELD_BIT(5),
PAN_DIRTY_ZS = BITFIELD_BIT(6),
PAN_DIRTY_BLEND = BITFIELD_BIT(7),
PAN_DIRTY_MSAA = BITFIELD_BIT(8),
PAN_DIRTY_OQ = BITFIELD_BIT(9),
PAN_DIRTY_RASTERIZER = BITFIELD_BIT(10),
PAN_DIRTY_POINTS = BITFIELD_BIT(11),
PAN_DIRTY_SO = BITFIELD_BIT(12),
PAN_DIRTY_VIEWPORT = BITFIELD_BIT(0),
PAN_DIRTY_SCISSOR = BITFIELD_BIT(1),
PAN_DIRTY_VERTEX = BITFIELD_BIT(2),
PAN_DIRTY_PARAMS = BITFIELD_BIT(3),
PAN_DIRTY_DRAWID = BITFIELD_BIT(4),
PAN_DIRTY_TLS_SIZE = BITFIELD_BIT(5),
PAN_DIRTY_ZS = BITFIELD_BIT(6),
PAN_DIRTY_BLEND = BITFIELD_BIT(7),
PAN_DIRTY_MSAA = BITFIELD_BIT(8),
PAN_DIRTY_OQ = BITFIELD_BIT(9),
PAN_DIRTY_RASTERIZER = BITFIELD_BIT(10),
PAN_DIRTY_POINTS = BITFIELD_BIT(11),
PAN_DIRTY_SO = BITFIELD_BIT(12),
};
enum pan_dirty_shader {
PAN_DIRTY_STAGE_SHADER = BITFIELD_BIT(0),
PAN_DIRTY_STAGE_TEXTURE = BITFIELD_BIT(1),
PAN_DIRTY_STAGE_SAMPLER = BITFIELD_BIT(2),
PAN_DIRTY_STAGE_IMAGE = BITFIELD_BIT(3),
PAN_DIRTY_STAGE_CONST = BITFIELD_BIT(4),
PAN_DIRTY_STAGE_SSBO = BITFIELD_BIT(5),
PAN_DIRTY_STAGE_SHADER = BITFIELD_BIT(0),
PAN_DIRTY_STAGE_TEXTURE = BITFIELD_BIT(1),
PAN_DIRTY_STAGE_SAMPLER = BITFIELD_BIT(2),
PAN_DIRTY_STAGE_IMAGE = BITFIELD_BIT(3),
PAN_DIRTY_STAGE_CONST = BITFIELD_BIT(4),
PAN_DIRTY_STAGE_SSBO = BITFIELD_BIT(5),
};
struct panfrost_constant_buffer {
struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
uint32_t enabled_mask;
struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
uint32_t enabled_mask;
};
struct panfrost_query {
/* Passthrough from Gallium */
unsigned type;
unsigned index;
/* Passthrough from Gallium */
unsigned type;
unsigned index;
/* For computed queries. 64-bit to prevent overflow */
struct {
uint64_t start;
uint64_t end;
};
/* For computed queries. 64-bit to prevent overflow */
struct {
uint64_t start;
uint64_t end;
};
/* Memory for the GPU to writeback the value of the query */
struct pipe_resource *rsrc;
/* Memory for the GPU to writeback the value of the query */
struct pipe_resource *rsrc;
/* Whether an occlusion query is for a MSAA framebuffer */
bool msaa;
/* Whether an occlusion query is for a MSAA framebuffer */
bool msaa;
};
struct panfrost_streamout_target {
struct pipe_stream_output_target base;
uint32_t offset;
struct pipe_stream_output_target base;
uint32_t offset;
};
struct panfrost_streamout {
struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
unsigned num_targets;
struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
unsigned num_targets;
};
struct panfrost_context {
/* Gallium context */
struct pipe_context base;
/* Gallium context */
struct pipe_context base;
/* Dirty global state */
enum pan_dirty_3d dirty;
/* Dirty global state */
enum pan_dirty_3d dirty;
/* Per shader stage dirty state */
enum pan_dirty_shader dirty_shader[PIPE_SHADER_TYPES];
/* Per shader stage dirty state */
enum pan_dirty_shader dirty_shader[PIPE_SHADER_TYPES];
/* Unowned pools, so manage yourself. */
struct panfrost_pool descs, shaders;
/* Unowned pools, so manage yourself. */
struct panfrost_pool descs, shaders;
/* Sync obj used to keep track of in-flight jobs. */
uint32_t syncobj;
/* Sync obj used to keep track of in-flight jobs. */
uint32_t syncobj;
/* Set of 32 batches. When the set is full, the LRU entry (the batch
* with the smallest seqnum) is flushed to free a slot.
*/
struct {
uint64_t seqnum;
struct panfrost_batch slots[PAN_MAX_BATCHES];
/* Set of 32 batches. When the set is full, the LRU entry (the batch
* with the smallest seqnum) is flushed to free a slot.
*/
struct {
uint64_t seqnum;
struct panfrost_batch slots[PAN_MAX_BATCHES];
/** Set of active batches for faster traversal */
BITSET_DECLARE(active, PAN_MAX_BATCHES);
} batches;
/** Set of active batches for faster traversal */
BITSET_DECLARE(active, PAN_MAX_BATCHES);
} batches;
/* Map from resources to panfrost_batches */
struct hash_table *writers;
/* Map from resources to panfrost_batches */
struct hash_table *writers;
/* Bound job batch */
struct panfrost_batch *batch;
/* Bound job batch */
struct panfrost_batch *batch;
/* Within a launch_grid call.. */
const struct pipe_grid_info *compute_grid;
/* Within a launch_grid call.. */
const struct pipe_grid_info *compute_grid;
struct pipe_framebuffer_state pipe_framebuffer;
struct panfrost_streamout streamout;
struct pipe_framebuffer_state pipe_framebuffer;
struct panfrost_streamout streamout;
bool active_queries;
uint64_t prims_generated;
uint64_t tf_prims_generated;
uint64_t draw_calls;
struct panfrost_query *occlusion_query;
bool active_queries;
uint64_t prims_generated;
uint64_t tf_prims_generated;
uint64_t draw_calls;
struct panfrost_query *occlusion_query;
unsigned drawid;
unsigned vertex_count;
unsigned instance_count;
unsigned offset_start;
unsigned base_vertex;
unsigned base_instance;
enum pipe_prim_type active_prim;
unsigned drawid;
unsigned vertex_count;
unsigned instance_count;
unsigned offset_start;
unsigned base_vertex;
unsigned base_instance;
enum pipe_prim_type active_prim;
/* If instancing is enabled, vertex count padded for instance; if
* it is disabled, just equal to plain vertex count */
unsigned padded_count;
/* If instancing is enabled, vertex count padded for instance; if
* it is disabled, just equal to plain vertex count */
unsigned padded_count;
struct panfrost_constant_buffer constant_buffer[PIPE_SHADER_TYPES];
struct panfrost_rasterizer *rasterizer;
struct panfrost_vertex_state *vertex;
struct panfrost_constant_buffer constant_buffer[PIPE_SHADER_TYPES];
struct panfrost_rasterizer *rasterizer;
struct panfrost_vertex_state *vertex;
struct panfrost_uncompiled_shader *uncompiled[PIPE_SHADER_TYPES];
struct panfrost_compiled_shader *prog[PIPE_SHADER_TYPES];
struct panfrost_uncompiled_shader *uncompiled[PIPE_SHADER_TYPES];
struct panfrost_compiled_shader *prog[PIPE_SHADER_TYPES];
struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
uint32_t vb_mask;
struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
uint32_t vb_mask;
struct pipe_shader_buffer ssbo[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS];
uint32_t ssbo_mask[PIPE_SHADER_TYPES];
struct pipe_shader_buffer ssbo[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS];
uint32_t ssbo_mask[PIPE_SHADER_TYPES];
struct pipe_image_view images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
uint32_t image_mask[PIPE_SHADER_TYPES];
struct pipe_image_view images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
uint32_t image_mask[PIPE_SHADER_TYPES];
struct panfrost_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
unsigned sampler_count[PIPE_SHADER_TYPES];
uint32_t valid_samplers[PIPE_SHADER_TYPES];
struct panfrost_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
unsigned sampler_count[PIPE_SHADER_TYPES];
uint32_t valid_samplers[PIPE_SHADER_TYPES];
struct panfrost_sampler_view *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
unsigned sampler_view_count[PIPE_SHADER_TYPES];
struct panfrost_sampler_view
*sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
unsigned sampler_view_count[PIPE_SHADER_TYPES];
struct blitter_context *blitter;
struct blitter_context *blitter;
struct panfrost_blend_state *blend;
struct panfrost_blend_state *blend;
/* On Valhall, does the current blend state use a blend shader for any
* output? We need this information in a hot path to decide if
* per-sample shading should be enabled.
*/
bool valhall_has_blend_shader;
/* On Valhall, does the current blend state use a blend shader for any
* output? We need this information in a hot path to decide if
* per-sample shading should be enabled.
*/
bool valhall_has_blend_shader;
struct pipe_viewport_state pipe_viewport;
struct pipe_scissor_state scissor;
struct pipe_blend_color blend_color;
struct panfrost_zsa_state *depth_stencil;
struct pipe_stencil_ref stencil_ref;
uint16_t sample_mask;
unsigned min_samples;
struct pipe_viewport_state pipe_viewport;
struct pipe_scissor_state scissor;
struct pipe_blend_color blend_color;
struct panfrost_zsa_state *depth_stencil;
struct pipe_stencil_ref stencil_ref;
uint16_t sample_mask;
unsigned min_samples;
struct panfrost_query *cond_query;
bool cond_cond;
enum pipe_render_cond_flag cond_mode;
struct panfrost_query *cond_query;
bool cond_cond;
enum pipe_render_cond_flag cond_mode;
bool is_noop;
bool is_noop;
/* Mask of active render targets */
uint8_t fb_rt_mask;
/* Mask of active render targets */
uint8_t fb_rt_mask;
int in_sync_fd;
uint32_t in_sync_obj;
int in_sync_fd;
uint32_t in_sync_obj;
};
/* Corresponds to the CSO */
@@ -234,19 +235,19 @@ struct panfrost_rasterizer;
/* Linked varyings */
struct pan_linkage {
/* If the upload is owned by the CSO instead
* of the pool, the referenced BO. Else,
* NULL. */
struct panfrost_bo *bo;
/* If the upload is owned by the CSO instead
* of the pool, the referenced BO. Else,
* NULL. */
struct panfrost_bo *bo;
/* Uploaded attribute descriptors */
mali_ptr producer, consumer;
/* Uploaded attribute descriptors */
mali_ptr producer, consumer;
/* Varyings buffers required */
uint32_t present;
/* Varyings buffers required */
uint32_t present;
/* Per-vertex stride for general varying buffer */
uint32_t stride;
/* Per-vertex stride for general varying buffer */
uint32_t stride;
};
#define RSD_WORDS 16
@@ -255,89 +256,89 @@ struct pan_linkage {
* shaders with varying emulated features baked in
*/
struct panfrost_fs_key {
/* Number of colour buffers if gl_FragColor is written */
unsigned nr_cbufs_for_fragcolor;
/* Number of colour buffers if gl_FragColor is written */
unsigned nr_cbufs_for_fragcolor;
/* On Valhall, fixed_varying_mask of the linked vertex shader */
uint32_t fixed_varying_mask;
/* On Valhall, fixed_varying_mask of the linked vertex shader */
uint32_t fixed_varying_mask;
/* Midgard shaders that read the tilebuffer must be keyed for
* non-blendable formats
*/
enum pipe_format rt_formats[8];
/* Midgard shaders that read the tilebuffer must be keyed for
* non-blendable formats
*/
enum pipe_format rt_formats[8];
/* From rasterize state, to lower point sprites */
uint16_t sprite_coord_enable;
/* From rasterize state, to lower point sprites */
uint16_t sprite_coord_enable;
/* User clip plane lowering */
uint8_t clip_plane_enable;
/* User clip plane lowering */
uint8_t clip_plane_enable;
};
struct panfrost_shader_key {
union {
/* Vertex shaders do not use shader keys. However, we have a
* special "transform feedback" vertex program derived from a
* vertex shader. If vs_is_xfb is set on a vertex shader, this
* is a transform feedback shader, else it is a regular
* (unkeyed) vertex shader.
*/
bool vs_is_xfb;
union {
/* Vertex shaders do not use shader keys. However, we have a
* special "transform feedback" vertex program derived from a
* vertex shader. If vs_is_xfb is set on a vertex shader, this
* is a transform feedback shader, else it is a regular
* (unkeyed) vertex shader.
*/
bool vs_is_xfb;
/* Fragment shaders use regular shader keys */
struct panfrost_fs_key fs;
};
/* Fragment shaders use regular shader keys */
struct panfrost_fs_key fs;
};
};
struct panfrost_compiled_shader {
/* Respectively, shader binary and Renderer State Descriptor */
struct panfrost_pool_ref bin, state;
/* Respectively, shader binary and Renderer State Descriptor */
struct panfrost_pool_ref bin, state;
/* For fragment shaders, a prepared (but not uploaded RSD) */
uint32_t partial_rsd[RSD_WORDS];
/* For fragment shaders, a prepared (but not uploaded RSD) */
uint32_t partial_rsd[RSD_WORDS];
struct pan_shader_info info;
struct pan_shader_info info;
struct pan_earlyzs_lut earlyzs;
struct pan_earlyzs_lut earlyzs;
/* Linked varyings, for non-separable programs */
struct pan_linkage linkage;
/* Linked varyings, for non-separable programs */
struct pan_linkage linkage;
struct pipe_stream_output_info stream_output;
struct pipe_stream_output_info stream_output;
struct panfrost_shader_key key;
struct panfrost_shader_key key;
/* Mask of state that dirties the sysvals */
unsigned dirty_3d, dirty_shader;
/* Mask of state that dirties the sysvals */
unsigned dirty_3d, dirty_shader;
};
/* Shader CSO */
struct panfrost_uncompiled_shader {
/* NIR for the shader. For graphics, this will be non-NULL even for
* TGSI. For compute, this will be NULL after the shader is compiled,
* as we don't need any compute variants.
*/
const nir_shader *nir;
/* NIR for the shader. For graphics, this will be non-NULL even for
* TGSI. For compute, this will be NULL after the shader is compiled,
* as we don't need any compute variants.
*/
const nir_shader *nir;
/* A SHA1 of the serialized NIR for the disk cache. */
unsigned char nir_sha1[20];
/* A SHA1 of the serialized NIR for the disk cache. */
unsigned char nir_sha1[20];
/* Stream output information */
struct pipe_stream_output_info stream_output;
/* Stream output information */
struct pipe_stream_output_info stream_output;
/** Lock for the variants array */
simple_mtx_t lock;
/** Lock for the variants array */
simple_mtx_t lock;
/* Array of panfrost_compiled_shader */
struct util_dynarray variants;
/* Array of panfrost_compiled_shader */
struct util_dynarray variants;
/* Compiled transform feedback program, if one is required */
struct panfrost_compiled_shader *xfb;
/* Compiled transform feedback program, if one is required */
struct panfrost_compiled_shader *xfb;
/* On vertex shaders, bit mask of special desktop-only varyings to link
* with the fragment shader. Used on Valhall to implement separable
* shaders for desktop GL.
*/
uint32_t fixed_varying_mask;
/* On vertex shaders, bit mask of special desktop-only varyings to link
* with the fragment shader. Used on Valhall to implement separable
* shaders for desktop GL.
*/
uint32_t fixed_varying_mask;
};
/* The binary artefacts of compiling a shader. This differs from
@@ -347,11 +348,11 @@ struct panfrost_uncompiled_shader {
* This structure is serialized for the shader disk cache.
*/
struct panfrost_shader_binary {
/* Collected information about the compiled shader */
struct pan_shader_info info;
/* Collected information about the compiled shader */
struct pan_shader_info info;
/* The binary itself */
struct util_dynarray binary;
/* The binary itself */
struct util_dynarray binary;
};
void
@@ -360,28 +361,25 @@ panfrost_disk_cache_store(struct disk_cache *cache,
const struct panfrost_shader_key *key,
const struct panfrost_shader_binary *binary);
bool
panfrost_disk_cache_retrieve(struct disk_cache *cache,
const struct panfrost_uncompiled_shader *uncompiled,
const struct panfrost_shader_key *key,
struct panfrost_shader_binary *binary);
bool panfrost_disk_cache_retrieve(
struct disk_cache *cache,
const struct panfrost_uncompiled_shader *uncompiled,
const struct panfrost_shader_key *key,
struct panfrost_shader_binary *binary);
void
panfrost_disk_cache_init(struct panfrost_screen *screen);
void panfrost_disk_cache_init(struct panfrost_screen *screen);
/** (Vertex buffer index, divisor) tuple that will become an Attribute Buffer
* Descriptor at draw-time on Midgard
*/
struct pan_vertex_buffer {
unsigned vbi;
unsigned divisor;
unsigned vbi;
unsigned divisor;
};
unsigned
pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers,
unsigned *nr_bufs,
unsigned vbi,
unsigned divisor);
unsigned pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers,
unsigned *nr_bufs, unsigned vbi,
unsigned divisor);
struct panfrost_zsa_state;
struct panfrost_sampler_state;
@@ -391,39 +389,32 @@ struct panfrost_vertex_state;
static inline struct panfrost_context *
pan_context(struct pipe_context *pcontext)
{
return (struct panfrost_context *) pcontext;
return (struct panfrost_context *)pcontext;
}
static inline struct panfrost_streamout_target *
pan_so_target(struct pipe_stream_output_target *target)
{
return (struct panfrost_streamout_target *)target;
return (struct panfrost_streamout_target *)target;
}
struct pipe_context *
panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags);
struct pipe_context *panfrost_create_context(struct pipe_screen *screen,
void *priv, unsigned flags);
bool
panfrost_writes_point_size(struct panfrost_context *ctx);
bool panfrost_writes_point_size(struct panfrost_context *ctx);
struct panfrost_ptr
panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler);
struct panfrost_ptr panfrost_vertex_tiler_job(struct panfrost_context *ctx,
bool is_tiler);
void
panfrost_flush(
struct pipe_context *pipe,
struct pipe_fence_handle **fence,
unsigned flags);
void panfrost_flush(struct pipe_context *pipe, struct pipe_fence_handle **fence,
unsigned flags);
bool
panfrost_render_condition_check(struct panfrost_context *ctx);
bool panfrost_render_condition_check(struct panfrost_context *ctx);
void
panfrost_update_shader_variant(struct panfrost_context *ctx,
enum pipe_shader_type type);
void panfrost_update_shader_variant(struct panfrost_context *ctx,
enum pipe_shader_type type);
void
panfrost_analyze_sysvals(struct panfrost_compiled_shader *ss);
void panfrost_analyze_sysvals(struct panfrost_compiled_shader *ss);
mali_ptr
panfrost_get_index_buffer(struct panfrost_batch *batch,
@@ -438,41 +429,37 @@ panfrost_get_index_buffer_bounded(struct panfrost_batch *batch,
/* Instancing */
mali_ptr
panfrost_vertex_buffer_address(struct panfrost_context *ctx, unsigned i);
mali_ptr panfrost_vertex_buffer_address(struct panfrost_context *ctx,
unsigned i);
void
panfrost_shader_context_init(struct pipe_context *pctx);
void panfrost_shader_context_init(struct pipe_context *pctx);
static inline void
panfrost_dirty_state_all(struct panfrost_context *ctx)
{
ctx->dirty = ~0;
ctx->dirty = ~0;
for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
ctx->dirty_shader[i] = ~0;
for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
ctx->dirty_shader[i] = ~0;
}
static inline void
panfrost_clean_state_3d(struct panfrost_context *ctx)
{
ctx->dirty = 0;
ctx->dirty = 0;
for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) {
if (i != PIPE_SHADER_COMPUTE)
ctx->dirty_shader[i] = 0;
}
for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) {
if (i != PIPE_SHADER_COMPUTE)
ctx->dirty_shader[i] = 0;
}
}
void
panfrost_set_batch_masks_blend(struct panfrost_batch *batch);
void panfrost_set_batch_masks_blend(struct panfrost_batch *batch);
void
panfrost_set_batch_masks_zs(struct panfrost_batch *batch);
void panfrost_set_batch_masks_zs(struct panfrost_batch *batch);
void
panfrost_track_image_access(struct panfrost_batch *batch,
enum pipe_shader_type stage,
struct pipe_image_view *image);
void panfrost_track_image_access(struct panfrost_batch *batch,
enum pipe_shader_type stage,
struct pipe_image_view *image);
#endif


@@ -21,9 +21,9 @@
* DEALINGS IN THE SOFTWARE.
*/
#include <stdio.h>
#include <stdint.h>
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "compiler/nir/nir.h"
@@ -43,17 +43,17 @@ extern int bifrost_debug;
* Compute a disk cache key for the given uncompiled shader and shader key.
*/
static void
panfrost_disk_cache_compute_key(struct disk_cache *cache,
const struct panfrost_uncompiled_shader *uncompiled,
const struct panfrost_shader_key *shader_key,
cache_key cache_key)
panfrost_disk_cache_compute_key(
struct disk_cache *cache,
const struct panfrost_uncompiled_shader *uncompiled,
const struct panfrost_shader_key *shader_key, cache_key cache_key)
{
uint8_t data[sizeof(uncompiled->nir_sha1) + sizeof(*shader_key)];
uint8_t data[sizeof(uncompiled->nir_sha1) + sizeof(*shader_key)];
memcpy(data, uncompiled->nir_sha1, sizeof(uncompiled->nir_sha1));
memcpy(data + sizeof(uncompiled->nir_sha1), shader_key, sizeof(*shader_key));
memcpy(data, uncompiled->nir_sha1, sizeof(uncompiled->nir_sha1));
memcpy(data + sizeof(uncompiled->nir_sha1), shader_key, sizeof(*shader_key));
disk_cache_compute_key(cache, data, sizeof(data), cache_key);
disk_cache_compute_key(cache, data, sizeof(data), cache_key);
}
/**
@@ -69,33 +69,33 @@ panfrost_disk_cache_store(struct disk_cache *cache,
const struct panfrost_shader_binary *binary)
{
#ifdef ENABLE_SHADER_CACHE
if (!cache)
return;
if (!cache)
return;
cache_key cache_key;
panfrost_disk_cache_compute_key(cache, uncompiled, key, cache_key);
cache_key cache_key;
panfrost_disk_cache_compute_key(cache, uncompiled, key, cache_key);
if (debug) {
char sha1[41];
_mesa_sha1_format(sha1, cache_key);
fprintf(stderr, "[mesa disk cache] storing %s\n", sha1);
}
if (debug) {
char sha1[41];
_mesa_sha1_format(sha1, cache_key);
fprintf(stderr, "[mesa disk cache] storing %s\n", sha1);
}
struct blob blob;
blob_init(&blob);
struct blob blob;
blob_init(&blob);
/* We write the following data to the cache blob:
*
* 1. Size of program binary
* 2. Program binary
* 3. Shader info
*/
blob_write_uint32(&blob, binary->binary.size);
blob_write_bytes(&blob, binary->binary.data, binary->binary.size);
blob_write_bytes(&blob, &binary->info, sizeof(binary->info));
/* We write the following data to the cache blob:
*
* 1. Size of program binary
* 2. Program binary
* 3. Shader info
*/
blob_write_uint32(&blob, binary->binary.size);
blob_write_bytes(&blob, binary->binary.data, binary->binary.size);
blob_write_bytes(&blob, &binary->info, sizeof(binary->info));
disk_cache_put(cache, cache_key, blob.data, blob.size, NULL);
blob_finish(&blob);
disk_cache_put(cache, cache_key, blob.data, blob.size, NULL);
blob_finish(&blob);
#endif
}
@@ -109,43 +109,43 @@ panfrost_disk_cache_retrieve(struct disk_cache *cache,
struct panfrost_shader_binary *binary)
{
#ifdef ENABLE_SHADER_CACHE
if (!cache)
return false;
if (!cache)
return false;
cache_key cache_key;
panfrost_disk_cache_compute_key(cache, uncompiled, key, cache_key);
cache_key cache_key;
panfrost_disk_cache_compute_key(cache, uncompiled, key, cache_key);
if (debug) {
char sha1[41];
_mesa_sha1_format(sha1, cache_key);
fprintf(stderr, "[mesa disk cache] retrieving %s: ", sha1);
}
if (debug) {
char sha1[41];
_mesa_sha1_format(sha1, cache_key);
fprintf(stderr, "[mesa disk cache] retrieving %s: ", sha1);
}
size_t size;
void *buffer = disk_cache_get(cache, cache_key, &size);
size_t size;
void *buffer = disk_cache_get(cache, cache_key, &size);
if (debug)
fprintf(stderr, "%s\n", buffer ? "found" : "missing");
if (debug)
fprintf(stderr, "%s\n", buffer ? "found" : "missing");
if (!buffer)
return false;
if (!buffer)
return false;
struct blob_reader blob;
blob_reader_init(&blob, buffer, size);
struct blob_reader blob;
blob_reader_init(&blob, buffer, size);
util_dynarray_init(&binary->binary, NULL);
util_dynarray_init(&binary->binary, NULL);
uint32_t binary_size = blob_read_uint32(&blob);
void *ptr = util_dynarray_resize_bytes(&binary->binary, binary_size, 1);
uint32_t binary_size = blob_read_uint32(&blob);
void *ptr = util_dynarray_resize_bytes(&binary->binary, binary_size, 1);
blob_copy_bytes(&blob, ptr, binary_size);
blob_copy_bytes(&blob, &binary->info, sizeof(binary->info));
blob_copy_bytes(&blob, ptr, binary_size);
blob_copy_bytes(&blob, &binary->info, sizeof(binary->info));
free(buffer);
free(buffer);
return true;
return true;
#else
return false;
return false;
#endif
}
@@ -156,22 +156,22 @@ void
panfrost_disk_cache_init(struct panfrost_screen *screen)
{
#ifdef ENABLE_SHADER_CACHE
const char *renderer = screen->base.get_name(&screen->base);
const char *renderer = screen->base.get_name(&screen->base);
const struct build_id_note *note =
build_id_find_nhdr_for_addr(panfrost_disk_cache_init);
assert(note && build_id_length(note) == 20); /* sha1 */
const struct build_id_note *note =
build_id_find_nhdr_for_addr(panfrost_disk_cache_init);
assert(note && build_id_length(note) == 20); /* sha1 */
const uint8_t *id_sha1 = build_id_data(note);
assert(id_sha1);
const uint8_t *id_sha1 = build_id_data(note);
assert(id_sha1);
char timestamp[41];
_mesa_sha1_format(timestamp, id_sha1);
char timestamp[41];
_mesa_sha1_format(timestamp, id_sha1);
/* Consider any flags affecting the compile when caching */
uint64_t driver_flags = screen->dev.debug;
driver_flags |= ((uint64_t) (midgard_debug | bifrost_debug) << 32);
/* Consider any flags affecting the compile when caching */
uint64_t driver_flags = screen->dev.debug;
driver_flags |= ((uint64_t)(midgard_debug | bifrost_debug) << 32);
screen->disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
screen->disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
#endif
}


@@ -26,8 +26,8 @@
* SOFTWARE.
*/
#include "pan_context.h"
#include "pan_fence.h"
#include "pan_context.h"
#include "pan_screen.h"
#include "util/os_time.h"
@@ -38,117 +38,112 @@ panfrost_fence_reference(struct pipe_screen *pscreen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *fence)
{
struct panfrost_device *dev = pan_device(pscreen);
struct pipe_fence_handle *old = *ptr;
struct panfrost_device *dev = pan_device(pscreen);
struct pipe_fence_handle *old = *ptr;
if (pipe_reference(&old->reference, &fence->reference)) {
drmSyncobjDestroy(dev->fd, old->syncobj);
free(old);
}
if (pipe_reference(&old->reference, &fence->reference)) {
drmSyncobjDestroy(dev->fd, old->syncobj);
free(old);
}
*ptr = fence;
*ptr = fence;
}
bool
panfrost_fence_finish(struct pipe_screen *pscreen,
struct pipe_context *ctx,
struct pipe_fence_handle *fence,
uint64_t timeout)
panfrost_fence_finish(struct pipe_screen *pscreen, struct pipe_context *ctx,
struct pipe_fence_handle *fence, uint64_t timeout)
{
struct panfrost_device *dev = pan_device(pscreen);
int ret;
struct panfrost_device *dev = pan_device(pscreen);
int ret;
if (fence->signaled)
return true;
if (fence->signaled)
return true;
uint64_t abs_timeout = os_time_get_absolute_timeout(timeout);
if (abs_timeout == OS_TIMEOUT_INFINITE)
abs_timeout = INT64_MAX;
uint64_t abs_timeout = os_time_get_absolute_timeout(timeout);
if (abs_timeout == OS_TIMEOUT_INFINITE)
abs_timeout = INT64_MAX;
ret = drmSyncobjWait(dev->fd, &fence->syncobj,
1,
abs_timeout, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
NULL);
ret = drmSyncobjWait(dev->fd, &fence->syncobj, 1, abs_timeout,
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, NULL);
fence->signaled = (ret >= 0);
return fence->signaled;
fence->signaled = (ret >= 0);
return fence->signaled;
}
int
panfrost_fence_get_fd(struct pipe_screen *screen,
struct pipe_fence_handle *f)
panfrost_fence_get_fd(struct pipe_screen *screen, struct pipe_fence_handle *f)
{
struct panfrost_device *dev = pan_device(screen);
int fd = -1;
struct panfrost_device *dev = pan_device(screen);
int fd = -1;
drmSyncobjExportSyncFile(dev->fd, f->syncobj, &fd);
return fd;
drmSyncobjExportSyncFile(dev->fd, f->syncobj, &fd);
return fd;
}
struct pipe_fence_handle *
panfrost_fence_from_fd(struct panfrost_context *ctx, int fd,
enum pipe_fd_type type)
{
struct panfrost_device *dev = pan_device(ctx->base.screen);
int ret;
struct panfrost_device *dev = pan_device(ctx->base.screen);
int ret;
struct pipe_fence_handle *f = calloc(1, sizeof(*f));
if (!f)
return NULL;
struct pipe_fence_handle *f = calloc(1, sizeof(*f));
if (!f)
return NULL;
if (type == PIPE_FD_TYPE_NATIVE_SYNC) {
ret = drmSyncobjCreate(dev->fd, 0, &f->syncobj);
if (ret) {
fprintf(stderr, "create syncobj failed\n");
goto err_free_fence;
}
if (type == PIPE_FD_TYPE_NATIVE_SYNC) {
ret = drmSyncobjCreate(dev->fd, 0, &f->syncobj);
if (ret) {
fprintf(stderr, "create syncobj failed\n");
goto err_free_fence;
}
ret = drmSyncobjImportSyncFile(dev->fd, f->syncobj, fd);
if (ret) {
fprintf(stderr, "import syncfile failed\n");
goto err_destroy_syncobj;
}
} else {
assert(type == PIPE_FD_TYPE_SYNCOBJ);
ret = drmSyncobjFDToHandle(dev->fd, fd, &f->syncobj);
if (ret) {
fprintf(stderr, "import syncobj FD failed\n");
goto err_free_fence;
}
}
ret = drmSyncobjImportSyncFile(dev->fd, f->syncobj, fd);
if (ret) {
fprintf(stderr, "import syncfile failed\n");
goto err_destroy_syncobj;
}
} else {
assert(type == PIPE_FD_TYPE_SYNCOBJ);
ret = drmSyncobjFDToHandle(dev->fd, fd, &f->syncobj);
if (ret) {
fprintf(stderr, "import syncobj FD failed\n");
goto err_free_fence;
}
}
pipe_reference_init(&f->reference, 1);
pipe_reference_init(&f->reference, 1);
return f;
return f;
err_destroy_syncobj:
drmSyncobjDestroy(dev->fd, f->syncobj);
drmSyncobjDestroy(dev->fd, f->syncobj);
err_free_fence:
free(f);
return NULL;
free(f);
return NULL;
}
struct pipe_fence_handle *
panfrost_fence_create(struct panfrost_context *ctx)
{
struct panfrost_device *dev = pan_device(ctx->base.screen);
int fd = -1, ret;
struct panfrost_device *dev = pan_device(ctx->base.screen);
int fd = -1, ret;
/* Snapshot the last rendering out fence. We'd rather have another
* syncobj instead of a sync file, but this is all we get.
* (HandleToFD/FDToHandle just gives you another syncobj ID for the
* same syncobj).
*/
ret = drmSyncobjExportSyncFile(dev->fd, ctx->syncobj, &fd);
if (ret || fd == -1) {
fprintf(stderr, "export failed\n");
return NULL;
}
/* Snapshot the last rendering out fence. We'd rather have another
* syncobj instead of a sync file, but this is all we get.
* (HandleToFD/FDToHandle just gives you another syncobj ID for the
* same syncobj).
*/
ret = drmSyncobjExportSyncFile(dev->fd, ctx->syncobj, &fd);
if (ret || fd == -1) {
fprintf(stderr, "export failed\n");
return NULL;
}
struct pipe_fence_handle *f =
panfrost_fence_from_fd(ctx, fd, PIPE_FD_TYPE_NATIVE_SYNC);
struct pipe_fence_handle *f =
panfrost_fence_from_fd(ctx, fd, PIPE_FD_TYPE_NATIVE_SYNC);
close(fd);
close(fd);
return f;
return f;
}


@@ -30,29 +30,24 @@
struct panfrost_context;
struct pipe_fence_handle {
struct pipe_reference reference;
uint32_t syncobj;
bool signaled;
struct pipe_reference reference;
uint32_t syncobj;
bool signaled;
};
void
panfrost_fence_reference(struct pipe_screen *pscreen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *fence);
void panfrost_fence_reference(struct pipe_screen *pscreen,
struct pipe_fence_handle **ptr,
struct pipe_fence_handle *fence);
bool
panfrost_fence_finish(struct pipe_screen *pscreen,
struct pipe_context *ctx,
struct pipe_fence_handle *fence,
uint64_t timeout);
bool panfrost_fence_finish(struct pipe_screen *pscreen,
struct pipe_context *ctx,
struct pipe_fence_handle *fence, uint64_t timeout);
int
panfrost_fence_get_fd(struct pipe_screen *screen,
struct pipe_fence_handle *f);
int panfrost_fence_get_fd(struct pipe_screen *screen,
struct pipe_fence_handle *f);
struct pipe_fence_handle *
panfrost_fence_from_fd(struct panfrost_context *ctx, int fd,
enum pipe_fd_type type);
struct pipe_fence_handle *panfrost_fence_from_fd(struct panfrost_context *ctx,
int fd,
enum pipe_fd_type type);
struct pipe_fence_handle *
panfrost_fence_create(struct panfrost_context *ctx);
struct pipe_fence_handle *panfrost_fence_create(struct panfrost_context *ctx);


@@ -21,66 +21,66 @@
* SOFTWARE.
*/
#include "pan_context.h"
#include "util/u_vbuf.h"
#include "pan_context.h"
void
panfrost_analyze_sysvals(struct panfrost_compiled_shader *ss)
{
unsigned dirty = 0;
unsigned dirty_shader = PAN_DIRTY_STAGE_SHADER | PAN_DIRTY_STAGE_CONST;
unsigned dirty = 0;
unsigned dirty_shader = PAN_DIRTY_STAGE_SHADER | PAN_DIRTY_STAGE_CONST;
for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) {
switch (PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[i])) {
case PAN_SYSVAL_VIEWPORT_SCALE:
case PAN_SYSVAL_VIEWPORT_OFFSET:
dirty |= PAN_DIRTY_VIEWPORT;
break;
for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) {
switch (PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[i])) {
case PAN_SYSVAL_VIEWPORT_SCALE:
case PAN_SYSVAL_VIEWPORT_OFFSET:
dirty |= PAN_DIRTY_VIEWPORT;
break;
case PAN_SYSVAL_TEXTURE_SIZE:
dirty_shader |= PAN_DIRTY_STAGE_TEXTURE;
break;
case PAN_SYSVAL_TEXTURE_SIZE:
dirty_shader |= PAN_DIRTY_STAGE_TEXTURE;
break;
case PAN_SYSVAL_SSBO:
dirty_shader |= PAN_DIRTY_STAGE_SSBO;
break;
case PAN_SYSVAL_SSBO:
dirty_shader |= PAN_DIRTY_STAGE_SSBO;
break;
case PAN_SYSVAL_XFB:
dirty |= PAN_DIRTY_SO;
break;
case PAN_SYSVAL_XFB:
dirty |= PAN_DIRTY_SO;
break;
case PAN_SYSVAL_SAMPLER:
dirty_shader |= PAN_DIRTY_STAGE_SAMPLER;
break;
case PAN_SYSVAL_SAMPLER:
dirty_shader |= PAN_DIRTY_STAGE_SAMPLER;
break;
case PAN_SYSVAL_IMAGE_SIZE:
dirty_shader |= PAN_DIRTY_STAGE_IMAGE;
break;
case PAN_SYSVAL_IMAGE_SIZE:
dirty_shader |= PAN_DIRTY_STAGE_IMAGE;
break;
case PAN_SYSVAL_NUM_WORK_GROUPS:
case PAN_SYSVAL_LOCAL_GROUP_SIZE:
case PAN_SYSVAL_WORK_DIM:
case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
case PAN_SYSVAL_NUM_VERTICES:
dirty |= PAN_DIRTY_PARAMS;
break;
case PAN_SYSVAL_NUM_WORK_GROUPS:
case PAN_SYSVAL_LOCAL_GROUP_SIZE:
case PAN_SYSVAL_WORK_DIM:
case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
case PAN_SYSVAL_NUM_VERTICES:
dirty |= PAN_DIRTY_PARAMS;
break;
case PAN_SYSVAL_DRAWID:
dirty |= PAN_DIRTY_DRAWID;
break;
case PAN_SYSVAL_DRAWID:
dirty |= PAN_DIRTY_DRAWID;
break;
case PAN_SYSVAL_SAMPLE_POSITIONS:
case PAN_SYSVAL_MULTISAMPLED:
case PAN_SYSVAL_RT_CONVERSION:
/* Nothing beyond the batch itself */
break;
default:
unreachable("Invalid sysval");
}
}
case PAN_SYSVAL_SAMPLE_POSITIONS:
case PAN_SYSVAL_MULTISAMPLED:
case PAN_SYSVAL_RT_CONVERSION:
/* Nothing beyond the batch itself */
break;
default:
unreachable("Invalid sysval");
}
}
ss->dirty_3d = dirty;
ss->dirty_shader = dirty_shader;
ss->dirty_3d = dirty;
ss->dirty_shader = dirty_shader;
}
/*
@@ -93,25 +93,22 @@ panfrost_get_index_buffer(struct panfrost_batch *batch,
const struct pipe_draw_info *info,
const struct pipe_draw_start_count_bias *draw)
{
struct panfrost_resource *rsrc = pan_resource(info->index.resource);
off_t offset = draw->start * info->index_size;
struct panfrost_resource *rsrc = pan_resource(info->index.resource);
off_t offset = draw->start * info->index_size;
if (!info->has_user_indices) {
/* Only resources can be directly mapped */
panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
return rsrc->image.data.bo->ptr.gpu + offset;
} else {
/* Otherwise, we need to upload to transient memory */
const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
struct panfrost_ptr T =
pan_pool_alloc_aligned(&batch->pool.base,
draw->count *
info->index_size,
info->index_size);
if (!info->has_user_indices) {
/* Only resources can be directly mapped */
panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
return rsrc->image.data.bo->ptr.gpu + offset;
} else {
/* Otherwise, we need to upload to transient memory */
const uint8_t *ibuf8 = (const uint8_t *)info->index.user;
struct panfrost_ptr T = pan_pool_alloc_aligned(
&batch->pool.base, draw->count * info->index_size, info->index_size);
memcpy(T.cpu, ibuf8 + offset, draw->count * info->index_size);
return T.gpu;
}
memcpy(T.cpu, ibuf8 + offset, draw->count * info->index_size);
return T.gpu;
}
}
/* Gets a GPU address for the associated index buffer. Only gauranteed to be
@@ -126,34 +123,30 @@ panfrost_get_index_buffer_bounded(struct panfrost_batch *batch,
const struct pipe_draw_start_count_bias *draw,
unsigned *min_index, unsigned *max_index)
{
struct panfrost_resource *rsrc = pan_resource(info->index.resource);
struct panfrost_context *ctx = batch->ctx;
bool needs_indices = true;
struct panfrost_resource *rsrc = pan_resource(info->index.resource);
struct panfrost_context *ctx = batch->ctx;
bool needs_indices = true;
if (info->index_bounds_valid) {
*min_index = info->min_index;
*max_index = info->max_index;
needs_indices = false;
} else if (!info->has_user_indices) {
/* Check the cache */
needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache,
draw->start,
draw->count,
min_index,
max_index);
}
if (info->index_bounds_valid) {
*min_index = info->min_index;
*max_index = info->max_index;
needs_indices = false;
} else if (!info->has_user_indices) {
/* Check the cache */
needs_indices = !panfrost_minmax_cache_get(
rsrc->index_cache, draw->start, draw->count, min_index, max_index);
}
if (needs_indices) {
/* Fallback */
u_vbuf_get_minmax_index(&ctx->base, info, draw, min_index, max_index);
if (needs_indices) {
/* Fallback */
u_vbuf_get_minmax_index(&ctx->base, info, draw, min_index, max_index);
if (!info->has_user_indices)
panfrost_minmax_cache_add(rsrc->index_cache,
draw->start, draw->count,
*min_index, *max_index);
}
if (!info->has_user_indices)
panfrost_minmax_cache_add(rsrc->index_cache, draw->start, draw->count,
*min_index, *max_index);
}
return panfrost_get_index_buffer(batch, info, draw);
return panfrost_get_index_buffer(batch, info, draw);
}
/**
@@ -163,26 +156,24 @@ panfrost_get_index_buffer_bounded(struct panfrost_batch *batch,
* elements CSO create time, not at draw time.
*/
unsigned
pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers,
unsigned *nr_bufs,
unsigned vbi,
unsigned divisor)
pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers, unsigned *nr_bufs,
unsigned vbi, unsigned divisor)
{
/* Look up the buffer */
for (unsigned i = 0; i < (*nr_bufs); ++i) {
if (buffers[i].vbi == vbi && buffers[i].divisor == divisor)
return i;
}
/* Look up the buffer */
for (unsigned i = 0; i < (*nr_bufs); ++i) {
if (buffers[i].vbi == vbi && buffers[i].divisor == divisor)
return i;
}
/* Else, create a new buffer */
unsigned idx = (*nr_bufs)++;
/* Else, create a new buffer */
unsigned idx = (*nr_bufs)++;
buffers[idx] = (struct pan_vertex_buffer) {
.vbi = vbi,
.divisor = divisor,
};
buffers[idx] = (struct pan_vertex_buffer){
.vbi = vbi,
.divisor = divisor,
};
return idx;
return idx;
}
/*
@@ -194,8 +185,8 @@ pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers,
static void
panfrost_draw_target(struct panfrost_batch *batch, unsigned target)
{
batch->draws |= target;
batch->resolve |= target;
batch->draws |= target;
batch->resolve |= target;
}
/*
@@ -206,34 +197,34 @@ panfrost_draw_target(struct panfrost_batch *batch, unsigned target)
void
panfrost_set_batch_masks_blend(struct panfrost_batch *batch)
{
struct panfrost_context *ctx = batch->ctx;
struct panfrost_blend_state *blend = ctx->blend;
struct panfrost_context *ctx = batch->ctx;
struct panfrost_blend_state *blend = ctx->blend;
for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
if (blend->info[i].enabled && batch->key.cbufs[i])
panfrost_draw_target(batch, PIPE_CLEAR_COLOR0 << i);
}
for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
if (blend->info[i].enabled && batch->key.cbufs[i])
panfrost_draw_target(batch, PIPE_CLEAR_COLOR0 << i);
}
}
void
panfrost_set_batch_masks_zs(struct panfrost_batch *batch)
{
struct panfrost_context *ctx = batch->ctx;
struct pipe_depth_stencil_alpha_state *zsa = (void *) ctx->depth_stencil;
struct panfrost_context *ctx = batch->ctx;
struct pipe_depth_stencil_alpha_state *zsa = (void *)ctx->depth_stencil;
/* Assume depth is read (TODO: perf) */
if (zsa->depth_enabled)
batch->read |= PIPE_CLEAR_DEPTH;
/* Assume depth is read (TODO: perf) */
if (zsa->depth_enabled)
batch->read |= PIPE_CLEAR_DEPTH;
if (zsa->depth_writemask)
panfrost_draw_target(batch, PIPE_CLEAR_DEPTH);
if (zsa->depth_writemask)
panfrost_draw_target(batch, PIPE_CLEAR_DEPTH);
if (zsa->stencil[0].enabled) {
panfrost_draw_target(batch, PIPE_CLEAR_STENCIL);
if (zsa->stencil[0].enabled) {
panfrost_draw_target(batch, PIPE_CLEAR_STENCIL);
/* Assume stencil is read (TODO: perf) */
batch->read |= PIPE_CLEAR_STENCIL;
}
/* Assume stencil is read (TODO: perf) */
batch->read |= PIPE_CLEAR_STENCIL;
}
}
void
@@ -241,21 +232,20 @@ panfrost_track_image_access(struct panfrost_batch *batch,
enum pipe_shader_type stage,
struct pipe_image_view *image)
{
struct panfrost_resource *rsrc = pan_resource(image->resource);
struct panfrost_resource *rsrc = pan_resource(image->resource);
if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE) {
panfrost_batch_write_rsrc(batch, rsrc, stage);
if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE) {
panfrost_batch_write_rsrc(batch, rsrc, stage);
bool is_buffer = rsrc->base.target == PIPE_BUFFER;
unsigned level = is_buffer ? 0 : image->u.tex.level;
BITSET_SET(rsrc->valid.data, level);
bool is_buffer = rsrc->base.target == PIPE_BUFFER;
unsigned level = is_buffer ? 0 : image->u.tex.level;
BITSET_SET(rsrc->valid.data, level);
if (is_buffer) {
util_range_add(&rsrc->base, &rsrc->valid_buffer_range,
0, rsrc->base.width0);
}
} else {
panfrost_batch_read_rsrc(batch, rsrc, stage);
}
if (is_buffer) {
util_range_add(&rsrc->base, &rsrc->valid_buffer_range, 0,
rsrc->base.width0);
}
} else {
panfrost_batch_read_rsrc(batch, rsrc, stage);
}
}

File diff suppressed because it is too large.


@@ -26,8 +26,8 @@
#ifndef __PAN_JOB_H__
#define __PAN_JOB_H__
#include "util/u_dynarray.h"
#include "pipe/p_state.h"
#include "util/u_dynarray.h"
#include "pan_cs.h"
#include "pan_mempool.h"
#include "pan_resource.h"
@@ -39,11 +39,11 @@
* error. The getter needs to be used instead.
*/
struct pan_tristate {
enum {
PAN_TRISTATE_DONTCARE,
PAN_TRISTATE_FALSE,
PAN_TRISTATE_TRUE,
} v;
enum {
PAN_TRISTATE_DONTCARE,
PAN_TRISTATE_FALSE,
PAN_TRISTATE_TRUE,
} v;
};
/*
@@ -53,20 +53,20 @@ struct pan_tristate {
static bool
pan_tristate_set(struct pan_tristate *state, bool value)
{
switch (state->v) {
case PAN_TRISTATE_DONTCARE:
state->v = value ? PAN_TRISTATE_TRUE : PAN_TRISTATE_FALSE;
return true;
switch (state->v) {
case PAN_TRISTATE_DONTCARE:
state->v = value ? PAN_TRISTATE_TRUE : PAN_TRISTATE_FALSE;
return true;
case PAN_TRISTATE_FALSE:
return (value == false);
case PAN_TRISTATE_FALSE:
return (value == false);
case PAN_TRISTATE_TRUE:
return (value == true);
case PAN_TRISTATE_TRUE:
return (value == true);
default:
unreachable("Invalid tristate value");
}
default:
unreachable("Invalid tristate value");
}
}
/*
@@ -76,189 +76,179 @@ pan_tristate_set(struct pan_tristate *state, bool value)
static bool
pan_tristate_get(struct pan_tristate state)
{
return (state.v == PAN_TRISTATE_TRUE);
return (state.v == PAN_TRISTATE_TRUE);
}
/* A panfrost_batch corresponds to a bound FBO we're rendering to,
* collecting over multiple draws. */
struct panfrost_batch {
struct panfrost_context *ctx;
struct pipe_framebuffer_state key;
struct panfrost_context *ctx;
struct pipe_framebuffer_state key;
/* Sequence number used to implement LRU eviction when all batch slots are used */
uint64_t seqnum;
/* Sequence number used to implement LRU eviction when all batch slots are
* used */
uint64_t seqnum;
/* Buffers cleared (PIPE_CLEAR_* bitmask) */
unsigned clear;
/* Buffers cleared (PIPE_CLEAR_* bitmask) */
unsigned clear;
/* Buffers drawn */
unsigned draws;
/* Buffers drawn */
unsigned draws;
/* Buffers read */
unsigned read;
/* Buffers read */
unsigned read;
/* Buffers needing resolve to memory */
unsigned resolve;
/* Buffers needing resolve to memory */
unsigned resolve;
/* Packed clear values, indexed by both render target as well as word.
* Essentially, a single pixel is packed, with some padding to bring it
* up to a 32-bit interval; that pixel is then duplicated over to fill
* all 16-bytes */
/* Packed clear values, indexed by both render target as well as word.
* Essentially, a single pixel is packed, with some padding to bring it
* up to a 32-bit interval; that pixel is then duplicated over to fill
* all 16-bytes */
uint32_t clear_color[PIPE_MAX_COLOR_BUFS][4];
float clear_depth;
unsigned clear_stencil;
uint32_t clear_color[PIPE_MAX_COLOR_BUFS][4];
float clear_depth;
unsigned clear_stencil;
/* Amount of thread local storage required per thread */
unsigned stack_size;
/* Amount of thread local storage required per thread */
unsigned stack_size;
/* Amount of shared memory needed per workgroup (for compute) */
unsigned shared_size;
/* Amount of shared memory needed per workgroup (for compute) */
unsigned shared_size;
/* The bounding box covered by this job, taking scissors into account.
* Basically, the bounding box we have to run fragment shaders for */
/* The bounding box covered by this job, taking scissors into account.
* Basically, the bounding box we have to run fragment shaders for */
unsigned minx, miny;
unsigned maxx, maxy;
unsigned minx, miny;
unsigned maxx, maxy;
/* Acts as a rasterizer discard */
bool scissor_culls_everything;
/* Acts as a rasterizer discard */
bool scissor_culls_everything;
/* BOs referenced not in the pool */
unsigned num_bos;
struct util_dynarray bos;
/* BOs referenced not in the pool */
unsigned num_bos;
struct util_dynarray bos;
/* Pool owned by this batch (released when the batch is released) used for temporary descriptors */
struct panfrost_pool pool;
/* Pool owned by this batch (released when the batch is released) used for
* temporary descriptors */
struct panfrost_pool pool;
/* Pool also owned by this batch that is not CPU mapped (created as
* INVISIBLE) used for private GPU-internal structures, particularly
* varyings */
struct panfrost_pool invisible_pool;
/* Pool also owned by this batch that is not CPU mapped (created as
* INVISIBLE) used for private GPU-internal structures, particularly
* varyings */
struct panfrost_pool invisible_pool;
/* Job scoreboarding state */
struct pan_scoreboard scoreboard;
/* Job scoreboarding state */
struct pan_scoreboard scoreboard;
/* Polygon list bound to the batch, or NULL if none bound yet */
struct panfrost_bo *polygon_list;
/* Polygon list bound to the batch, or NULL if none bound yet */
struct panfrost_bo *polygon_list;
/* Scratchpad BO bound to the batch, or NULL if none bound yet */
struct panfrost_bo *scratchpad;
/* Scratchpad BO bound to the batch, or NULL if none bound yet */
struct panfrost_bo *scratchpad;
/* Shared memory BO bound to the batch, or NULL if none bound yet */
struct panfrost_bo *shared_memory;
/* Shared memory BO bound to the batch, or NULL if none bound yet */
struct panfrost_bo *shared_memory;
/* Framebuffer descriptor. */
struct panfrost_ptr framebuffer;
/* Framebuffer descriptor. */
struct panfrost_ptr framebuffer;
/* Thread local storage descriptor. */
struct panfrost_ptr tls;
/* Thread local storage descriptor. */
struct panfrost_ptr tls;
/* Tiler context */
struct pan_tiler_context tiler_ctx;
/* Tiler context */
struct pan_tiler_context tiler_ctx;
/* Keep the num_work_groups sysval around for indirect dispatch */
mali_ptr num_wg_sysval[3];
/* Keep the num_work_groups sysval around for indirect dispatch */
mali_ptr num_wg_sysval[3];
/* Cached descriptors */
mali_ptr viewport;
mali_ptr rsd[PIPE_SHADER_TYPES];
mali_ptr textures[PIPE_SHADER_TYPES];
mali_ptr samplers[PIPE_SHADER_TYPES];
mali_ptr attribs[PIPE_SHADER_TYPES];
mali_ptr attrib_bufs[PIPE_SHADER_TYPES];
mali_ptr uniform_buffers[PIPE_SHADER_TYPES];
mali_ptr push_uniforms[PIPE_SHADER_TYPES];
mali_ptr depth_stencil;
mali_ptr blend;
/* Cached descriptors */
mali_ptr viewport;
mali_ptr rsd[PIPE_SHADER_TYPES];
mali_ptr textures[PIPE_SHADER_TYPES];
mali_ptr samplers[PIPE_SHADER_TYPES];
mali_ptr attribs[PIPE_SHADER_TYPES];
mali_ptr attrib_bufs[PIPE_SHADER_TYPES];
mali_ptr uniform_buffers[PIPE_SHADER_TYPES];
mali_ptr push_uniforms[PIPE_SHADER_TYPES];
mali_ptr depth_stencil;
mali_ptr blend;
/* Valhall: struct mali_scissor_packed */
unsigned scissor[2];
float minimum_z, maximum_z;
/* Valhall: struct mali_scissor_packed */
unsigned scissor[2];
float minimum_z, maximum_z;
/* Used on Valhall only. Midgard includes attributes in-band with
* attributes, wildly enough.
*/
mali_ptr images[PIPE_SHADER_TYPES];
/* Used on Valhall only. Midgard includes attributes in-band with
* attributes, wildly enough.
*/
mali_ptr images[PIPE_SHADER_TYPES];
/* On Valhall, these are properties of the batch. On Bifrost, they are
* per draw.
*/
struct pan_tristate sprite_coord_origin;
struct pan_tristate first_provoking_vertex;
/* On Valhall, these are properties of the batch. On Bifrost, they are
* per draw.
*/
struct pan_tristate sprite_coord_origin;
struct pan_tristate first_provoking_vertex;
};
/* Functions for managing the above */
struct panfrost_batch *
panfrost_get_batch_for_fbo(struct panfrost_context *ctx);
struct panfrost_batch *panfrost_get_batch_for_fbo(struct panfrost_context *ctx);
struct panfrost_batch *
panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx, const char *reason);
panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx,
const char *reason);
void
panfrost_batch_add_bo(struct panfrost_batch *batch,
struct panfrost_bo *bo,
enum pipe_shader_type stage);
void panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo,
enum pipe_shader_type stage);
void
panfrost_batch_read_rsrc(struct panfrost_batch *batch,
struct panfrost_resource *rsrc,
enum pipe_shader_type stage);
void panfrost_batch_read_rsrc(struct panfrost_batch *batch,
struct panfrost_resource *rsrc,
enum pipe_shader_type stage);
void
panfrost_batch_write_rsrc(struct panfrost_batch *batch,
struct panfrost_resource *rsrc,
enum pipe_shader_type stage);
void panfrost_batch_write_rsrc(struct panfrost_batch *batch,
struct panfrost_resource *rsrc,
enum pipe_shader_type stage);
bool
panfrost_any_batch_reads_rsrc(struct panfrost_context *ctx,
struct panfrost_resource *rsrc);
bool panfrost_any_batch_reads_rsrc(struct panfrost_context *ctx,
struct panfrost_resource *rsrc);
bool
panfrost_any_batch_writes_rsrc(struct panfrost_context *ctx,
struct panfrost_resource *rsrc);
bool panfrost_any_batch_writes_rsrc(struct panfrost_context *ctx,
struct panfrost_resource *rsrc);
struct panfrost_bo *panfrost_batch_create_bo(struct panfrost_batch *batch,
size_t size, uint32_t create_flags,
enum pipe_shader_type stage,
const char *label);
void panfrost_flush_all_batches(struct panfrost_context *ctx,
const char *reason);
void panfrost_flush_batches_accessing_rsrc(struct panfrost_context *ctx,
struct panfrost_resource *rsrc,
const char *reason);
void panfrost_flush_writer(struct panfrost_context *ctx,
struct panfrost_resource *rsrc, const char *reason);
void panfrost_batch_adjust_stack_size(struct panfrost_batch *batch);
struct panfrost_bo *panfrost_batch_get_scratchpad(struct panfrost_batch *batch,
unsigned size,
unsigned thread_tls_alloc,
unsigned core_id_range);
struct panfrost_bo *
panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size,
uint32_t create_flags, enum pipe_shader_type stage,
const char *label);
panfrost_batch_get_shared_memory(struct panfrost_batch *batch, unsigned size,
unsigned workgroup_count);
void
panfrost_flush_all_batches(struct panfrost_context *ctx, const char *reason);
void panfrost_batch_clear(struct panfrost_batch *batch, unsigned buffers,
const union pipe_color_union *color, double depth,
unsigned stencil);
void
panfrost_flush_batches_accessing_rsrc(struct panfrost_context *ctx,
struct panfrost_resource *rsrc,
const char *reason);
void panfrost_batch_union_scissor(struct panfrost_batch *batch, unsigned minx,
unsigned miny, unsigned maxx, unsigned maxy);
void
panfrost_flush_writer(struct panfrost_context *ctx,
struct panfrost_resource *rsrc,
const char *reason);
void
panfrost_batch_adjust_stack_size(struct panfrost_batch *batch);
struct panfrost_bo *
panfrost_batch_get_scratchpad(struct panfrost_batch *batch, unsigned size, unsigned thread_tls_alloc, unsigned core_id_range);
struct panfrost_bo *
panfrost_batch_get_shared_memory(struct panfrost_batch *batch, unsigned size, unsigned workgroup_count);
void
panfrost_batch_clear(struct panfrost_batch *batch,
unsigned buffers,
const union pipe_color_union *color,
double depth, unsigned stencil);
void
panfrost_batch_union_scissor(struct panfrost_batch *batch,
unsigned minx, unsigned miny,
unsigned maxx, unsigned maxy);
bool
panfrost_batch_skip_rasterization(struct panfrost_batch *batch);
bool panfrost_batch_skip_rasterization(struct panfrost_batch *batch);
#endif


@ -46,124 +46,124 @@
static struct panfrost_bo *
panfrost_pool_alloc_backing(struct panfrost_pool *pool, size_t bo_sz)
{
/* We don't know what the BO will be used for, so let's flag it
* RW and attach it to both the fragment and vertex/tiler jobs.
* TODO: if we want fine grained BO assignment we should pass
* flags to this function and keep the read/write,
* fragment/vertex+tiler pools separate.
*/
struct panfrost_bo *bo = panfrost_bo_create(pool->base.dev, bo_sz,
pool->base.create_flags, pool->base.label);
/* We don't know what the BO will be used for, so let's flag it
* RW and attach it to both the fragment and vertex/tiler jobs.
* TODO: if we want fine grained BO assignment we should pass
* flags to this function and keep the read/write,
* fragment/vertex+tiler pools separate.
*/
struct panfrost_bo *bo = panfrost_bo_create(
pool->base.dev, bo_sz, pool->base.create_flags, pool->base.label);
if (pool->owned)
util_dynarray_append(&pool->bos, struct panfrost_bo *, bo);
else
panfrost_bo_unreference(pool->transient_bo);
if (pool->owned)
util_dynarray_append(&pool->bos, struct panfrost_bo *, bo);
else
panfrost_bo_unreference(pool->transient_bo);
pool->transient_bo = bo;
pool->transient_offset = 0;
pool->transient_bo = bo;
pool->transient_offset = 0;
return bo;
return bo;
}
void
panfrost_pool_init(struct panfrost_pool *pool, void *memctx,
struct panfrost_device *dev,
unsigned create_flags, size_t slab_size, const char *label,
bool prealloc, bool owned)
struct panfrost_device *dev, unsigned create_flags,
size_t slab_size, const char *label, bool prealloc,
bool owned)
{
memset(pool, 0, sizeof(*pool));
pan_pool_init(&pool->base, dev, create_flags, slab_size, label);
pool->owned = owned;
memset(pool, 0, sizeof(*pool));
pan_pool_init(&pool->base, dev, create_flags, slab_size, label);
pool->owned = owned;
if (owned)
util_dynarray_init(&pool->bos, memctx);
if (owned)
util_dynarray_init(&pool->bos, memctx);
if (prealloc)
panfrost_pool_alloc_backing(pool, pool->base.slab_size);
if (prealloc)
panfrost_pool_alloc_backing(pool, pool->base.slab_size);
}
void
panfrost_pool_cleanup(struct panfrost_pool *pool)
{
if (!pool->owned) {
panfrost_bo_unreference(pool->transient_bo);
return;
}
if (!pool->owned) {
panfrost_bo_unreference(pool->transient_bo);
return;
}
util_dynarray_foreach(&pool->bos, struct panfrost_bo *, bo)
panfrost_bo_unreference(*bo);
util_dynarray_foreach(&pool->bos, struct panfrost_bo *, bo)
panfrost_bo_unreference(*bo);
util_dynarray_fini(&pool->bos);
util_dynarray_fini(&pool->bos);
}
void
panfrost_pool_get_bo_handles(struct panfrost_pool *pool, uint32_t *handles)
{
assert(pool->owned && "pool does not track BOs in unowned mode");
assert(pool->owned && "pool does not track BOs in unowned mode");
unsigned idx = 0;
util_dynarray_foreach(&pool->bos, struct panfrost_bo *, bo) {
assert((*bo)->gem_handle > 0);
handles[idx++] = (*bo)->gem_handle;
unsigned idx = 0;
util_dynarray_foreach(&pool->bos, struct panfrost_bo *, bo) {
assert((*bo)->gem_handle > 0);
handles[idx++] = (*bo)->gem_handle;
/* Update the BO access flags so that panfrost_bo_wait() knows
* about all pending accesses.
* We only keep the READ/WRITE info since this is all the BO
* wait logic cares about.
* We also preserve existing flags as this batch might not
* be the first one to access the BO.
*/
(*bo)->gpu_access |= PAN_BO_ACCESS_RW;
}
/* Update the BO access flags so that panfrost_bo_wait() knows
* about all pending accesses.
* We only keep the READ/WRITE info since this is all the BO
* wait logic cares about.
* We also preserve existing flags as this batch might not
* be the first one to access the BO.
*/
(*bo)->gpu_access |= PAN_BO_ACCESS_RW;
}
}
#define PAN_GUARD_SIZE 4096
static struct panfrost_ptr
panfrost_pool_alloc_aligned(struct panfrost_pool *pool, size_t sz, unsigned alignment)
panfrost_pool_alloc_aligned(struct panfrost_pool *pool, size_t sz,
unsigned alignment)
{
assert(alignment == util_next_power_of_two(alignment));
assert(alignment == util_next_power_of_two(alignment));
/* Find or create a suitable BO */
struct panfrost_bo *bo = pool->transient_bo;
unsigned offset = ALIGN_POT(pool->transient_offset, alignment);
/* Find or create a suitable BO */
struct panfrost_bo *bo = pool->transient_bo;
unsigned offset = ALIGN_POT(pool->transient_offset, alignment);
#ifdef PAN_DBG_OVERFLOW
if (unlikely(pool->base.dev->debug & PAN_DBG_OVERFLOW) &&
!(pool->base.create_flags & PAN_BO_INVISIBLE)) {
unsigned aligned = ALIGN_POT(sz, sysconf(_SC_PAGESIZE));
unsigned bo_size = aligned + PAN_GUARD_SIZE;
if (unlikely(pool->base.dev->debug & PAN_DBG_OVERFLOW) &&
!(pool->base.create_flags & PAN_BO_INVISIBLE)) {
unsigned aligned = ALIGN_POT(sz, sysconf(_SC_PAGESIZE));
unsigned bo_size = aligned + PAN_GUARD_SIZE;
bo = panfrost_pool_alloc_backing(pool, bo_size);
memset(bo->ptr.cpu, 0xbb, bo_size);
bo = panfrost_pool_alloc_backing(pool, bo_size);
memset(bo->ptr.cpu, 0xbb, bo_size);
/* Place the object as close as possible to the protected
* region at the end of the buffer while keeping alignment. */
offset = ROUND_DOWN_TO(aligned - sz, alignment);
/* Place the object as close as possible to the protected
* region at the end of the buffer while keeping alignment. */
offset = ROUND_DOWN_TO(aligned - sz, alignment);
if (mprotect(bo->ptr.cpu + aligned,
PAN_GUARD_SIZE, PROT_NONE) == -1)
perror("mprotect");
if (mprotect(bo->ptr.cpu + aligned, PAN_GUARD_SIZE, PROT_NONE) == -1)
perror("mprotect");
pool->transient_bo = NULL;
}
pool->transient_bo = NULL;
}
#endif
/* If we don't fit, allocate a new backing */
if (unlikely(bo == NULL || (offset + sz) >= pool->base.slab_size)) {
bo = panfrost_pool_alloc_backing(pool,
ALIGN_POT(MAX2(pool->base.slab_size, sz), 4096));
offset = 0;
}
/* If we don't fit, allocate a new backing */
if (unlikely(bo == NULL || (offset + sz) >= pool->base.slab_size)) {
bo = panfrost_pool_alloc_backing(
pool, ALIGN_POT(MAX2(pool->base.slab_size, sz), 4096));
offset = 0;
}
pool->transient_offset = offset + sz;
pool->transient_offset = offset + sz;
struct panfrost_ptr ret = {
.cpu = bo->ptr.cpu + offset,
.gpu = bo->ptr.gpu + offset,
};
struct panfrost_ptr ret = {
.cpu = bo->ptr.cpu + offset,
.gpu = bo->ptr.gpu + offset,
};
return ret;
return ret;
}
PAN_POOL_ALLOCATOR(struct panfrost_pool, panfrost_pool_alloc_aligned)


@ -31,37 +31,37 @@
be unowned for persistent uploads. */
struct panfrost_pool {
/* Inherit from pan_pool */
struct pan_pool base;
/* Inherit from pan_pool */
struct pan_pool base;
/* BOs allocated by this pool */
struct util_dynarray bos;
/* BOs allocated by this pool */
struct util_dynarray bos;
/* Current transient BO */
struct panfrost_bo *transient_bo;
/* Current transient BO */
struct panfrost_bo *transient_bo;
/* Within the topmost transient BO, how much has been used? */
unsigned transient_offset;
/* Within the topmost transient BO, how much has been used? */
unsigned transient_offset;
/* Mode of the pool. BO management is in the pool for owned mode, but
* the consumed for unowned mode. */
bool owned;
/* Mode of the pool. BO management is in the pool for owned mode, but
* the consumed for unowned mode. */
bool owned;
};
static inline struct panfrost_pool *
to_panfrost_pool(struct pan_pool *pool)
{
return container_of(pool, struct panfrost_pool, base);
return container_of(pool, struct panfrost_pool, base);
}
/* Reference to pool allocated memory for an unowned pool */
struct panfrost_pool_ref {
/* Owning BO */
struct panfrost_bo *bo;
/* Owning BO */
struct panfrost_bo *bo;
/* Mapped GPU VA */
mali_ptr gpu;
/* Mapped GPU VA */
mali_ptr gpu;
};
/* Take a reference to an allocation pool. Call directly after allocating from
@ -70,32 +70,30 @@ struct panfrost_pool_ref {
static inline struct panfrost_pool_ref
panfrost_pool_take_ref(struct panfrost_pool *pool, mali_ptr ptr)
{
if (!pool->owned)
panfrost_bo_reference(pool->transient_bo);
if (!pool->owned)
panfrost_bo_reference(pool->transient_bo);
return (struct panfrost_pool_ref) {
.bo = pool->transient_bo,
.gpu = ptr,
};
return (struct panfrost_pool_ref){
.bo = pool->transient_bo,
.gpu = ptr,
};
}
void
panfrost_pool_init(struct panfrost_pool *pool, void *memctx,
struct panfrost_device *dev, unsigned create_flags,
size_t slab_size, const char *label, bool prealloc, bool
owned);
void panfrost_pool_init(struct panfrost_pool *pool, void *memctx,
struct panfrost_device *dev, unsigned create_flags,
size_t slab_size, const char *label, bool prealloc,
bool owned);
void
panfrost_pool_cleanup(struct panfrost_pool *pool);
void panfrost_pool_cleanup(struct panfrost_pool *pool);
static inline unsigned
panfrost_pool_num_bos(struct panfrost_pool *pool)
{
assert(pool->owned && "pool does not track BOs in unowned mode");
return util_dynarray_num_elements(&pool->bos, struct panfrost_bo *);
assert(pool->owned && "pool does not track BOs in unowned mode");
return util_dynarray_num_elements(&pool->bos, struct panfrost_bo *);
}
void
panfrost_pool_get_bo_handles(struct panfrost_pool *pool, uint32_t *handles);
void panfrost_pool_get_bo_handles(struct panfrost_pool *pool,
uint32_t *handles);
#endif


@ -31,8 +31,7 @@ extern "C" {
struct pipe_screen;
struct renderonly;
struct pipe_screen *
panfrost_create_screen(int fd, struct renderonly *ro);
struct pipe_screen *panfrost_create_screen(int fd, struct renderonly *ro);
#ifdef __cplusplus
}

File diff suppressed because it is too large


@ -22,87 +22,86 @@
*
*/
#ifndef PAN_RESOURCE_H
#define PAN_RESOURCE_H
#include "pan_screen.h"
#include "pan_minmax_cache.h"
#include "pan_texture.h"
#include "drm-uapi/drm.h"
#include "util/u_range.h"
#include "pan_minmax_cache.h"
#include "pan_screen.h"
#include "pan_texture.h"
#define LAYOUT_CONVERT_THRESHOLD 8
#define PAN_MAX_BATCHES 32
#define PAN_MAX_BATCHES 32
#define PAN_BIND_SHARED_MASK (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | \
PIPE_BIND_SHARED)
#define PAN_BIND_SHARED_MASK \
(PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)
struct panfrost_resource {
struct pipe_resource base;
struct {
struct pipe_scissor_state extent;
struct {
bool enable;
unsigned stride;
unsigned size;
BITSET_WORD *data;
} tile_map;
} damage;
struct pipe_resource base;
struct {
struct pipe_scissor_state extent;
struct {
bool enable;
unsigned stride;
unsigned size;
BITSET_WORD *data;
} tile_map;
} damage;
struct renderonly_scanout *scanout;
struct renderonly_scanout *scanout;
struct panfrost_resource *separate_stencil;
struct panfrost_resource *separate_stencil;
struct util_range valid_buffer_range;
struct util_range valid_buffer_range;
/* Description of the resource layout */
struct pan_image image;
/* Description of the resource layout */
struct pan_image image;
struct {
/* Is the checksum for this image valid? Implicitly refers to
* the first slice; we only checksum non-mipmapped 2D images */
bool crc;
struct {
/* Is the checksum for this image valid? Implicitly refers to
* the first slice; we only checksum non-mipmapped 2D images */
bool crc;
/* Has anything been written to this slice? */
BITSET_DECLARE(data, MAX_MIP_LEVELS);
} valid;
/* Has anything been written to this slice? */
BITSET_DECLARE(data, MAX_MIP_LEVELS);
} valid;
/* Whether the modifier can be changed */
bool modifier_constant;
/* Whether the modifier can be changed */
bool modifier_constant;
/* Used to decide when to convert to another modifier */
uint16_t modifier_updates;
/* Used to decide when to convert to another modifier */
uint16_t modifier_updates;
/* Do all pixels have the same stencil value? */
bool constant_stencil;
/* Do all pixels have the same stencil value? */
bool constant_stencil;
/* The stencil value if constant_stencil is set */
uint8_t stencil_value;
/* The stencil value if constant_stencil is set */
uint8_t stencil_value;
/* Cached min/max values for index buffers */
struct panfrost_minmax_cache *index_cache;
/* Cached min/max values for index buffers */
struct panfrost_minmax_cache *index_cache;
};
static inline struct panfrost_resource *
pan_resource(struct pipe_resource *p)
{
return (struct panfrost_resource *)p;
return (struct panfrost_resource *)p;
}
struct panfrost_transfer {
struct pipe_transfer base;
void *map;
struct {
struct pipe_resource *rsrc;
struct pipe_box box;
} staging;
struct pipe_transfer base;
void *map;
struct {
struct pipe_resource *rsrc;
struct pipe_box box;
} staging;
};
static inline struct panfrost_transfer *
pan_transfer(struct pipe_transfer *p)
{
return (struct panfrost_transfer *)p;
return (struct panfrost_transfer *)p;
}
void panfrost_resource_screen_init(struct pipe_screen *screen);
@ -113,53 +112,48 @@ void panfrost_resource_context_init(struct pipe_context *pctx);
/* Blitting */
void
panfrost_blitter_save(struct panfrost_context *ctx, bool render_cond);
void panfrost_blitter_save(struct panfrost_context *ctx, bool render_cond);
void
panfrost_blit(struct pipe_context *pipe,
const struct pipe_blit_info *info);
void panfrost_blit(struct pipe_context *pipe,
const struct pipe_blit_info *info);
void
panfrost_resource_set_damage_region(struct pipe_screen *screen,
struct pipe_resource *res,
unsigned int nrects,
const struct pipe_box *rects);
void panfrost_resource_set_damage_region(struct pipe_screen *screen,
struct pipe_resource *res,
unsigned int nrects,
const struct pipe_box *rects);
static inline enum mali_texture_dimension
panfrost_translate_texture_dimension(enum pipe_texture_target t) {
switch (t)
{
case PIPE_BUFFER:
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_1D_ARRAY:
return MALI_TEXTURE_DIMENSION_1D;
panfrost_translate_texture_dimension(enum pipe_texture_target t)
{
switch (t) {
case PIPE_BUFFER:
case PIPE_TEXTURE_1D:
case PIPE_TEXTURE_1D_ARRAY:
return MALI_TEXTURE_DIMENSION_1D;
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_RECT:
return MALI_TEXTURE_DIMENSION_2D;
case PIPE_TEXTURE_2D:
case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_RECT:
return MALI_TEXTURE_DIMENSION_2D;
case PIPE_TEXTURE_3D:
return MALI_TEXTURE_DIMENSION_3D;
case PIPE_TEXTURE_3D:
return MALI_TEXTURE_DIMENSION_3D;
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
return MALI_TEXTURE_DIMENSION_CUBE;
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
return MALI_TEXTURE_DIMENSION_CUBE;
default:
unreachable("Unknown target");
}
default:
unreachable("Unknown target");
}
}
void
pan_resource_modifier_convert(struct panfrost_context *ctx,
struct panfrost_resource *rsrc,
uint64_t modifier, const char *reason);
void pan_resource_modifier_convert(struct panfrost_context *ctx,
struct panfrost_resource *rsrc,
uint64_t modifier, const char *reason);
void
pan_legalize_afbc_format(struct panfrost_context *ctx,
struct panfrost_resource *rsrc,
enum pipe_format format);
void pan_legalize_afbc_format(struct panfrost_context *ctx,
struct panfrost_resource *rsrc,
enum pipe_format format);
#endif /* PAN_RESOURCE_H */

File diff suppressed because it is too large


@ -30,14 +30,14 @@
#define PAN_SCREEN_H
#include <xf86drm.h>
#include "pipe/p_screen.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "renderonly/renderonly.h"
#include "util/u_dynarray.h"
#include "util/bitset.h"
#include "util/set.h"
#include "util/log.h"
#include "util/disk_cache.h"
#include "util/log.h"
#include "util/set.h"
#include "util/u_dynarray.h"
#include "pan_device.h"
#include "pan_mempool.h"
@ -45,7 +45,7 @@
#define PAN_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0)
static const struct pipe_driver_query_info panfrost_driver_query_list[] = {
{"draw-calls", PAN_QUERY_DRAW_CALLS, { 0 }},
{"draw-calls", PAN_QUERY_DRAW_CALLS, {0}},
};
struct panfrost_batch;
@ -58,77 +58,74 @@ struct pan_blend_state;
/* Virtual table of per-generation (GenXML) functions */
struct panfrost_vtable {
/* Prepares the renderer state descriptor or shader program descriptor
* for a given compiled shader, and if desired uploads it as well */
void (*prepare_shader)(struct panfrost_compiled_shader *,
struct panfrost_pool *, bool);
/* Prepares the renderer state descriptor or shader program descriptor
* for a given compiled shader, and if desired uploads it as well */
void (*prepare_shader)(struct panfrost_compiled_shader *,
struct panfrost_pool *, bool);
/* Emits a thread local storage descriptor */
void (*emit_tls)(struct panfrost_batch *);
/* Emits a thread local storage descriptor */
void (*emit_tls)(struct panfrost_batch *);
/* Emits a framebuffer descriptor */
void (*emit_fbd)(struct panfrost_batch *, const struct pan_fb_info *);
/* Emits a framebuffer descriptor */
void (*emit_fbd)(struct panfrost_batch *, const struct pan_fb_info *);
/* Emits a fragment job */
mali_ptr (*emit_fragment_job)(struct panfrost_batch *, const struct pan_fb_info *);
/* Emits a fragment job */
mali_ptr (*emit_fragment_job)(struct panfrost_batch *,
const struct pan_fb_info *);
/* General destructor */
void (*screen_destroy)(struct pipe_screen *);
/* General destructor */
void (*screen_destroy)(struct pipe_screen *);
/* Preload framebuffer */
void (*preload)(struct panfrost_batch *, struct pan_fb_info *);
/* Preload framebuffer */
void (*preload)(struct panfrost_batch *, struct pan_fb_info *);
/* Initialize a Gallium context */
void (*context_init)(struct pipe_context *pipe);
/* Initialize a Gallium context */
void (*context_init)(struct pipe_context *pipe);
/* Device-dependent initialization of a panfrost_batch */
void (*init_batch)(struct panfrost_batch *batch);
/* Device-dependent initialization of a panfrost_batch */
void (*init_batch)(struct panfrost_batch *batch);
/* Get blend shader */
struct pan_blend_shader_variant *
(*get_blend_shader)(const struct panfrost_device *,
const struct pan_blend_state *,
nir_alu_type, nir_alu_type,
unsigned rt);
/* Get blend shader */
struct pan_blend_shader_variant *(*get_blend_shader)(
const struct panfrost_device *, const struct pan_blend_state *,
nir_alu_type, nir_alu_type, unsigned rt);
/* Initialize the polygon list */
void (*init_polygon_list)(struct panfrost_batch *);
/* Initialize the polygon list */
void (*init_polygon_list)(struct panfrost_batch *);
/* Shader compilation methods */
const nir_shader_compiler_options *(*get_compiler_options)(void);
void (*compile_shader)(nir_shader *s,
struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info);
/* Shader compilation methods */
const nir_shader_compiler_options *(*get_compiler_options)(void);
void (*compile_shader)(nir_shader *s, struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info);
};
struct panfrost_screen {
struct pipe_screen base;
struct panfrost_device dev;
struct {
struct panfrost_pool bin_pool;
struct panfrost_pool desc_pool;
} blitter;
struct pipe_screen base;
struct panfrost_device dev;
struct {
struct panfrost_pool bin_pool;
struct panfrost_pool desc_pool;
} blitter;
struct panfrost_vtable vtbl;
struct disk_cache *disk_cache;
struct panfrost_vtable vtbl;
struct disk_cache *disk_cache;
};
static inline struct panfrost_screen *
pan_screen(struct pipe_screen *p)
{
return (struct panfrost_screen *)p;
return (struct panfrost_screen *)p;
}
static inline struct panfrost_device *
pan_device(struct pipe_screen *p)
{
return &(pan_screen(p)->dev);
return &(pan_screen(p)->dev);
}
int
panfrost_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
struct pipe_driver_query_info *info);
int panfrost_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
struct pipe_driver_query_info *info);
void panfrost_cmdstream_screen_init_v4(struct panfrost_screen *screen);
void panfrost_cmdstream_screen_init_v5(struct panfrost_screen *screen);
@ -136,13 +133,13 @@ void panfrost_cmdstream_screen_init_v6(struct panfrost_screen *screen);
void panfrost_cmdstream_screen_init_v7(struct panfrost_screen *screen);
void panfrost_cmdstream_screen_init_v9(struct panfrost_screen *screen);
#define perf_debug(dev, ...) \
do { \
if (unlikely((dev)->debug & PAN_DBG_PERF)) \
mesa_logw(__VA_ARGS__); \
} while(0)
#define perf_debug(dev, ...) \
do { \
if (unlikely((dev)->debug & PAN_DBG_PERF)) \
mesa_logw(__VA_ARGS__); \
} while (0)
#define perf_debug_ctx(ctx, ...) \
perf_debug(pan_device((ctx)->base.screen), __VA_ARGS__);
#define perf_debug_ctx(ctx, ...) \
perf_debug(pan_device((ctx)->base.screen), __VA_ARGS__);
#endif /* PAN_SCREEN_H */


@ -28,103 +28,96 @@
*
*/
#include "pan_context.h"
#include "pan_bo.h"
#include "pan_shader.h"
#include "util/u_memory.h"
#include "nir/tgsi_to_nir.h"
#include "util/u_memory.h"
#include "nir_serialize.h"
#include "pan_bo.h"
#include "pan_context.h"
static struct panfrost_uncompiled_shader *
panfrost_alloc_shader(const nir_shader *nir)
{
struct panfrost_uncompiled_shader *so =
rzalloc(NULL, struct panfrost_uncompiled_shader);
struct panfrost_uncompiled_shader *so =
rzalloc(NULL, struct panfrost_uncompiled_shader);
simple_mtx_init(&so->lock, mtx_plain);
util_dynarray_init(&so->variants, so);
simple_mtx_init(&so->lock, mtx_plain);
util_dynarray_init(&so->variants, so);
so->nir = nir;
so->nir = nir;
/* Serialize the NIR to a binary blob that we can hash for the disk
* cache. Drop unnecessary information (like variable names) so the
* serialized NIR is smaller, and also to let us detect more isomorphic
* shaders when hashing, increasing cache hits.
*/
struct blob blob;
blob_init(&blob);
nir_serialize(&blob, nir, true);
_mesa_sha1_compute(blob.data, blob.size, so->nir_sha1);
blob_finish(&blob);
/* Serialize the NIR to a binary blob that we can hash for the disk
* cache. Drop unnecessary information (like variable names) so the
* serialized NIR is smaller, and also to let us detect more isomorphic
* shaders when hashing, increasing cache hits.
*/
struct blob blob;
blob_init(&blob);
nir_serialize(&blob, nir, true);
_mesa_sha1_compute(blob.data, blob.size, so->nir_sha1);
blob_finish(&blob);
return so;
return so;
}
static struct panfrost_compiled_shader *
panfrost_alloc_variant(struct panfrost_uncompiled_shader *so)
{
return util_dynarray_grow(&so->variants, struct panfrost_compiled_shader, 1);
return util_dynarray_grow(&so->variants, struct panfrost_compiled_shader, 1);
}
static void
panfrost_shader_compile(struct panfrost_screen *screen,
const nir_shader *ir,
panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
struct util_debug_callback *dbg,
struct panfrost_shader_key *key,
unsigned req_local_mem,
struct panfrost_shader_key *key, unsigned req_local_mem,
unsigned fixed_varying_mask,
struct panfrost_shader_binary *out)
{
struct panfrost_device *dev = pan_device(&screen->base);
struct panfrost_device *dev = pan_device(&screen->base);
nir_shader *s = nir_shader_clone(NULL, ir);
nir_shader *s = nir_shader_clone(NULL, ir);
struct panfrost_compile_inputs inputs = {
.debug = dbg,
.gpu_id = dev->gpu_id,
.fixed_sysval_ubo = -1,
};
struct panfrost_compile_inputs inputs = {
.debug = dbg,
.gpu_id = dev->gpu_id,
.fixed_sysval_ubo = -1,
};
/* Lower this early so the backends don't have to worry about it */
if (s->info.stage == MESA_SHADER_FRAGMENT) {
inputs.fixed_varying_mask = key->fs.fixed_varying_mask;
/* Lower this early so the backends don't have to worry about it */
if (s->info.stage == MESA_SHADER_FRAGMENT) {
inputs.fixed_varying_mask = key->fs.fixed_varying_mask;
if (s->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
NIR_PASS_V(s, nir_lower_fragcolor,
key->fs.nr_cbufs_for_fragcolor);
}
if (s->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
NIR_PASS_V(s, nir_lower_fragcolor, key->fs.nr_cbufs_for_fragcolor);
}
if (key->fs.sprite_coord_enable) {
NIR_PASS_V(s, nir_lower_texcoord_replace,
key->fs.sprite_coord_enable,
true /* point coord is sysval */,
false /* Y-invert */);
}
if (key->fs.sprite_coord_enable) {
NIR_PASS_V(s, nir_lower_texcoord_replace, key->fs.sprite_coord_enable,
true /* point coord is sysval */, false /* Y-invert */);
}
if (key->fs.clip_plane_enable) {
NIR_PASS_V(s, nir_lower_clip_fs,
key->fs.clip_plane_enable,
false);
}
if (key->fs.clip_plane_enable) {
NIR_PASS_V(s, nir_lower_clip_fs, key->fs.clip_plane_enable, false);
}
memcpy(inputs.rt_formats, key->fs.rt_formats, sizeof(inputs.rt_formats));
} else if (s->info.stage == MESA_SHADER_VERTEX) {
inputs.fixed_varying_mask = fixed_varying_mask;
memcpy(inputs.rt_formats, key->fs.rt_formats, sizeof(inputs.rt_formats));
} else if (s->info.stage == MESA_SHADER_VERTEX) {
inputs.fixed_varying_mask = fixed_varying_mask;
/* No IDVS for internal XFB shaders */
inputs.no_idvs = s->info.has_transform_feedback_varyings;
}
/* No IDVS for internal XFB shaders */
inputs.no_idvs = s->info.has_transform_feedback_varyings;
}
util_dynarray_init(&out->binary, NULL);
screen->vtbl.compile_shader(s, &inputs, &out->binary, &out->info);
util_dynarray_init(&out->binary, NULL);
screen->vtbl.compile_shader(s, &inputs, &out->binary, &out->info);
assert(req_local_mem >= out->info.wls_size);
out->info.wls_size = req_local_mem;
assert(req_local_mem >= out->info.wls_size);
out->info.wls_size = req_local_mem;
/* In both clone and tgsi_to_nir paths, the shader is ralloc'd against
* a NULL context
*/
ralloc_free(s);
/* In both clone and tgsi_to_nir paths, the shader is ralloc'd against
* a NULL context
*/
ralloc_free(s);
}
static void
@ -136,287 +129,288 @@ panfrost_shader_get(struct pipe_screen *pscreen,
struct panfrost_compiled_shader *state,
unsigned req_local_mem)
{
struct panfrost_screen *screen = pan_screen(pscreen);
struct panfrost_device *dev = pan_device(pscreen);
struct panfrost_screen *screen = pan_screen(pscreen);
struct panfrost_device *dev = pan_device(pscreen);
struct panfrost_shader_binary res = { 0 };
struct panfrost_shader_binary res = {0};
/* Try to retrieve the variant from the disk cache. If that fails,
* compile a new variant and store in the disk cache for later reuse.
*/
if (!panfrost_disk_cache_retrieve(screen->disk_cache, uncompiled, &state->key, &res)) {
panfrost_shader_compile(screen, uncompiled->nir, dbg, &state->key,
req_local_mem,
uncompiled->fixed_varying_mask, &res);
/* Try to retrieve the variant from the disk cache. If that fails,
* compile a new variant and store in the disk cache for later reuse.
*/
if (!panfrost_disk_cache_retrieve(screen->disk_cache, uncompiled,
&state->key, &res)) {
panfrost_shader_compile(screen, uncompiled->nir, dbg, &state->key,
req_local_mem, uncompiled->fixed_varying_mask,
&res);
panfrost_disk_cache_store(screen->disk_cache, uncompiled, &state->key, &res);
}
panfrost_disk_cache_store(screen->disk_cache, uncompiled, &state->key,
&res);
}
state->info = res.info;
state->info = res.info;
if (res.binary.size) {
state->bin = panfrost_pool_take_ref(shader_pool,
pan_pool_upload_aligned(&shader_pool->base,
res.binary.data, res.binary.size, 128));
}
if (res.binary.size) {
state->bin = panfrost_pool_take_ref(
shader_pool,
pan_pool_upload_aligned(&shader_pool->base, res.binary.data,
res.binary.size, 128));
}
util_dynarray_fini(&res.binary);
util_dynarray_fini(&res.binary);
/* Don't upload RSD for fragment shaders since they need draw-time
* merging for e.g. depth/stencil/alpha. RSDs are replaced by simpler
* shader program descriptors on Valhall, which can be preuploaded even
* for fragment shaders. */
bool upload = !(uncompiled->nir->info.stage == MESA_SHADER_FRAGMENT && dev->arch <= 7);
screen->vtbl.prepare_shader(state, desc_pool, upload);
/* Don't upload RSD for fragment shaders since they need draw-time
* merging for e.g. depth/stencil/alpha. RSDs are replaced by simpler
* shader program descriptors on Valhall, which can be preuploaded even
* for fragment shaders. */
bool upload =
!(uncompiled->nir->info.stage == MESA_SHADER_FRAGMENT && dev->arch <= 7);
screen->vtbl.prepare_shader(state, desc_pool, upload);
panfrost_analyze_sysvals(state);
panfrost_analyze_sysvals(state);
}
static void
panfrost_build_key(struct panfrost_context *ctx,
struct panfrost_shader_key *key,
const nir_shader *nir)
struct panfrost_shader_key *key, const nir_shader *nir)
{
/* We don't currently have vertex shader variants */
if (nir->info.stage != MESA_SHADER_FRAGMENT)
return;
/* We don't currently have vertex shader variants */
if (nir->info.stage != MESA_SHADER_FRAGMENT)
return;
struct panfrost_device *dev = pan_device(ctx->base.screen);
struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
struct pipe_rasterizer_state *rast = (void *) ctx->rasterizer;
struct panfrost_uncompiled_shader *vs = ctx->uncompiled[MESA_SHADER_VERTEX];
struct panfrost_device *dev = pan_device(ctx->base.screen);
struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
struct pipe_rasterizer_state *rast = (void *)ctx->rasterizer;
struct panfrost_uncompiled_shader *vs = ctx->uncompiled[MESA_SHADER_VERTEX];
/* gl_FragColor lowering needs the number of colour buffers */
if (nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
key->fs.nr_cbufs_for_fragcolor = fb->nr_cbufs;
}
/* gl_FragColor lowering needs the number of colour buffers */
if (nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
key->fs.nr_cbufs_for_fragcolor = fb->nr_cbufs;
}
/* Point sprite lowering needed on Bifrost and newer */
if (dev->arch >= 6 && rast && ctx->active_prim == PIPE_PRIM_POINTS) {
key->fs.sprite_coord_enable = rast->sprite_coord_enable;
}
/* Point sprite lowering needed on Bifrost and newer */
if (dev->arch >= 6 && rast && ctx->active_prim == PIPE_PRIM_POINTS) {
key->fs.sprite_coord_enable = rast->sprite_coord_enable;
}
/* User clip plane lowering needed everywhere */
if (rast) {
key->fs.clip_plane_enable = rast->clip_plane_enable;
}
/* User clip plane lowering needed everywhere */
if (rast) {
key->fs.clip_plane_enable = rast->clip_plane_enable;
}
if (dev->arch <= 5) {
u_foreach_bit(i, (nir->info.outputs_read >> FRAG_RESULT_DATA0)) {
enum pipe_format fmt = PIPE_FORMAT_R8G8B8A8_UNORM;
if (dev->arch <= 5) {
u_foreach_bit(i, (nir->info.outputs_read >> FRAG_RESULT_DATA0)) {
enum pipe_format fmt = PIPE_FORMAT_R8G8B8A8_UNORM;
if ((fb->nr_cbufs > i) && fb->cbufs[i])
fmt = fb->cbufs[i]->format;
if ((fb->nr_cbufs > i) && fb->cbufs[i])
fmt = fb->cbufs[i]->format;
if (panfrost_blendable_formats_v6[fmt].internal)
fmt = PIPE_FORMAT_NONE;
if (panfrost_blendable_formats_v6[fmt].internal)
fmt = PIPE_FORMAT_NONE;
key->fs.rt_formats[i] = fmt;
}
}
key->fs.rt_formats[i] = fmt;
}
}
/* Funny desktop GL varying lowering on Valhall */
if (dev->arch >= 9) {
assert(vs != NULL && "too early");
key->fs.fixed_varying_mask = vs->fixed_varying_mask;
}
/* Funny desktop GL varying lowering on Valhall */
if (dev->arch >= 9) {
assert(vs != NULL && "too early");
key->fs.fixed_varying_mask = vs->fixed_varying_mask;
}
}
static struct panfrost_compiled_shader *
panfrost_new_variant_locked(
struct panfrost_context *ctx,
struct panfrost_uncompiled_shader *uncompiled,
struct panfrost_shader_key *key)
panfrost_new_variant_locked(struct panfrost_context *ctx,
struct panfrost_uncompiled_shader *uncompiled,
struct panfrost_shader_key *key)
{
struct panfrost_compiled_shader *prog = panfrost_alloc_variant(uncompiled);
struct panfrost_compiled_shader *prog = panfrost_alloc_variant(uncompiled);
*prog = (struct panfrost_compiled_shader) {
.key = *key,
.stream_output = uncompiled->stream_output,
};
*prog = (struct panfrost_compiled_shader){
.key = *key,
.stream_output = uncompiled->stream_output,
};
panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs,
uncompiled, &ctx->base.debug, prog, 0);
panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs, uncompiled,
&ctx->base.debug, prog, 0);
prog->earlyzs = pan_earlyzs_analyze(&prog->info);
prog->earlyzs = pan_earlyzs_analyze(&prog->info);
return prog;
return prog;
}
static void
panfrost_bind_shader_state(
struct pipe_context *pctx,
void *hwcso,
enum pipe_shader_type type)
panfrost_bind_shader_state(struct pipe_context *pctx, void *hwcso,
enum pipe_shader_type type)
{
struct panfrost_context *ctx = pan_context(pctx);
ctx->uncompiled[type] = hwcso;
ctx->prog[type] = NULL;
struct panfrost_context *ctx = pan_context(pctx);
ctx->uncompiled[type] = hwcso;
ctx->prog[type] = NULL;
ctx->dirty |= PAN_DIRTY_TLS_SIZE;
ctx->dirty_shader[type] |= PAN_DIRTY_STAGE_SHADER;
ctx->dirty |= PAN_DIRTY_TLS_SIZE;
ctx->dirty_shader[type] |= PAN_DIRTY_STAGE_SHADER;
if (hwcso)
panfrost_update_shader_variant(ctx, type);
if (hwcso)
panfrost_update_shader_variant(ctx, type);
}
void
panfrost_update_shader_variant(struct panfrost_context *ctx,
enum pipe_shader_type type)
{
/* No shader variants for compute */
if (type == PIPE_SHADER_COMPUTE)
return;
/* No shader variants for compute */
if (type == PIPE_SHADER_COMPUTE)
return;
/* We need linking information, defer this */
if (type == PIPE_SHADER_FRAGMENT && !ctx->uncompiled[PIPE_SHADER_VERTEX])
return;
/* We need linking information, defer this */
if (type == PIPE_SHADER_FRAGMENT && !ctx->uncompiled[PIPE_SHADER_VERTEX])
return;
/* Also defer, happens with GALLIUM_HUD */
if (!ctx->uncompiled[type])
return;
/* Also defer, happens with GALLIUM_HUD */
if (!ctx->uncompiled[type])
return;
/* Match the appropriate variant */
struct panfrost_uncompiled_shader *uncompiled = ctx->uncompiled[type];
struct panfrost_compiled_shader *compiled = NULL;
/* Match the appropriate variant */
struct panfrost_uncompiled_shader *uncompiled = ctx->uncompiled[type];
struct panfrost_compiled_shader *compiled = NULL;
simple_mtx_lock(&uncompiled->lock);
simple_mtx_lock(&uncompiled->lock);
struct panfrost_shader_key key = { 0 };
panfrost_build_key(ctx, &key, uncompiled->nir);
struct panfrost_shader_key key = {0};
panfrost_build_key(ctx, &key, uncompiled->nir);
util_dynarray_foreach(&uncompiled->variants, struct panfrost_compiled_shader, so) {
if (memcmp(&key, &so->key, sizeof(key)) == 0) {
compiled = so;
break;
}
}
util_dynarray_foreach(&uncompiled->variants, struct panfrost_compiled_shader,
so) {
if (memcmp(&key, &so->key, sizeof(key)) == 0) {
compiled = so;
break;
}
}
if (compiled == NULL)
compiled = panfrost_new_variant_locked(ctx, uncompiled, &key);
if (compiled == NULL)
compiled = panfrost_new_variant_locked(ctx, uncompiled, &key);
ctx->prog[type] = compiled;
ctx->prog[type] = compiled;
/* TODO: it would be more efficient to release the lock before
* compiling instead of after, but that can race if thread A compiles a
* variant while thread B searches for that same variant */
simple_mtx_unlock(&uncompiled->lock);
/* TODO: it would be more efficient to release the lock before
* compiling instead of after, but that can race if thread A compiles a
* variant while thread B searches for that same variant */
simple_mtx_unlock(&uncompiled->lock);
}
static void
panfrost_bind_vs_state(struct pipe_context *pctx, void *hwcso)
{
panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_VERTEX);
panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_VERTEX);
/* Fragment shaders are linked with vertex shaders */
struct panfrost_context *ctx = pan_context(pctx);
panfrost_update_shader_variant(ctx, PIPE_SHADER_FRAGMENT);
/* Fragment shaders are linked with vertex shaders */
struct panfrost_context *ctx = pan_context(pctx);
panfrost_update_shader_variant(ctx, PIPE_SHADER_FRAGMENT);
}
static void
panfrost_bind_fs_state(struct pipe_context *pctx, void *hwcso)
{
panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_FRAGMENT);
panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_FRAGMENT);
}
static void *
panfrost_create_shader_state(
struct pipe_context *pctx,
const struct pipe_shader_state *cso)
panfrost_create_shader_state(struct pipe_context *pctx,
const struct pipe_shader_state *cso)
{
nir_shader *nir = (cso->type == PIPE_SHADER_IR_TGSI) ?
tgsi_to_nir(cso->tokens, pctx->screen, false) :
cso->ir.nir;
nir_shader *nir = (cso->type == PIPE_SHADER_IR_TGSI)
? tgsi_to_nir(cso->tokens, pctx->screen, false)
: cso->ir.nir;
struct panfrost_uncompiled_shader *so = panfrost_alloc_shader(nir);
struct panfrost_uncompiled_shader *so = panfrost_alloc_shader(nir);
/* The driver gets ownership of the nir_shader for graphics. The NIR is
* ralloc'd. Free the NIR when we free the uncompiled shader.
*/
ralloc_steal(so, nir);
/* The driver gets ownership of the nir_shader for graphics. The NIR is
* ralloc'd. Free the NIR when we free the uncompiled shader.
*/
ralloc_steal(so, nir);
so->stream_output = cso->stream_output;
so->nir = nir;
so->stream_output = cso->stream_output;
so->nir = nir;
/* Fix linkage early */
if (so->nir->info.stage == MESA_SHADER_VERTEX) {
so->fixed_varying_mask =
(so->nir->info.outputs_written & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
}
/* Fix linkage early */
if (so->nir->info.stage == MESA_SHADER_VERTEX) {
so->fixed_varying_mask =
(so->nir->info.outputs_written & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
}
/* If this shader uses transform feedback, compile the transform
* feedback program. This is a special shader variant.
*/
struct panfrost_context *ctx = pan_context(pctx);
/* If this shader uses transform feedback, compile the transform
* feedback program. This is a special shader variant.
*/
struct panfrost_context *ctx = pan_context(pctx);
if (so->nir->xfb_info) {
nir_shader *xfb = nir_shader_clone(NULL, so->nir);
xfb->info.name = ralloc_asprintf(xfb, "%s@xfb", xfb->info.name);
xfb->info.internal = true;
if (so->nir->xfb_info) {
nir_shader *xfb = nir_shader_clone(NULL, so->nir);
xfb->info.name = ralloc_asprintf(xfb, "%s@xfb", xfb->info.name);
xfb->info.internal = true;
so->xfb = calloc(1, sizeof(struct panfrost_compiled_shader));
so->xfb->key.vs_is_xfb = true;
so->xfb = calloc(1, sizeof(struct panfrost_compiled_shader));
so->xfb->key.vs_is_xfb = true;
panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs,
so, &ctx->base.debug, so->xfb, 0);
panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs, so,
&ctx->base.debug, so->xfb, 0);
/* Since transform feedback is handled via the transform
* feedback program, the original program no longer uses XFB
*/
nir->info.has_transform_feedback_varyings = false;
}
/* Since transform feedback is handled via the transform
* feedback program, the original program no longer uses XFB
*/
nir->info.has_transform_feedback_varyings = false;
}
/* Compile the program. We don't use vertex shader keys, so there will
* be no further vertex shader variants. We do have fragment shader
* keys, but we can still compile with a default key that will work most
* of the time.
*/
struct panfrost_shader_key key = { 0 };
/* Compile the program. We don't use vertex shader keys, so there will
* be no further vertex shader variants. We do have fragment shader
* keys, but we can still compile with a default key that will work most
* of the time.
*/
struct panfrost_shader_key key = {0};
/* gl_FragColor lowering needs the number of colour buffers on desktop
* GL, where it acts as an implicit broadcast to all colour buffers.
*
* However, gl_FragColor is a legacy feature, so assume that if
* gl_FragColor is used, there is only a single render target. The
* implicit broadcast is neither especially useful nor required by GLES.
*/
if (so->nir->info.stage == MESA_SHADER_FRAGMENT &&
so->nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
/* gl_FragColor lowering needs the number of colour buffers on desktop
* GL, where it acts as an implicit broadcast to all colour buffers.
*
* However, gl_FragColor is a legacy feature, so assume that if
* gl_FragColor is used, there is only a single render target. The
* implicit broadcast is neither especially useful nor required by GLES.
*/
if (so->nir->info.stage == MESA_SHADER_FRAGMENT &&
so->nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
key.fs.nr_cbufs_for_fragcolor = 1;
}
key.fs.nr_cbufs_for_fragcolor = 1;
}
/* Creating a CSO is single-threaded, so it's ok to use the
* locked function without explicitly taking the lock. Creating a
* default variant acts as a precompile.
*/
panfrost_new_variant_locked(ctx, so, &key);
/* Creating a CSO is single-threaded, so it's ok to use the
* locked function without explicitly taking the lock. Creating a
* default variant acts as a precompile.
*/
panfrost_new_variant_locked(ctx, so, &key);
return so;
return so;
}
static void
panfrost_delete_shader_state(struct pipe_context *pctx, void *so)
{
struct panfrost_uncompiled_shader *cso = (struct panfrost_uncompiled_shader *) so;
struct panfrost_uncompiled_shader *cso =
(struct panfrost_uncompiled_shader *)so;
util_dynarray_foreach(&cso->variants, struct panfrost_compiled_shader, so) {
panfrost_bo_unreference(so->bin.bo);
panfrost_bo_unreference(so->state.bo);
panfrost_bo_unreference(so->linkage.bo);
}
util_dynarray_foreach(&cso->variants, struct panfrost_compiled_shader, so) {
panfrost_bo_unreference(so->bin.bo);
panfrost_bo_unreference(so->state.bo);
panfrost_bo_unreference(so->linkage.bo);
}
if (cso->xfb) {
panfrost_bo_unreference(cso->xfb->bin.bo);
panfrost_bo_unreference(cso->xfb->state.bo);
panfrost_bo_unreference(cso->xfb->linkage.bo);
free(cso->xfb);
}
if (cso->xfb) {
panfrost_bo_unreference(cso->xfb->bin.bo);
panfrost_bo_unreference(cso->xfb->state.bo);
panfrost_bo_unreference(cso->xfb->linkage.bo);
free(cso->xfb);
}
simple_mtx_destroy(&cso->lock);
simple_mtx_destroy(&cso->lock);
ralloc_free(so);
ralloc_free(so);
}
/*
@ -424,52 +418,51 @@ panfrost_delete_shader_state(struct pipe_context *pctx, void *so)
* precompiled, creating both the uncompiled and compiled shaders now.
*/
static void *
panfrost_create_compute_state(
struct pipe_context *pctx,
const struct pipe_compute_state *cso)
panfrost_create_compute_state(struct pipe_context *pctx,
const struct pipe_compute_state *cso)
{
struct panfrost_context *ctx = pan_context(pctx);
struct panfrost_uncompiled_shader *so = panfrost_alloc_shader(cso->prog);
struct panfrost_compiled_shader *v = panfrost_alloc_variant(so);
memset(v, 0, sizeof *v);
struct panfrost_context *ctx = pan_context(pctx);
struct panfrost_uncompiled_shader *so = panfrost_alloc_shader(cso->prog);
struct panfrost_compiled_shader *v = panfrost_alloc_variant(so);
memset(v, 0, sizeof *v);
assert(cso->ir_type == PIPE_SHADER_IR_NIR && "TGSI kernels unsupported");
assert(cso->ir_type == PIPE_SHADER_IR_NIR && "TGSI kernels unsupported");
panfrost_shader_get(pctx->screen, &ctx->shaders, &ctx->descs,
so, &ctx->base.debug, v, cso->static_shared_mem);
panfrost_shader_get(pctx->screen, &ctx->shaders, &ctx->descs, so,
&ctx->base.debug, v, cso->static_shared_mem);
/* The NIR becomes invalid after this. For compute kernels, we never
* need to access it again. Don't keep a dangling pointer around.
*/
so->nir = NULL;
/* The NIR becomes invalid after this. For compute kernels, we never
* need to access it again. Don't keep a dangling pointer around.
*/
so->nir = NULL;
return so;
return so;
}
static void
panfrost_bind_compute_state(struct pipe_context *pipe, void *cso)
{
struct panfrost_context *ctx = pan_context(pipe);
struct panfrost_uncompiled_shader *uncompiled = cso;
struct panfrost_context *ctx = pan_context(pipe);
struct panfrost_uncompiled_shader *uncompiled = cso;
ctx->uncompiled[PIPE_SHADER_COMPUTE] = uncompiled;
ctx->uncompiled[PIPE_SHADER_COMPUTE] = uncompiled;
ctx->prog[PIPE_SHADER_COMPUTE] =
uncompiled ? util_dynarray_begin(&uncompiled->variants) : NULL;
ctx->prog[PIPE_SHADER_COMPUTE] =
uncompiled ? util_dynarray_begin(&uncompiled->variants) : NULL;
}
void
panfrost_shader_context_init(struct pipe_context *pctx)
{
pctx->create_vs_state = panfrost_create_shader_state;
pctx->delete_vs_state = panfrost_delete_shader_state;
pctx->bind_vs_state = panfrost_bind_vs_state;
pctx->create_vs_state = panfrost_create_shader_state;
pctx->delete_vs_state = panfrost_delete_shader_state;
pctx->bind_vs_state = panfrost_bind_vs_state;
pctx->create_fs_state = panfrost_create_shader_state;
pctx->delete_fs_state = panfrost_delete_shader_state;
pctx->bind_fs_state = panfrost_bind_fs_state;
pctx->create_fs_state = panfrost_create_shader_state;
pctx->delete_fs_state = panfrost_delete_shader_state;
pctx->bind_fs_state = panfrost_bind_fs_state;
pctx->create_compute_state = panfrost_create_compute_state;
pctx->bind_compute_state = panfrost_bind_compute_state;
pctx->delete_compute_state = panfrost_delete_shader_state;
pctx->create_compute_state = panfrost_create_compute_state;
pctx->bind_compute_state = panfrost_bind_compute_state;
pctx->delete_compute_state = panfrost_delete_shader_state;
}


@ -64,20 +64,20 @@
static bool
bi_has_skip_bit(enum bi_opcode op)
{
switch (op) {
case BI_OPCODE_TEX_SINGLE:
case BI_OPCODE_TEXC:
case BI_OPCODE_TEXC_DUAL:
case BI_OPCODE_TEXS_2D_F16:
case BI_OPCODE_TEXS_2D_F32:
case BI_OPCODE_TEXS_CUBE_F16:
case BI_OPCODE_TEXS_CUBE_F32:
case BI_OPCODE_VAR_TEX_F16:
case BI_OPCODE_VAR_TEX_F32:
return true;
default:
return false;
}
switch (op) {
case BI_OPCODE_TEX_SINGLE:
case BI_OPCODE_TEXC:
case BI_OPCODE_TEXC_DUAL:
case BI_OPCODE_TEXS_2D_F16:
case BI_OPCODE_TEXS_2D_F32:
case BI_OPCODE_TEXS_CUBE_F16:
case BI_OPCODE_TEXS_CUBE_F32:
case BI_OPCODE_VAR_TEX_F16:
case BI_OPCODE_VAR_TEX_F32:
return true;
default:
return false;
}
}
/* Does a given instruction require helper threads to be active (because it
@ -87,52 +87,52 @@ bi_has_skip_bit(enum bi_opcode op)
bool
bi_instr_uses_helpers(bi_instr *I)
{
switch (I->op) {
case BI_OPCODE_TEXC:
case BI_OPCODE_TEXC_DUAL:
case BI_OPCODE_TEXS_2D_F16:
case BI_OPCODE_TEXS_2D_F32:
case BI_OPCODE_TEXS_CUBE_F16:
case BI_OPCODE_TEXS_CUBE_F32:
case BI_OPCODE_VAR_TEX_F16:
case BI_OPCODE_VAR_TEX_F32:
return !I->lod_mode; /* set for zero, clear for computed */
case BI_OPCODE_TEX_SINGLE:
return (I->va_lod_mode == BI_VA_LOD_MODE_COMPUTED_LOD) ||
(I->va_lod_mode == BI_VA_LOD_MODE_COMPUTED_BIAS);
case BI_OPCODE_CLPER_I32:
case BI_OPCODE_CLPER_OLD_I32:
/* Fragment shaders require helpers to implement derivatives.
* Other shader stages don't have helpers at all */
return true;
default:
return false;
}
switch (I->op) {
case BI_OPCODE_TEXC:
case BI_OPCODE_TEXC_DUAL:
case BI_OPCODE_TEXS_2D_F16:
case BI_OPCODE_TEXS_2D_F32:
case BI_OPCODE_TEXS_CUBE_F16:
case BI_OPCODE_TEXS_CUBE_F32:
case BI_OPCODE_VAR_TEX_F16:
case BI_OPCODE_VAR_TEX_F32:
return !I->lod_mode; /* set for zero, clear for computed */
case BI_OPCODE_TEX_SINGLE:
return (I->va_lod_mode == BI_VA_LOD_MODE_COMPUTED_LOD) ||
(I->va_lod_mode == BI_VA_LOD_MODE_COMPUTED_BIAS);
case BI_OPCODE_CLPER_I32:
case BI_OPCODE_CLPER_OLD_I32:
/* Fragment shaders require helpers to implement derivatives.
* Other shader stages don't have helpers at all */
return true;
default:
return false;
}
}
/* Does a block use helpers directly */
static bool
bi_block_uses_helpers(bi_block *block)
{
bi_foreach_instr_in_block(block, I) {
if (bi_instr_uses_helpers(I))
return true;
}
bi_foreach_instr_in_block(block, I) {
if (bi_instr_uses_helpers(I))
return true;
}
return false;
return false;
}
bool
bi_block_terminates_helpers(bi_block *block)
{
/* Can't terminate if a successor needs helpers */
bi_foreach_successor(block, succ) {
if (succ->pass_flags & 1)
return false;
}
/* Can't terminate if a successor needs helpers */
bi_foreach_successor(block, succ) {
if (succ->pass_flags & 1)
return false;
}
/* Otherwise we terminate */
return true;
/* Otherwise we terminate */
return true;
}
/*
@ -142,128 +142,130 @@ bi_block_terminates_helpers(bi_block *block)
static void
bi_propagate_pass_flag(bi_block *block)
{
block->pass_flags = 1;
block->pass_flags = 1;
bi_foreach_predecessor(block, pred) {
if ((*pred)->pass_flags == 0)
bi_propagate_pass_flag(*pred);
}
bi_foreach_predecessor(block, pred) {
if ((*pred)->pass_flags == 0)
bi_propagate_pass_flag(*pred);
}
}
void
bi_analyze_helper_terminate(bi_context *ctx)
{
/* Other shader stages do not have a notion of helper threads, so we
* can skip the analysis. Don't run for blend shaders, either, since
* they run in the context of another shader that we don't see. */
if (ctx->stage != MESA_SHADER_FRAGMENT || ctx->inputs->is_blend)
return;
/* Other shader stages do not have a notion of helper threads, so we
* can skip the analysis. Don't run for blend shaders, either, since
* they run in the context of another shader that we don't see. */
if (ctx->stage != MESA_SHADER_FRAGMENT || ctx->inputs->is_blend)
return;
/* Clear flags */
bi_foreach_block(ctx, block)
block->pass_flags = 0;
/* Clear flags */
bi_foreach_block(ctx, block)
block->pass_flags = 0;
/* For each block, check if it uses helpers and propagate that fact if
* so. We walk in reverse order to minimize the number of blocks tested:
* if the (unique) last block uses helpers, only that block is tested.
*/
bi_foreach_block_rev(ctx, block) {
if (block->pass_flags == 0 && bi_block_uses_helpers(block))
bi_propagate_pass_flag(block);
}
/* For each block, check if it uses helpers and propagate that fact if
* so. We walk in reverse order to minimize the number of blocks tested:
* if the (unique) last block uses helpers, only that block is tested.
*/
bi_foreach_block_rev(ctx, block) {
if (block->pass_flags == 0 && bi_block_uses_helpers(block))
bi_propagate_pass_flag(block);
}
}
void
bi_mark_clauses_td(bi_context *ctx)
{
if (ctx->stage != MESA_SHADER_FRAGMENT || ctx->inputs->is_blend)
return;
if (ctx->stage != MESA_SHADER_FRAGMENT || ctx->inputs->is_blend)
return;
/* Finally, mark clauses requiring helpers */
bi_foreach_block(ctx, block) {
/* At the end, there are helpers iff we don't terminate */
bool helpers = !bi_block_terminates_helpers(block);
/* Finally, mark clauses requiring helpers */
bi_foreach_block(ctx, block) {
/* At the end, there are helpers iff we don't terminate */
bool helpers = !bi_block_terminates_helpers(block);
bi_foreach_clause_in_block_rev(block, clause) {
bi_foreach_instr_in_clause_rev(block, clause, I) {
helpers |= bi_instr_uses_helpers(I);
}
bi_foreach_clause_in_block_rev(block, clause) {
bi_foreach_instr_in_clause_rev(block, clause, I) {
helpers |= bi_instr_uses_helpers(I);
}
clause->td = !helpers;
}
}
clause->td = !helpers;
}
}
}
static bool
bi_helper_block_update(BITSET_WORD *deps, bi_block *block)
{
bool progress = false;
bool progress = false;
bi_foreach_instr_in_block_rev(block, I) {
/* If a destination is required by helper invocation... */
bi_foreach_dest(I, d) {
if (!BITSET_TEST(deps, I->dest[d].value))
continue;
bi_foreach_instr_in_block_rev(block, I) {
/* If a destination is required by helper invocation... */
bi_foreach_dest(I, d) {
if (!BITSET_TEST(deps, I->dest[d].value))
continue;
/* ...so are the sources */
bi_foreach_ssa_src(I, s) {
progress |= !BITSET_TEST(deps, I->src[s].value);
BITSET_SET(deps, I->src[s].value);
}
/* ...so are the sources */
bi_foreach_ssa_src(I, s) {
progress |= !BITSET_TEST(deps, I->src[s].value);
BITSET_SET(deps, I->src[s].value);
}
break;
}
}
break;
}
}
return progress;
return progress;
}
void
bi_analyze_helper_requirements(bi_context *ctx)
{
BITSET_WORD *deps = calloc(sizeof(BITSET_WORD), ctx->ssa_alloc);
BITSET_WORD *deps = calloc(sizeof(BITSET_WORD), ctx->ssa_alloc);
/* Initialize with the sources of instructions consuming
* derivatives */
/* Initialize with the sources of instructions consuming
* derivatives */
bi_foreach_instr_global(ctx, I) {
if (!bi_instr_uses_helpers(I)) continue;
bi_foreach_instr_global(ctx, I) {
if (!bi_instr_uses_helpers(I))
continue;
bi_foreach_ssa_src(I, s)
BITSET_SET(deps, I->src[s].value);
}
bi_foreach_ssa_src(I, s)
BITSET_SET(deps, I->src[s].value);
}
/* Propagate that up */
u_worklist worklist;
bi_worklist_init(ctx, &worklist);
/* Propagate that up */
u_worklist worklist;
bi_worklist_init(ctx, &worklist);
bi_foreach_block(ctx, block) {
bi_worklist_push_tail(&worklist, block);
}
bi_foreach_block(ctx, block) {
bi_worklist_push_tail(&worklist, block);
}
while (!u_worklist_is_empty(&worklist)) {
bi_block *blk = bi_worklist_pop_tail(&worklist);
while (!u_worklist_is_empty(&worklist)) {
bi_block *blk = bi_worklist_pop_tail(&worklist);
if (bi_helper_block_update(deps, blk)) {
bi_foreach_predecessor(blk, pred)
bi_worklist_push_head(&worklist, *pred);
}
}
if (bi_helper_block_update(deps, blk)) {
bi_foreach_predecessor(blk, pred)
bi_worklist_push_head(&worklist, *pred);
}
}
u_worklist_fini(&worklist);
u_worklist_fini(&worklist);
/* Set the execute bits */
/* Set the execute bits */
bi_foreach_instr_global(ctx, I) {
if (!bi_has_skip_bit(I->op)) continue;
bi_foreach_instr_global(ctx, I) {
if (!bi_has_skip_bit(I->op))
continue;
bool exec = false;
bool exec = false;
bi_foreach_dest(I, d)
exec |= BITSET_TEST(deps, I->dest[d].value);
bi_foreach_dest(I, d)
exec |= BITSET_TEST(deps, I->dest[d].value);
I->skip = !exec;
}
I->skip = !exec;
}
free(deps);
free(deps);
}


@ -37,10 +37,8 @@
bool
bi_ec0_packed(unsigned tuple_count)
{
return (tuple_count == 3) ||
(tuple_count == 5) ||
(tuple_count == 6) ||
(tuple_count == 8);
return (tuple_count == 3) || (tuple_count == 5) || (tuple_count == 6) ||
(tuple_count == 8);
}
/* Helper to calculate the number of quadwords in a clause. This is a function
@ -60,7 +58,7 @@ bi_ec0_packed(unsigned tuple_count)
* 6 | 5*
* 7 | 5
* 8 | 6*
*
*
* Y = { X if X <= 3
* { X - 1 if 4 <= X <= 6
* { X - 2 if 7 <= X <= 8
@ -72,15 +70,15 @@ bi_ec0_packed(unsigned tuple_count)
static unsigned
bi_clause_quadwords(bi_clause *clause)
{
unsigned X = clause->tuple_count;
unsigned Y = X - ((X >= 7) ? 2 : (X >= 4) ? 1 : 0);
unsigned X = clause->tuple_count;
unsigned Y = X - ((X >= 7) ? 2 : (X >= 4) ? 1 : 0);
unsigned constants = clause->constant_count;
unsigned constants = clause->constant_count;
if ((X != 4) && (X != 7) && (X >= 3) && constants)
constants--;
if ((X != 4) && (X != 7) && (X >= 3) && constants)
constants--;
return Y + DIV_ROUND_UP(constants, 2);
return Y + DIV_ROUND_UP(constants, 2);
}
/* Measures the number of quadwords a branch jumps. Bifrost relative offsets
@ -90,62 +88,62 @@ bi_clause_quadwords(bi_clause *clause)
signed
bi_block_offset(bi_context *ctx, bi_clause *start, bi_block *target)
{
/* Signed since we might jump backwards */
signed ret = 0;
/* Signed since we might jump backwards */
signed ret = 0;
/* Determine if the block we're branching to is strictly greater in
* source order */
bool forwards = target->index > start->block->index;
/* Determine if the block we're branching to is strictly greater in
* source order */
bool forwards = target->index > start->block->index;
if (forwards) {
/* We have to jump through this block from the start of this
* clause to the end */
bi_foreach_clause_in_block_from(start->block, clause, start) {
ret += bi_clause_quadwords(clause);
}
if (forwards) {
/* We have to jump through this block from the start of this
* clause to the end */
bi_foreach_clause_in_block_from(start->block, clause, start) {
ret += bi_clause_quadwords(clause);
}
/* We then need to jump through every clause of every following
* block until the target */
bi_foreach_block_from(ctx, start->block, blk) {
/* Don't double-count the first block */
if (blk == start->block)
continue;
/* We then need to jump through every clause of every following
* block until the target */
bi_foreach_block_from(ctx, start->block, blk) {
/* Don't double-count the first block */
if (blk == start->block)
continue;
/* End just before the target */
if (blk == target)
break;
/* End just before the target */
if (blk == target)
break;
/* Count every clause in the block */
bi_foreach_clause_in_block(blk, clause) {
ret += bi_clause_quadwords(clause);
}
}
} else {
/* We start at the beginning of the clause but have to jump
* through the clauses before us in the block */
bi_foreach_clause_in_block_from_rev(start->block, clause, start) {
if (clause == start)
continue;
/* Count every clause in the block */
bi_foreach_clause_in_block(blk, clause) {
ret += bi_clause_quadwords(clause);
}
}
} else {
/* We start at the beginning of the clause but have to jump
* through the clauses before us in the block */
bi_foreach_clause_in_block_from_rev(start->block, clause, start) {
if (clause == start)
continue;
ret -= bi_clause_quadwords(clause);
}
ret -= bi_clause_quadwords(clause);
}
/* And jump back every clause of preceding blocks up through
* and including the target to get to the beginning of the
* target */
bi_foreach_block_from_rev(ctx, start->block, blk) {
if (blk == start->block)
continue;
/* And jump back every clause of preceding blocks up through
* and including the target to get to the beginning of the
* target */
bi_foreach_block_from_rev(ctx, start->block, blk) {
if (blk == start->block)
continue;
bi_foreach_clause_in_block(blk, clause) {
ret -= bi_clause_quadwords(clause);
}
bi_foreach_clause_in_block(blk, clause) {
ret -= bi_clause_quadwords(clause);
}
/* End just after the target */
if (blk == target)
break;
}
}
/* End just after the target */
if (blk == target)
break;
}
}
return ret;
return ret;
}


@ -23,98 +23,100 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "util/u_memory.h"
#include "compiler.h"
void
bi_liveness_ins_update_ssa(BITSET_WORD *live, const bi_instr *I)
{
bi_foreach_dest(I, d)
BITSET_CLEAR(live, I->dest[d].value);
bi_foreach_dest(I, d)
BITSET_CLEAR(live, I->dest[d].value);
bi_foreach_ssa_src(I, s)
BITSET_SET(live, I->src[s].value);
bi_foreach_ssa_src(I, s)
BITSET_SET(live, I->src[s].value);
}
void
bi_compute_liveness_ssa(bi_context *ctx)
{
u_worklist worklist;
u_worklist_init(&worklist, ctx->num_blocks, NULL);
u_worklist worklist;
u_worklist_init(&worklist, ctx->num_blocks, NULL);
/* Free any previous liveness, and allocate */
unsigned words = BITSET_WORDS(ctx->ssa_alloc);
/* Free any previous liveness, and allocate */
unsigned words = BITSET_WORDS(ctx->ssa_alloc);
bi_foreach_block(ctx, block) {
if (block->ssa_live_in)
ralloc_free(block->ssa_live_in);
bi_foreach_block(ctx, block) {
if (block->ssa_live_in)
ralloc_free(block->ssa_live_in);
if (block->ssa_live_out)
ralloc_free(block->ssa_live_out);
if (block->ssa_live_out)
ralloc_free(block->ssa_live_out);
block->ssa_live_in = rzalloc_array(block, BITSET_WORD, words);
block->ssa_live_out = rzalloc_array(block, BITSET_WORD, words);
block->ssa_live_in = rzalloc_array(block, BITSET_WORD, words);
block->ssa_live_out = rzalloc_array(block, BITSET_WORD, words);
bi_worklist_push_head(&worklist, block);
}
bi_worklist_push_head(&worklist, block);
}
/* Iterate the work list */
while(!u_worklist_is_empty(&worklist)) {
/* Pop in reverse order since liveness is a backwards pass */
bi_block *blk = bi_worklist_pop_head(&worklist);
/* Iterate the work list */
while (!u_worklist_is_empty(&worklist)) {
/* Pop in reverse order since liveness is a backwards pass */
bi_block *blk = bi_worklist_pop_head(&worklist);
/* Update its liveness information */
memcpy(blk->ssa_live_in, blk->ssa_live_out, words * sizeof(BITSET_WORD));
/* Update its liveness information */
memcpy(blk->ssa_live_in, blk->ssa_live_out, words * sizeof(BITSET_WORD));
bi_foreach_instr_in_block_rev(blk, I) {
/* Phi nodes are handled separately, so we skip them. As phi nodes are
* at the beginning and we're iterating backwards, we stop as soon as
* we hit a phi node.
*/
if (I->op == BI_OPCODE_PHI)
break;
bi_foreach_instr_in_block_rev(blk, I) {
/* Phi nodes are handled separately, so we skip them. As phi nodes are
* at the beginning and we're iterating backwards, we stop as soon as
* we hit a phi node.
*/
if (I->op == BI_OPCODE_PHI)
break;
bi_liveness_ins_update_ssa(blk->ssa_live_in, I);
}
bi_liveness_ins_update_ssa(blk->ssa_live_in, I);
}
/* Propagate the live in of the successor (blk) to the live out of
* predecessors.
*
* Phi nodes are logically on the control flow edge and act in parallel.
* To handle when propagating, we kill writes from phis and make live the
* corresponding sources.
*/
bi_foreach_predecessor(blk, pred) {
BITSET_WORD *live = ralloc_array(blk, BITSET_WORD, words);
memcpy(live, blk->ssa_live_in, words * sizeof(BITSET_WORD));
/* Propagate the live in of the successor (blk) to the live out of
* predecessors.
*
* Phi nodes are logically on the control flow edge and act in parallel.
* To handle when propagating, we kill writes from phis and make live the
* corresponding sources.
*/
bi_foreach_predecessor(blk, pred) {
BITSET_WORD *live = ralloc_array(blk, BITSET_WORD, words);
memcpy(live, blk->ssa_live_in, words * sizeof(BITSET_WORD));
/* Kill write */
bi_foreach_instr_in_block(blk, I) {
if (I->op != BI_OPCODE_PHI) break;
/* Kill write */
bi_foreach_instr_in_block(blk, I) {
if (I->op != BI_OPCODE_PHI)
break;
BITSET_CLEAR(live, I->dest[0].value);
}
BITSET_CLEAR(live, I->dest[0].value);
}
/* Make live the corresponding source */
bi_foreach_instr_in_block(blk, I) {
if (I->op != BI_OPCODE_PHI) break;
/* Make live the corresponding source */
bi_foreach_instr_in_block(blk, I) {
if (I->op != BI_OPCODE_PHI)
break;
bi_index operand = I->src[bi_predecessor_index(blk, *pred)];
if (bi_is_ssa(operand))
BITSET_SET(live, operand.value);
}
bi_index operand = I->src[bi_predecessor_index(blk, *pred)];
if (bi_is_ssa(operand))
BITSET_SET(live, operand.value);
}
BITSET_WORD progress = 0;
BITSET_WORD progress = 0;
for (unsigned i = 0; i < words; ++i) {
progress |= live[i] & ~((*pred)->ssa_live_out[i]);
(*pred)->ssa_live_out[i] |= live[i];
}
for (unsigned i = 0; i < words; ++i) {
progress |= live[i] & ~((*pred)->ssa_live_out[i]);
(*pred)->ssa_live_out[i] |= live[i];
}
if (progress != 0)
bi_worklist_push_tail(&worklist, *pred);
}
}
if (progress != 0)
bi_worklist_push_tail(&worklist, *pred);
}
}
u_worklist_fini(&worklist);
u_worklist_fini(&worklist);
}
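
A miniature model of the backwards worklist scheme above (hypothetical, not from the tree): one bit per value, a fixed three-block chain A -> B -> C, no phis, and a plain stack standing in for u_worklist, so duplicate pushes are tolerated rather than deduplicated.

  #include <stdint.h>
  #include <stdio.h>

  struct blk {
     uint32_t def, use, live_in, live_out;
     int preds[2];
     int npreds;
  };

  int
  main(void)
  {
     /* A defines v0; B uses v0 and defines v1; C uses v1 */
     struct blk b[3] = {
        {.def = 1u << 0, .use = 0, .npreds = 0},
        {.def = 1u << 1, .use = 1u << 0, .preds = {0}, .npreds = 1},
        {.def = 0, .use = 1u << 1, .preds = {1}, .npreds = 1},
     };
     int work[16], top = 0;

     for (int i = 0; i < 3; ++i)
        work[top++] = i;

     while (top) {
        int i = work[--top];

        /* live-in = (live-out minus defs) union uses */
        b[i].live_in = (b[i].live_out & ~b[i].def) | b[i].use;

        /* Re-queue a predecessor whenever its live-out grows */
        for (int p = 0; p < b[i].npreds; ++p) {
           int pred = b[i].preds[p];
           uint32_t grown = b[i].live_in & ~b[pred].live_out;

           b[pred].live_out |= b[i].live_in;
           if (grown)
              work[top++] = pred;
        }
     }

     for (int i = 0; i < 3; ++i)
        printf("block %d: live-in %x, live-out %x\n", i,
               (unsigned)b[i].live_in, (unsigned)b[i].live_out);

     return 0;
  }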


@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "compiler/nir/nir_builder.h"
#include "compiler.h"
/* Divergent attribute access is undefined behaviour. To avoid divergence,
* lower to an if-chain like:
@ -40,89 +40,88 @@
static bool
bi_lower_divergent_indirects_impl(nir_builder *b, nir_instr *instr, void *data)
{
if (instr->type != nir_instr_type_intrinsic)
return false;
if (instr->type != nir_instr_type_intrinsic)
return false;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
gl_shader_stage stage = b->shader->info.stage;
nir_src *offset;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
gl_shader_stage stage = b->shader->info.stage;
nir_src *offset;
/* Not all indirect access needs this workaround */
switch (intr->intrinsic) {
case nir_intrinsic_load_input:
case nir_intrinsic_load_interpolated_input:
/* Attributes and varyings */
offset = nir_get_io_offset_src(intr);
break;
/* Not all indirect access needs this workaround */
switch (intr->intrinsic) {
case nir_intrinsic_load_input:
case nir_intrinsic_load_interpolated_input:
/* Attributes and varyings */
offset = nir_get_io_offset_src(intr);
break;
case nir_intrinsic_store_output:
/* Varyings only */
if (stage == MESA_SHADER_FRAGMENT)
return false;
case nir_intrinsic_store_output:
/* Varyings only */
if (stage == MESA_SHADER_FRAGMENT)
return false;
offset = nir_get_io_offset_src(intr);
break;
offset = nir_get_io_offset_src(intr);
break;
case nir_intrinsic_image_atomic_add:
case nir_intrinsic_image_atomic_imin:
case nir_intrinsic_image_atomic_umin:
case nir_intrinsic_image_atomic_imax:
case nir_intrinsic_image_atomic_umax:
case nir_intrinsic_image_atomic_and:
case nir_intrinsic_image_atomic_or:
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_image_load:
case nir_intrinsic_image_store:
/* Any image access */
offset = &intr->src[0];
break;
default:
return false;
}
case nir_intrinsic_image_atomic_add:
case nir_intrinsic_image_atomic_imin:
case nir_intrinsic_image_atomic_umin:
case nir_intrinsic_image_atomic_imax:
case nir_intrinsic_image_atomic_umax:
case nir_intrinsic_image_atomic_and:
case nir_intrinsic_image_atomic_or:
case nir_intrinsic_image_atomic_xor:
case nir_intrinsic_image_load:
case nir_intrinsic_image_store:
/* Any image access */
offset = &intr->src[0];
break;
default:
return false;
}
if (!nir_src_is_divergent(*offset))
return false;
if (!nir_src_is_divergent(*offset))
return false;
/* This indirect does need it */
/* This indirect does need it */
b->cursor = nir_before_instr(instr);
nir_ssa_def *lane = nir_load_subgroup_invocation(b);
unsigned *lanes = data;
b->cursor = nir_before_instr(instr);
nir_ssa_def *lane = nir_load_subgroup_invocation(b);
unsigned *lanes = data;
/* Write zero in a funny way to bypass lower_load_const_to_scalar */
bool has_dest = nir_intrinsic_infos[intr->intrinsic].has_dest;
unsigned size = has_dest ? nir_dest_bit_size(intr->dest) : 32;
nir_ssa_def *zero = has_dest ? nir_imm_zero(b, 1, size) : NULL;
nir_ssa_def *zeroes[4] = { zero, zero, zero, zero };
nir_ssa_def *res = has_dest ?
nir_vec(b, zeroes, nir_dest_num_components(intr->dest)) : NULL;
/* Write zero in a funny way to bypass lower_load_const_to_scalar */
bool has_dest = nir_intrinsic_infos[intr->intrinsic].has_dest;
unsigned size = has_dest ? nir_dest_bit_size(intr->dest) : 32;
nir_ssa_def *zero = has_dest ? nir_imm_zero(b, 1, size) : NULL;
nir_ssa_def *zeroes[4] = {zero, zero, zero, zero};
nir_ssa_def *res =
has_dest ? nir_vec(b, zeroes, nir_dest_num_components(intr->dest)) : NULL;
for (unsigned i = 0; i < (*lanes); ++i) {
nir_push_if(b, nir_ieq_imm(b, lane, i));
for (unsigned i = 0; i < (*lanes); ++i) {
nir_push_if(b, nir_ieq_imm(b, lane, i));
nir_instr *c = nir_instr_clone(b->shader, instr);
nir_intrinsic_instr *c_intr = nir_instr_as_intrinsic(c);
nir_builder_instr_insert(b, c);
nir_pop_if(b, NULL);
nir_instr *c = nir_instr_clone(b->shader, instr);
nir_intrinsic_instr *c_intr = nir_instr_as_intrinsic(c);
nir_builder_instr_insert(b, c);
nir_pop_if(b, NULL);
if (has_dest) {
assert(c_intr->dest.is_ssa);
nir_ssa_def *c_ssa = &c_intr->dest.ssa;
res = nir_if_phi(b, c_ssa, res);
}
}
if (has_dest) {
assert(c_intr->dest.is_ssa);
nir_ssa_def *c_ssa = &c_intr->dest.ssa;
res = nir_if_phi(b, c_ssa, res);
}
}
if (has_dest)
nir_ssa_def_rewrite_uses(&intr->dest.ssa, res);
if (has_dest)
nir_ssa_def_rewrite_uses(&intr->dest.ssa, res);
nir_instr_remove(instr);
return true;
nir_instr_remove(instr);
return true;
}
bool
bi_lower_divergent_indirects(nir_shader *shader, unsigned lanes)
{
return nir_shader_instructions_pass(shader,
bi_lower_divergent_indirects_impl,
nir_metadata_none, &lanes);
return nir_shader_instructions_pass(
shader, bi_lower_divergent_indirects_impl, nir_metadata_none, &lanes);
}
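
A scalar model of the if-chain this pass builds, as a hypothetical sketch with four lanes and made-up table/index data: each arm lets exactly one lane perform the access, so the index used inside any arm is uniform.

  #include <stdio.h>

  #define LANES 4

  int
  main(void)
  {
     unsigned table[8] = {10, 11, 12, 13, 14, 15, 16, 17};
     unsigned index[LANES] = {3, 0, 2, 1}; /* divergent per-lane index */
     unsigned result[LANES] = {0};         /* the zero fallback value */

     /* One "if (lane == i)" arm per lane; inside an arm the index is uniform */
     for (unsigned i = 0; i < LANES; ++i) {
        for (unsigned lane = 0; lane < LANES; ++lane) {
           if (lane == i)
              result[lane] = table[index[lane]]; /* the cloned access */
        }
     }

     for (unsigned lane = 0; lane < LANES; ++lane)
        printf("lane %u -> %u\n", lane, result[lane]);

     return 0;
  }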


@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_builder.h"
#include "compiler.h"
/* Not all 8-bit and 16-bit instructions support all swizzles on all sources.
* These passes, intended to run after NIR->BIR but before scheduling/RA, lower
@ -33,270 +33,269 @@
static bool
bi_swizzle_replicates_8(enum bi_swizzle swz)
{
switch (swz) {
case BI_SWIZZLE_B0000:
case BI_SWIZZLE_B1111:
case BI_SWIZZLE_B2222:
case BI_SWIZZLE_B3333:
return true;
default:
return false;
}
switch (swz) {
case BI_SWIZZLE_B0000:
case BI_SWIZZLE_B1111:
case BI_SWIZZLE_B2222:
case BI_SWIZZLE_B3333:
return true;
default:
return false;
}
}
static void
lower_swizzle(bi_context *ctx, bi_instr *ins, unsigned src)
{
/* TODO: Use the opcode table and be a lot more methodical about this... */
switch (ins->op) {
/* Some instructions used with 16-bit data never have swizzles */
case BI_OPCODE_CSEL_V2F16:
case BI_OPCODE_CSEL_V2I16:
case BI_OPCODE_CSEL_V2S16:
case BI_OPCODE_CSEL_V2U16:
/* TODO: Use the opcode table and be a lot more methodical about this... */
switch (ins->op) {
/* Some instructions used with 16-bit data never have swizzles */
case BI_OPCODE_CSEL_V2F16:
case BI_OPCODE_CSEL_V2I16:
case BI_OPCODE_CSEL_V2S16:
case BI_OPCODE_CSEL_V2U16:
/* Despite ostensibly being 32-bit instructions, CLPER does not
* inherently interpret the data, so it can be used for v2f16
* derivatives, which might require swizzle lowering */
case BI_OPCODE_CLPER_I32:
case BI_OPCODE_CLPER_OLD_I32:
/* Despite ostensibly being 32-bit instructions, CLPER does not
* inherently interpret the data, so it can be used for v2f16
* derivatives, which might require swizzle lowering */
case BI_OPCODE_CLPER_I32:
case BI_OPCODE_CLPER_OLD_I32:
/* Similarly, CSEL.i32 consumes a boolean as a 32-bit argument. If the
* boolean is implemented as a 16-bit integer, the swizzle is needed
* for correct operation if the instruction producing the 16-bit
* boolean does not replicate to both halves of the containing 32-bit
* register. As such, we may need to lower a swizzle.
*
* This is a silly hack. Ideally, code gen would be smart enough to
* avoid this case (by replicating). In practice, silly hardware design
* decisions force our hand here.
*/
case BI_OPCODE_MUX_I32:
case BI_OPCODE_CSEL_I32:
break;
/* Similarly, CSEL.i32 consumes a boolean as a 32-bit argument. If the
* boolean is implemented as a 16-bit integer, the swizzle is needed
* for correct operation if the instruction producing the 16-bit
* boolean does not replicate to both halves of the containing 32-bit
* register. As such, we may need to lower a swizzle.
*
* This is a silly hack. Ideally, code gen would be smart enough to
* avoid this case (by replicating). In practice, silly hardware design
* decisions force our hand here.
*/
case BI_OPCODE_MUX_I32:
case BI_OPCODE_CSEL_I32:
break;
case BI_OPCODE_IADD_V2S16:
case BI_OPCODE_IADD_V2U16:
case BI_OPCODE_ISUB_V2S16:
case BI_OPCODE_ISUB_V2U16:
if (src == 0 && ins->src[src].swizzle != BI_SWIZZLE_H10)
break;
else
return;
case BI_OPCODE_LSHIFT_AND_V2I16:
case BI_OPCODE_LSHIFT_OR_V2I16:
case BI_OPCODE_LSHIFT_XOR_V2I16:
case BI_OPCODE_RSHIFT_AND_V2I16:
case BI_OPCODE_RSHIFT_OR_V2I16:
case BI_OPCODE_RSHIFT_XOR_V2I16:
if (src == 2)
return;
else
break;
case BI_OPCODE_IADD_V2S16:
case BI_OPCODE_IADD_V2U16:
case BI_OPCODE_ISUB_V2S16:
case BI_OPCODE_ISUB_V2U16:
if (src == 0 && ins->src[src].swizzle != BI_SWIZZLE_H10)
break;
else
return;
case BI_OPCODE_LSHIFT_AND_V2I16:
case BI_OPCODE_LSHIFT_OR_V2I16:
case BI_OPCODE_LSHIFT_XOR_V2I16:
case BI_OPCODE_RSHIFT_AND_V2I16:
case BI_OPCODE_RSHIFT_OR_V2I16:
case BI_OPCODE_RSHIFT_XOR_V2I16:
if (src == 2)
return;
else
break;
/* For some reason MUX.v2i16 allows swaps but not replication */
case BI_OPCODE_MUX_V2I16:
if (ins->src[src].swizzle == BI_SWIZZLE_H10)
return;
else
break;
/* For some reason MUX.v2i16 allows swaps but not replication */
case BI_OPCODE_MUX_V2I16:
if (ins->src[src].swizzle == BI_SWIZZLE_H10)
return;
else
break;
/* No swizzles supported */
case BI_OPCODE_HADD_V4U8:
case BI_OPCODE_HADD_V4S8:
case BI_OPCODE_CLZ_V4U8:
case BI_OPCODE_IDP_V4I8:
case BI_OPCODE_IABS_V4S8:
case BI_OPCODE_ICMP_V4I8:
case BI_OPCODE_ICMP_V4U8:
case BI_OPCODE_MUX_V4I8:
case BI_OPCODE_IADD_IMM_V4I8:
break;
/* No swizzles supported */
case BI_OPCODE_HADD_V4U8:
case BI_OPCODE_HADD_V4S8:
case BI_OPCODE_CLZ_V4U8:
case BI_OPCODE_IDP_V4I8:
case BI_OPCODE_IABS_V4S8:
case BI_OPCODE_ICMP_V4I8:
case BI_OPCODE_ICMP_V4U8:
case BI_OPCODE_MUX_V4I8:
case BI_OPCODE_IADD_IMM_V4I8:
break;
case BI_OPCODE_LSHIFT_AND_V4I8:
case BI_OPCODE_LSHIFT_OR_V4I8:
case BI_OPCODE_LSHIFT_XOR_V4I8:
case BI_OPCODE_RSHIFT_AND_V4I8:
case BI_OPCODE_RSHIFT_OR_V4I8:
case BI_OPCODE_RSHIFT_XOR_V4I8:
/* Last source allows identity or replication */
if (src == 2 && bi_swizzle_replicates_8(ins->src[src].swizzle))
return;
case BI_OPCODE_LSHIFT_AND_V4I8:
case BI_OPCODE_LSHIFT_OR_V4I8:
case BI_OPCODE_LSHIFT_XOR_V4I8:
case BI_OPCODE_RSHIFT_AND_V4I8:
case BI_OPCODE_RSHIFT_OR_V4I8:
case BI_OPCODE_RSHIFT_XOR_V4I8:
/* Last source allows identity or replication */
if (src == 2 && bi_swizzle_replicates_8(ins->src[src].swizzle))
return;
/* Others do not allow swizzles */
break;
/* Others do not allow swizzles */
break;
/* We don't want to deal with reswizzling logic in modifier prop. Move
* the swizzle outside, it's easier for clamp propagation. */
case BI_OPCODE_FCLAMP_V2F16:
{
bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));
bi_index dest = ins->dest[0];
bi_index tmp = bi_temp(ctx);
/* We don't want to deal with reswizzling logic in modifier prop. Move
* the swizzle outside, it's easier for clamp propagation. */
case BI_OPCODE_FCLAMP_V2F16: {
bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));
bi_index dest = ins->dest[0];
bi_index tmp = bi_temp(ctx);
ins->dest[0] = tmp;
bi_swz_v2i16_to(&b, dest, bi_replace_index(ins->src[0], tmp));
return;
}
ins->dest[0] = tmp;
bi_swz_v2i16_to(&b, dest, bi_replace_index(ins->src[0], tmp));
return;
}
default:
return;
}
default:
return;
}
/* First, try to apply a given swizzle to a constant to clear the
* runtime swizzle. This is less heavy-handed than ignoring the
* swizzle for scalar destinations, since it maintains
* replication of the destination.
*/
if (ins->src[src].type == BI_INDEX_CONSTANT) {
ins->src[src].value = bi_apply_swizzle(ins->src[src].value,
ins->src[src].swizzle);
ins->src[src].swizzle = BI_SWIZZLE_H01;
return;
}
/* First, try to apply a given swizzle to a constant to clear the
* runtime swizzle. This is less heavy-handed than ignoring the
* swizzle for scalar destinations, since it maintains
* replication of the destination.
*/
if (ins->src[src].type == BI_INDEX_CONSTANT) {
ins->src[src].value =
bi_apply_swizzle(ins->src[src].value, ins->src[src].swizzle);
ins->src[src].swizzle = BI_SWIZZLE_H01;
return;
}
/* Even if the source does not replicate, if the consuming instruction
* produces a 16-bit scalar, we can ignore the other component.
*/
if (ins->dest[0].swizzle == BI_SWIZZLE_H00 &&
ins->src[src].swizzle == BI_SWIZZLE_H00)
{
ins->src[src].swizzle = BI_SWIZZLE_H01;
return;
}
/* Even if the source does not replicate, if the consuming instruction
* produces a 16-bit scalar, we can ignore the other component.
*/
if (ins->dest[0].swizzle == BI_SWIZZLE_H00 &&
ins->src[src].swizzle == BI_SWIZZLE_H00) {
ins->src[src].swizzle = BI_SWIZZLE_H01;
return;
}
/* Lower it away */
bi_builder b = bi_init_builder(ctx, bi_before_instr(ins));
/* Lower it away */
bi_builder b = bi_init_builder(ctx, bi_before_instr(ins));
bool is_8 = (bi_opcode_props[ins->op].size == BI_SIZE_8);
bi_index orig = ins->src[src];
bi_index stripped = bi_replace_index(bi_null(), orig);
stripped.swizzle = ins->src[src].swizzle;
bool is_8 = (bi_opcode_props[ins->op].size == BI_SIZE_8);
bi_index orig = ins->src[src];
bi_index stripped = bi_replace_index(bi_null(), orig);
stripped.swizzle = ins->src[src].swizzle;
bi_index swz = is_8 ? bi_swz_v4i8(&b, stripped) : bi_swz_v2i16(&b, stripped);
bi_index swz = is_8 ? bi_swz_v4i8(&b, stripped) : bi_swz_v2i16(&b, stripped);
bi_replace_src(ins, src, swz);
ins->src[src].swizzle = BI_SWIZZLE_H01;
bi_replace_src(ins, src, swz);
ins->src[src].swizzle = BI_SWIZZLE_H01;
}
static bool
bi_swizzle_replicates_16(enum bi_swizzle swz)
{
switch (swz) {
case BI_SWIZZLE_H00:
case BI_SWIZZLE_H11:
return true;
default:
/* If a swizzle replicates every 8-bits, it also replicates
* every 16-bits, so allow 8-bit replicating swizzles.
*/
return bi_swizzle_replicates_8(swz);
}
switch (swz) {
case BI_SWIZZLE_H00:
case BI_SWIZZLE_H11:
return true;
default:
/* If a swizzle replicates every 8-bits, it also replicates
* every 16-bits, so allow 8-bit replicating swizzles.
*/
return bi_swizzle_replicates_8(swz);
}
}
static bool
bi_instr_replicates(bi_instr *I, BITSET_WORD *replicates_16)
{
switch (I->op) {
switch (I->op) {
/* Instructions that construct vectors have replicated output if their
* sources are identical. Check this case first.
*/
case BI_OPCODE_MKVEC_V2I16:
case BI_OPCODE_V2F16_TO_V2S16:
case BI_OPCODE_V2F16_TO_V2U16:
case BI_OPCODE_V2F32_TO_V2F16:
case BI_OPCODE_V2S16_TO_V2F16:
case BI_OPCODE_V2S8_TO_V2F16:
case BI_OPCODE_V2S8_TO_V2S16:
case BI_OPCODE_V2U16_TO_V2F16:
case BI_OPCODE_V2U8_TO_V2F16:
case BI_OPCODE_V2U8_TO_V2U16:
return bi_is_value_equiv(I->src[0], I->src[1]);
/* Instructions that construct vectors have replicated output if their
* sources are identical. Check this case first.
*/
case BI_OPCODE_MKVEC_V2I16:
case BI_OPCODE_V2F16_TO_V2S16:
case BI_OPCODE_V2F16_TO_V2U16:
case BI_OPCODE_V2F32_TO_V2F16:
case BI_OPCODE_V2S16_TO_V2F16:
case BI_OPCODE_V2S8_TO_V2F16:
case BI_OPCODE_V2S8_TO_V2S16:
case BI_OPCODE_V2U16_TO_V2F16:
case BI_OPCODE_V2U8_TO_V2F16:
case BI_OPCODE_V2U8_TO_V2U16:
return bi_is_value_equiv(I->src[0], I->src[1]);
/* 16-bit transcendentals are defined to output zero in their
* upper half, so they do not replicate
*/
case BI_OPCODE_FRCP_F16:
case BI_OPCODE_FRSQ_F16:
return false;
/* 16-bit transcendentals are defined to output zero in their
* upper half, so they do not replicate
*/
case BI_OPCODE_FRCP_F16:
case BI_OPCODE_FRSQ_F16:
return false;
/* Not sure, be conservative, we don't use these. */
case BI_OPCODE_VN_ASST1_F16:
case BI_OPCODE_FPCLASS_F16:
case BI_OPCODE_FPOW_SC_DET_F16:
return false;
/* Not sure, be conservative, we don't use these. */
case BI_OPCODE_VN_ASST1_F16:
case BI_OPCODE_FPCLASS_F16:
case BI_OPCODE_FPOW_SC_DET_F16:
return false;
default:
break;
}
default:
break;
}
/* Replication analysis only makes sense for ALU instructions */
if (bi_opcode_props[I->op].message != BIFROST_MESSAGE_NONE)
return false;
/* Replication analysis only makes sense for ALU instructions */
if (bi_opcode_props[I->op].message != BIFROST_MESSAGE_NONE)
return false;
/* We only analyze 16-bit instructions for 16-bit replication. We could
* maybe do better.
*/
if (bi_opcode_props[I->op].size != BI_SIZE_16)
return false;
/* We only analyze 16-bit instructions for 16-bit replication. We could
* maybe do better.
*/
if (bi_opcode_props[I->op].size != BI_SIZE_16)
return false;
bi_foreach_src(I, s) {
if (bi_is_null(I->src[s]))
continue;
bi_foreach_src(I, s) {
if (bi_is_null(I->src[s]))
continue;
/* Replicated swizzles */
if (bi_swizzle_replicates_16(I->src[s].swizzle))
continue;
/* Replicated swizzles */
if (bi_swizzle_replicates_16(I->src[s].swizzle))
continue;
/* Replicated values */
if (bi_is_ssa(I->src[s]) &&
BITSET_TEST(replicates_16, I->src[s].value))
continue;
/* Replicated values */
if (bi_is_ssa(I->src[s]) && BITSET_TEST(replicates_16, I->src[s].value))
continue;
/* Replicated constants */
if (I->src[s].type == BI_INDEX_CONSTANT &&
(I->src[s].value & 0xFFFF) == (I->src[s].value >> 16))
continue;
/* Replicated constants */
if (I->src[s].type == BI_INDEX_CONSTANT &&
(I->src[s].value & 0xFFFF) == (I->src[s].value >> 16))
continue;
return false;
}
return false;
}
return true;
return true;
}
void
bi_lower_swizzle(bi_context *ctx)
{
bi_foreach_instr_global_safe(ctx, ins) {
bi_foreach_src(ins, s) {
if (bi_is_null(ins->src[s])) continue;
if (ins->src[s].swizzle == BI_SWIZZLE_H01) continue;
bi_foreach_instr_global_safe(ctx, ins) {
bi_foreach_src(ins, s) {
if (bi_is_null(ins->src[s]))
continue;
if (ins->src[s].swizzle == BI_SWIZZLE_H01)
continue;
lower_swizzle(ctx, ins, s);
}
}
lower_swizzle(ctx, ins, s);
}
}
/* Now that we've lowered swizzles, clean up the mess */
BITSET_WORD *replicates_16 = calloc(sizeof(bi_index), ctx->ssa_alloc);
/* Now that we've lowered swizzles, clean up the mess */
BITSET_WORD *replicates_16 = calloc(sizeof(bi_index), ctx->ssa_alloc);
bi_foreach_instr_global(ctx, ins) {
if (ins->nr_dests && bi_instr_replicates(ins, replicates_16))
BITSET_SET(replicates_16, ins->dest[0].value);
bi_foreach_instr_global(ctx, ins) {
if (ins->nr_dests && bi_instr_replicates(ins, replicates_16))
BITSET_SET(replicates_16, ins->dest[0].value);
if (ins->op == BI_OPCODE_SWZ_V2I16 && bi_is_ssa(ins->src[0]) &&
BITSET_TEST(replicates_16, ins->src[0].value)) {
ins->op = BI_OPCODE_MOV_I32;
ins->src[0].swizzle = BI_SWIZZLE_H01;
}
if (ins->op == BI_OPCODE_SWZ_V2I16 && bi_is_ssa(ins->src[0]) &&
BITSET_TEST(replicates_16, ins->src[0].value)) {
ins->op = BI_OPCODE_MOV_I32;
ins->src[0].swizzle = BI_SWIZZLE_H01;
}
/* The above passes rely on replicating destinations. For
* Valhall, we will want to optimize this. For now, default
* to Bifrost compatible behaviour.
*/
if (ins->nr_dests)
ins->dest[0].swizzle = BI_SWIZZLE_H01;
}
/* The above passes rely on replicating destinations. For
* Valhall, we will want to optimize this. For now, default
* to Bifrost compatible behaviour.
*/
if (ins->nr_dests)
ins->dest[0].swizzle = BI_SWIZZLE_H01;
}
free(replicates_16);
free(replicates_16);
}


@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_builder.h"
#include "compiler.h"
/* Dead simple constant folding to clean up compiler frontend patterns. Before
* adding a new pattern here, check why you need it and whether we can avoid
@ -31,83 +31,84 @@
static inline uint32_t
bi_source_value(const bi_instr *I, unsigned s)
{
if (s < I->nr_srcs)
return bi_apply_swizzle(I->src[s].value, I->src[s].swizzle);
else
return 0;
if (s < I->nr_srcs)
return bi_apply_swizzle(I->src[s].value, I->src[s].swizzle);
else
return 0;
}
uint32_t
bi_fold_constant(bi_instr *I, bool *unsupported)
{
/* We can only fold instructions where all sources are constant */
bi_foreach_src(I, s) {
if (I->src[s].type != BI_INDEX_CONSTANT) {
*unsupported = true;
return 0;
}
}
/* We can only fold instructions where all sources are constant */
bi_foreach_src(I, s) {
if (I->src[s].type != BI_INDEX_CONSTANT) {
*unsupported = true;
return 0;
}
}
/* Grab the sources */
uint32_t a = bi_source_value(I, 0);
uint32_t b = bi_source_value(I, 1);
uint32_t c = bi_source_value(I, 2);
uint32_t d = bi_source_value(I, 3);
/* Grab the sources */
uint32_t a = bi_source_value(I, 0);
uint32_t b = bi_source_value(I, 1);
uint32_t c = bi_source_value(I, 2);
uint32_t d = bi_source_value(I, 3);
/* Evaluate the instruction */
switch (I->op) {
case BI_OPCODE_SWZ_V2I16:
return a;
/* Evaluate the instruction */
switch (I->op) {
case BI_OPCODE_SWZ_V2I16:
return a;
case BI_OPCODE_MKVEC_V2I16:
return (b << 16) | (a & 0xFFFF);
case BI_OPCODE_MKVEC_V2I16:
return (b << 16) | (a & 0xFFFF);
case BI_OPCODE_MKVEC_V4I8:
return (d << 24) | ((c & 0xFF) << 16) | ((b & 0xFF) << 8) | (a & 0xFF);
case BI_OPCODE_MKVEC_V4I8:
return (d << 24) | ((c & 0xFF) << 16) | ((b & 0xFF) << 8) | (a & 0xFF);
case BI_OPCODE_MKVEC_V2I8:
return (c << 16) | ((b & 0xFF) << 8) | (a & 0xFF);
case BI_OPCODE_MKVEC_V2I8:
return (c << 16) | ((b & 0xFF) << 8) | (a & 0xFF);
case BI_OPCODE_LSHIFT_OR_I32:
if (I->not_result || I->src[0].neg || I->src[1].neg)
break;
case BI_OPCODE_LSHIFT_OR_I32:
if (I->not_result || I->src[0].neg || I->src[1].neg)
break;
return (a << c) | b;
return (a << c) | b;
case BI_OPCODE_F32_TO_U32:
if (I->round == BI_ROUND_NONE) {
/* Explicitly clamp to prevent undefined behaviour and
* match hardware rules */
float f = uif(a);
return (f >= 0.0) ? (uint32_t) f : 0;
} else
break;
case BI_OPCODE_F32_TO_U32:
if (I->round == BI_ROUND_NONE) {
/* Explicitly clamp to prevent undefined behaviour and
* match hardware rules */
float f = uif(a);
return (f >= 0.0) ? (uint32_t)f : 0;
} else
break;
default:
break;
}
default:
break;
}
*unsupported = true;
return 0;
*unsupported = true;
return 0;
}
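
A quick standalone check of the MKVEC folds above on concrete constants (values invented for the example):

  #include <stdint.h>
  #include <stdio.h>

  int
  main(void)
  {
     uint32_t a = 0x11, b = 0x22, c = 0x33, d = 0x44;

     /* Folded exactly as the MKVEC cases above */
     uint32_t v2i16 = (b << 16) | (a & 0xFFFF);
     uint32_t v4i8 = (d << 24) | ((c & 0xFF) << 16) | ((b & 0xFF) << 8) | (a & 0xFF);

     printf("MKVEC.v2i16 -> 0x%08x\n", (unsigned)v2i16); /* 0x00220011 */
     printf("MKVEC.v4i8  -> 0x%08x\n", (unsigned)v4i8);  /* 0x44332211 */
     return 0;
  }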
bool
bi_opt_constant_fold(bi_context *ctx)
{
bool progress = false;
bool progress = false;
bi_foreach_instr_global_safe(ctx, ins) {
bool unsupported = false;
uint32_t replace = bi_fold_constant(ins, &unsupported);
if (unsupported) continue;
bi_foreach_instr_global_safe(ctx, ins) {
bool unsupported = false;
uint32_t replace = bi_fold_constant(ins, &unsupported);
if (unsupported)
continue;
/* Replace with constant move, to be copypropped */
assert(ins->nr_dests == 1);
bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));
bi_mov_i32_to(&b, ins->dest[0], bi_imm_u32(replace));
bi_remove_instruction(ins);
progress = true;
}
/* Replace with constant move, to be copypropped */
assert(ins->nr_dests == 1);
bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));
bi_mov_i32_to(&b, ins->dest[0], bi_imm_u32(replace));
bi_remove_instruction(ins);
progress = true;
}
return progress;
return progress;
}


@ -22,92 +22,95 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_builder.h"
#include "compiler.h"
/* SSA copy propagation */
static bool
bi_reads_fau(bi_instr *ins)
{
bi_foreach_src(ins, s) {
if (ins->src[s].type == BI_INDEX_FAU)
return true;
}
bi_foreach_src(ins, s) {
if (ins->src[s].type == BI_INDEX_FAU)
return true;
}
return false;
return false;
}
void
bi_opt_copy_prop(bi_context *ctx)
{
/* Chase SPLIT of COLLECT. Instruction selection usually avoids this
* pattern (due to the split cache), but it is inevitably generated by
* the UBO pushing pass.
*/
bi_instr **collects = calloc(sizeof(bi_instr *), ctx->ssa_alloc);
bi_foreach_instr_global_safe(ctx, I) {
if (I->op == BI_OPCODE_COLLECT_I32) {
/* Rewrite trivial collects while we're at it */
if (I->nr_srcs == 1)
I->op = BI_OPCODE_MOV_I32;
/* Chase SPLIT of COLLECT. Instruction selection usually avoids this
* pattern (due to the split cache), but it is inevitably generated by
* the UBO pushing pass.
*/
bi_instr **collects = calloc(sizeof(bi_instr *), ctx->ssa_alloc);
bi_foreach_instr_global_safe(ctx, I) {
if (I->op == BI_OPCODE_COLLECT_I32) {
/* Rewrite trivial collects while we're at it */
if (I->nr_srcs == 1)
I->op = BI_OPCODE_MOV_I32;
collects[I->dest[0].value] = I;
} else if (I->op == BI_OPCODE_SPLIT_I32) {
/* Rewrite trivial splits while we're at it */
if (I->nr_dests == 1)
I->op = BI_OPCODE_MOV_I32;
collects[I->dest[0].value] = I;
} else if (I->op == BI_OPCODE_SPLIT_I32) {
/* Rewrite trivial splits while we're at it */
if (I->nr_dests == 1)
I->op = BI_OPCODE_MOV_I32;
bi_instr *collect = collects[I->src[0].value];
if (!collect)
continue;
bi_instr *collect = collects[I->src[0].value];
if (!collect)
continue;
/* Lower the split to moves, copyprop cleans up */
bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
/* Lower the split to moves, copyprop cleans up */
bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
bi_foreach_dest(I, d)
bi_mov_i32_to(&b, I->dest[d], collect->src[d]);
bi_foreach_dest(I, d)
bi_mov_i32_to(&b, I->dest[d], collect->src[d]);
bi_remove_instruction(I);
}
}
bi_remove_instruction(I);
}
}
free(collects);
free(collects);
bi_index *replacement = calloc(sizeof(bi_index), ctx->ssa_alloc);
bi_index *replacement = calloc(sizeof(bi_index), ctx->ssa_alloc);
bi_foreach_instr_global_safe(ctx, ins) {
if (ins->op == BI_OPCODE_MOV_I32 && ins->src[0].type != BI_INDEX_REGISTER) {
bi_index replace = ins->src[0];
bi_foreach_instr_global_safe(ctx, ins) {
if (ins->op == BI_OPCODE_MOV_I32 &&
ins->src[0].type != BI_INDEX_REGISTER) {
bi_index replace = ins->src[0];
/* Peek through one layer so copyprop converges in one
* iteration for chained moves */
if (bi_is_ssa(replace)) {
bi_index chained = replacement[replace.value];
/* Peek through one layer so copyprop converges in one
* iteration for chained moves */
if (bi_is_ssa(replace)) {
bi_index chained = replacement[replace.value];
if (!bi_is_null(chained))
replace = chained;
}
if (!bi_is_null(chained))
replace = chained;
}
assert(ins->nr_dests == 1);
replacement[ins->dest[0].value] = replace;
}
assert(ins->nr_dests == 1);
replacement[ins->dest[0].value] = replace;
}
bi_foreach_src(ins, s) {
bi_index use = ins->src[s];
bi_foreach_src(ins, s) {
bi_index use = ins->src[s];
if (use.type != BI_INDEX_NORMAL) continue;
if (bi_is_staging_src(ins, s)) continue;
if (use.type != BI_INDEX_NORMAL)
continue;
if (bi_is_staging_src(ins, s))
continue;
bi_index repl = replacement[use.value];
bi_index repl = replacement[use.value];
if (repl.type == BI_INDEX_CONSTANT && bi_reads_fau(ins))
continue;
if (repl.type == BI_INDEX_CONSTANT && bi_reads_fau(ins))
continue;
if (!bi_is_null(repl))
bi_replace_src(ins, s, repl);
}
}
if (!bi_is_null(repl))
bi_replace_src(ins, s, repl);
}
}
free(replacement);
free(replacement);
}
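
A miniature of the replacement-table walk above, as a hypothetical sketch (SSA values modelled as small integers; the FAU and staging restrictions are ignored): moves record their source, and later uses peek through one chained move so a single forward pass converges.

  #include <stdio.h>

  #define N 8

  int
  main(void)
  {
     int replacement[N];

     for (int i = 0; i < N; ++i)
        replacement[i] = -1; /* -1: no replacement recorded */

     /* Program: v1 = mov v0; v2 = mov v1; v3 = add v2, v2 */
     int movs[2][2] = {{1, 0}, {2, 1}}; /* {dest, src} pairs */

     for (int i = 0; i < 2; ++i) {
        int dest = movs[i][0], src = movs[i][1];

        /* Peek through one layer so chained moves converge immediately */
        if (replacement[src] >= 0)
           src = replacement[src];

        replacement[dest] = src;
     }

     int use = 2; /* the add reads v2 */
     if (replacement[use] >= 0)
        use = replacement[use];

     printf("the add ends up reading v%d\n", use); /* v0 */
     return 0;
  }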


@ -22,8 +22,8 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_builder.h"
#include "compiler.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"
@ -36,85 +36,88 @@
static inline uint32_t
HASH(uint32_t hash, unsigned data)
{
return XXH32(&data, sizeof(data), hash);
return XXH32(&data, sizeof(data), hash);
}
static uint32_t
hash_index(uint32_t hash, bi_index index)
{
hash = HASH(hash, index.value);
hash = HASH(hash, index.abs);
hash = HASH(hash, index.neg);
hash = HASH(hash, index.swizzle);
hash = HASH(hash, index.offset);
hash = HASH(hash, index.type);
return hash;
hash = HASH(hash, index.value);
hash = HASH(hash, index.abs);
hash = HASH(hash, index.neg);
hash = HASH(hash, index.swizzle);
hash = HASH(hash, index.offset);
hash = HASH(hash, index.type);
return hash;
}
/* Hash an ALU instruction. */
static uint32_t
hash_instr(const void *data)
{
const bi_instr *I = data;
uint32_t hash = 0;
const bi_instr *I = data;
uint32_t hash = 0;
hash = HASH(hash, I->op);
hash = HASH(hash, I->nr_dests);
hash = HASH(hash, I->nr_srcs);
hash = HASH(hash, I->op);
hash = HASH(hash, I->nr_dests);
hash = HASH(hash, I->nr_srcs);
assert(!I->flow && !I->slot && "CSE must be early");
assert(!I->flow && !I->slot && "CSE must be early");
/* Explicitly skip destinations, except for size details */
bi_foreach_dest(I, d) {
hash = HASH(hash, I->dest[d].swizzle);
}
/* Explicitly skip destinations, except for size details */
bi_foreach_dest(I, d) {
hash = HASH(hash, I->dest[d].swizzle);
}
bi_foreach_src(I, s) {
hash = hash_index(hash, I->src[s]);
}
bi_foreach_src(I, s) {
hash = hash_index(hash, I->src[s]);
}
/* Explicitly skip branch, regfmt, vecsize, no_spill, tdd, table */
hash = HASH(hash, I->dest_mod);
/* Explicitly skip branch, regfmt, vecsize, no_spill, tdd, table */
hash = HASH(hash, I->dest_mod);
/* Explicitly skip other immediates */
hash = HASH(hash, I->shift);
/* Explicitly skip other immediates */
hash = HASH(hash, I->shift);
for (unsigned i = 0; i < ARRAY_SIZE(I->flags); ++i)
hash = HASH(hash, I->flags[i]);
for (unsigned i = 0; i < ARRAY_SIZE(I->flags); ++i)
hash = HASH(hash, I->flags[i]);
return hash;
return hash;
}
static bool
instrs_equal(const void *_i1, const void *_i2)
{
const bi_instr *i1 = _i1, *i2 = _i2;
const bi_instr *i1 = _i1, *i2 = _i2;
if (i1->op != i2->op) return false;
if (i1->nr_srcs != i2->nr_srcs) return false;
if (i1->nr_dests != i2->nr_dests) return false;
if (i1->op != i2->op)
return false;
if (i1->nr_srcs != i2->nr_srcs)
return false;
if (i1->nr_dests != i2->nr_dests)
return false;
/* Explicitly skip destinations */
/* Explicitly skip destinations */
bi_foreach_src(i1, s) {
bi_index s1 = i1->src[s], s2 = i2->src[s];
bi_foreach_src(i1, s) {
bi_index s1 = i1->src[s], s2 = i2->src[s];
if (memcmp(&s1, &s2, sizeof(s1)) != 0)
return false;
}
if (memcmp(&s1, &s2, sizeof(s1)) != 0)
return false;
}
if (i1->dest_mod != i2->dest_mod)
return false;
if (i1->dest_mod != i2->dest_mod)
return false;
if (i1->shift != i2->shift)
return false;
if (i1->shift != i2->shift)
return false;
for (unsigned i = 0; i < ARRAY_SIZE(i1->flags); ++i) {
if (i1->flags[i] != i2->flags[i])
return false;
}
for (unsigned i = 0; i < ARRAY_SIZE(i1->flags); ++i) {
if (i1->flags[i] != i2->flags[i])
return false;
}
return true;
return true;
}
/* Determines what instructions the above routines have to handle */
@ -122,64 +125,64 @@ instrs_equal(const void *_i1, const void *_i2)
static bool
instr_can_cse(const bi_instr *I)
{
switch (I->op) {
case BI_OPCODE_DTSEL_IMM:
case BI_OPCODE_DISCARD_F32:
return false;
default:
break;
}
switch (I->op) {
case BI_OPCODE_DTSEL_IMM:
case BI_OPCODE_DISCARD_F32:
return false;
default:
break;
}
/* Be conservative about which message-passing instructions we CSE,
* since most are not pure even within a thread.
*/
if (bi_opcode_props[I->op].message && I->op != BI_OPCODE_LEA_BUF_IMM)
return false;
/* Be conservative about which message-passing instructions we CSE,
* since most are not pure even within a thread.
*/
if (bi_opcode_props[I->op].message && I->op != BI_OPCODE_LEA_BUF_IMM)
return false;
if (I->branch_target)
return false;
if (I->branch_target)
return false;
return true;
return true;
}
void
bi_opt_cse(bi_context *ctx)
{
struct set *instr_set = _mesa_set_create(NULL, hash_instr, instrs_equal);
struct set *instr_set = _mesa_set_create(NULL, hash_instr, instrs_equal);
bi_foreach_block(ctx, block) {
bi_index *replacement = calloc(sizeof(bi_index), ctx->ssa_alloc);
_mesa_set_clear(instr_set, NULL);
bi_foreach_block(ctx, block) {
bi_index *replacement = calloc(sizeof(bi_index), ctx->ssa_alloc);
_mesa_set_clear(instr_set, NULL);
bi_foreach_instr_in_block(block, instr) {
/* Rewrite before trying to CSE anything so we converge
* locally in one iteration */
bi_foreach_ssa_src(instr, s) {
if (bi_is_staging_src(instr, s))
continue;
bi_foreach_instr_in_block(block, instr) {
/* Rewrite before trying to CSE anything so we converge
* locally in one iteration */
bi_foreach_ssa_src(instr, s) {
if (bi_is_staging_src(instr, s))
continue;
bi_index repl = replacement[instr->src[s].value];
if (!bi_is_null(repl))
bi_replace_src(instr, s, repl);
}
bi_index repl = replacement[instr->src[s].value];
if (!bi_is_null(repl))
bi_replace_src(instr, s, repl);
}
if (!instr_can_cse(instr))
continue;
if (!instr_can_cse(instr))
continue;
bool found;
struct set_entry *entry =
_mesa_set_search_or_add(instr_set, instr, &found);
if (found) {
const bi_instr *match = entry->key;
bool found;
struct set_entry *entry =
_mesa_set_search_or_add(instr_set, instr, &found);
if (found) {
const bi_instr *match = entry->key;
bi_foreach_dest(instr, d) {
replacement[instr->dest[d].value] = match->dest[d];
}
}
}
bi_foreach_dest(instr, d) {
replacement[instr->dest[d].value] = match->dest[d];
}
}
}
free(replacement);
}
free(replacement);
}
_mesa_set_destroy(instr_set, NULL);
_mesa_set_destroy(instr_set, NULL);
}
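
A miniature of the local CSE above, as a hypothetical sketch: a linear scan stands in for the hash set, instructions are reduced to (op, src0, src1, dest) tuples, and sources are rewritten before matching, mirroring the rewrite-then-search order in the pass.

  #include <stdio.h>

  struct ins {
     int op, src0, src1, dest;
  };

  int
  main(void)
  {
     struct ins prog[] = {
        {'+', 0, 1, 2},
        {'+', 0, 1, 3}, /* duplicate of the first add */
        {'*', 2, 3, 4},
     };
     int n = sizeof(prog) / sizeof(prog[0]);
     int replacement[16];

     for (int i = 0; i < 16; ++i)
        replacement[i] = i;

     for (int i = 0; i < n; ++i) {
        /* Rewrite sources first so replacements converge in one pass */
        prog[i].src0 = replacement[prog[i].src0];
        prog[i].src1 = replacement[prog[i].src1];

        for (int j = 0; j < i; ++j) {
           if (prog[j].op == prog[i].op && prog[j].src0 == prog[i].src0 &&
               prog[j].src1 == prog[i].src1) {
              replacement[prog[i].dest] = prog[j].dest;
              break;
           }
        }
     }

     printf("v3 is replaced by v%d\n", replacement[3]); /* v2 */
     return 0;
  }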


@ -22,66 +22,67 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "util/u_memory.h"
#include "compiler.h"
/* A simple SSA-based mark-and-sweep dead code elimination pass. */
void
bi_opt_dead_code_eliminate(bi_context *ctx)
{
/* Mark live values */
BITSET_WORD *mark = calloc(sizeof(BITSET_WORD), BITSET_WORDS(ctx->ssa_alloc));
/* Mark live values */
BITSET_WORD *mark =
calloc(sizeof(BITSET_WORD), BITSET_WORDS(ctx->ssa_alloc));
u_worklist worklist;
u_worklist_init(&worklist, ctx->num_blocks, NULL);
u_worklist worklist;
u_worklist_init(&worklist, ctx->num_blocks, NULL);
bi_foreach_block(ctx, block) {
bi_worklist_push_head(&worklist, block);
}
bi_foreach_block(ctx, block) {
bi_worklist_push_head(&worklist, block);
}
while(!u_worklist_is_empty(&worklist)) {
/* Pop in reverse order for backwards pass */
bi_block *blk = bi_worklist_pop_head(&worklist);
while (!u_worklist_is_empty(&worklist)) {
/* Pop in reverse order for backwards pass */
bi_block *blk = bi_worklist_pop_head(&worklist);
bool progress = false;
bool progress = false;
bi_foreach_instr_in_block_rev(blk, I) {
bool needed = bi_side_effects(I);
bi_foreach_instr_in_block_rev(blk, I) {
bool needed = bi_side_effects(I);
bi_foreach_dest(I, d)
needed |= BITSET_TEST(mark, I->dest[d].value);
bi_foreach_dest(I, d)
needed |= BITSET_TEST(mark, I->dest[d].value);
if (!needed)
continue;
if (!needed)
continue;
bi_foreach_ssa_src(I, s) {
progress |= !BITSET_TEST(mark, I->src[s].value);
BITSET_SET(mark, I->src[s].value);
}
}
bi_foreach_ssa_src(I, s) {
progress |= !BITSET_TEST(mark, I->src[s].value);
BITSET_SET(mark, I->src[s].value);
}
}
/* XXX: slow */
if (progress) {
bi_foreach_block(ctx, block)
bi_worklist_push_head(&worklist, block);
}
}
/* XXX: slow */
if (progress) {
bi_foreach_block(ctx, block)
bi_worklist_push_head(&worklist, block);
}
}
u_worklist_fini(&worklist);
u_worklist_fini(&worklist);
/* Sweep */
bi_foreach_instr_global_safe(ctx, I) {
bool needed = bi_side_effects(I);
/* Sweep */
bi_foreach_instr_global_safe(ctx, I) {
bool needed = bi_side_effects(I);
bi_foreach_dest(I, d)
needed |= BITSET_TEST(mark, I->dest[d].value);
bi_foreach_dest(I, d)
needed |= BITSET_TEST(mark, I->dest[d].value);
if (!needed)
bi_remove_instruction(I);
}
if (!needed)
bi_remove_instruction(I);
}
free(mark);
free(mark);
}
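
A miniature of the mark-and-sweep idea on a straight-line program (hypothetical encoding, -1 meaning no operand): walking backwards, an instruction is needed if it has side effects or its destination is already marked, and its sources are then marked in turn.

  #include <stdio.h>

  struct ins {
     int dest, src0, src1, has_side_effects;
  };

  int
  main(void)
  {
     struct ins prog[] = {
        {0, -1, -1, 0},  /* v0 = const             */
        {1, -1, -1, 0},  /* v1 = const (never used) */
        {2, 0, 0, 0},    /* v2 = v0 + v0            */
        {-1, 2, -1, 1},  /* store v2 (side effects) */
     };
     int n = sizeof(prog) / sizeof(prog[0]);
     int marked[8] = {0};

     /* Mark: walk backwards, keeping side effects and already-marked dests */
     for (int i = n - 1; i >= 0; --i) {
        int needed = prog[i].has_side_effects ||
                     (prog[i].dest >= 0 && marked[prog[i].dest]);

        if (!needed)
           continue;

        if (prog[i].src0 >= 0)
           marked[prog[i].src0] = 1;
        if (prog[i].src1 >= 0)
           marked[prog[i].src1] = 1;
     }

     /* Sweep */
     for (int i = 0; i < n; ++i) {
        int needed = prog[i].has_side_effects ||
                     (prog[i].dest >= 0 && marked[prog[i].dest]);

        printf("instruction %d: %s\n", i, needed ? "kept" : "removed");
     }

     return 0;
  }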
/* Post-RA liveness-based dead code analysis to clean up results of bundling */
@ -89,39 +90,39 @@ bi_opt_dead_code_eliminate(bi_context *ctx)
uint64_t MUST_CHECK
bi_postra_liveness_ins(uint64_t live, bi_instr *ins)
{
bi_foreach_dest(ins, d) {
if (ins->dest[d].type == BI_INDEX_REGISTER) {
unsigned nr = bi_count_write_registers(ins, d);
unsigned reg = ins->dest[d].value;
live &= ~(BITFIELD64_MASK(nr) << reg);
}
}
bi_foreach_dest(ins, d) {
if (ins->dest[d].type == BI_INDEX_REGISTER) {
unsigned nr = bi_count_write_registers(ins, d);
unsigned reg = ins->dest[d].value;
live &= ~(BITFIELD64_MASK(nr) << reg);
}
}
bi_foreach_src(ins, s) {
if (ins->src[s].type == BI_INDEX_REGISTER) {
unsigned nr = bi_count_read_registers(ins, s);
unsigned reg = ins->src[s].value;
live |= (BITFIELD64_MASK(nr) << reg);
}
}
bi_foreach_src(ins, s) {
if (ins->src[s].type == BI_INDEX_REGISTER) {
unsigned nr = bi_count_read_registers(ins, s);
unsigned reg = ins->src[s].value;
live |= (BITFIELD64_MASK(nr) << reg);
}
}
return live;
return live;
}
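
A worked example of the 64-bit register-mask bookkeeping above (hypothetical registers and instructions), processed bottom-up as the analysis does: a write clears its register bits, a read sets them.

  #include <stdint.h>
  #include <stdio.h>

  int
  main(void)
  {
     uint64_t live = 1ull << 4; /* r4 is live out of the block */

     /* Last instruction, processed first: r4 = add r0, r1 */
     live &= ~(1ull << 4);              /* the write kills r4 */
     live |= (1ull << 0) | (1ull << 1); /* the reads make r0, r1 live */

     /* Earlier instruction: r1 = mov r2 */
     live &= ~(1ull << 1);
     live |= (1ull << 2);

     printf("live-in mask: 0x%llx\n", (unsigned long long)live); /* r0 and r2 */
     return 0;
  }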
static bool
bi_postra_liveness_block(bi_block *blk)
{
bi_foreach_successor(blk, succ)
blk->reg_live_out |= succ->reg_live_in;
bi_foreach_successor(blk, succ)
blk->reg_live_out |= succ->reg_live_in;
uint64_t live = blk->reg_live_out;
uint64_t live = blk->reg_live_out;
bi_foreach_instr_in_block_rev(blk, ins)
live = bi_postra_liveness_ins(live, ins);
bi_foreach_instr_in_block_rev(blk, ins)
live = bi_postra_liveness_ins(live, ins);
bool progress = blk->reg_live_in != live;
blk->reg_live_in = live;
return progress;
bool progress = blk->reg_live_in != live;
blk->reg_live_in = live;
return progress;
}
/* Globally, liveness analysis uses a fixed-point algorithm based on a
@ -133,58 +134,58 @@ bi_postra_liveness_block(bi_block *blk)
void
bi_postra_liveness(bi_context *ctx)
{
u_worklist worklist;
bi_worklist_init(ctx, &worklist);
u_worklist worklist;
bi_worklist_init(ctx, &worklist);
bi_foreach_block(ctx, block) {
block->reg_live_out = block->reg_live_in = 0;
bi_foreach_block(ctx, block) {
block->reg_live_out = block->reg_live_in = 0;
bi_worklist_push_tail(&worklist, block);
}
bi_worklist_push_tail(&worklist, block);
}
while (!u_worklist_is_empty(&worklist)) {
/* Pop off in reverse order since liveness is backwards */
bi_block *blk = bi_worklist_pop_tail(&worklist);
while (!u_worklist_is_empty(&worklist)) {
/* Pop off in reverse order since liveness is backwards */
bi_block *blk = bi_worklist_pop_tail(&worklist);
/* Update liveness information. If we made progress, we need to
* reprocess the predecessors
*/
if (bi_postra_liveness_block(blk)) {
bi_foreach_predecessor(blk, pred)
bi_worklist_push_head(&worklist, *pred);
}
}
/* Update liveness information. If we made progress, we need to
* reprocess the predecessors
*/
if (bi_postra_liveness_block(blk)) {
bi_foreach_predecessor(blk, pred)
bi_worklist_push_head(&worklist, *pred);
}
}
u_worklist_fini(&worklist);
u_worklist_fini(&worklist);
}
void
bi_opt_dce_post_ra(bi_context *ctx)
{
bi_postra_liveness(ctx);
bi_postra_liveness(ctx);
bi_foreach_block_rev(ctx, block) {
uint64_t live = block->reg_live_out;
bi_foreach_block_rev(ctx, block) {
uint64_t live = block->reg_live_out;
bi_foreach_instr_in_block_rev(block, ins) {
if (ins->op == BI_OPCODE_DTSEL_IMM)
ins->dest[0] = bi_null();
bi_foreach_instr_in_block_rev(block, ins) {
if (ins->op == BI_OPCODE_DTSEL_IMM)
ins->dest[0] = bi_null();
bi_foreach_dest(ins, d) {
if (ins->dest[d].type != BI_INDEX_REGISTER)
continue;
bi_foreach_dest(ins, d) {
if (ins->dest[d].type != BI_INDEX_REGISTER)
continue;
unsigned nr = bi_count_write_registers(ins, d);
unsigned reg = ins->dest[d].value;
uint64_t mask = (BITFIELD64_MASK(nr) << reg);
bool cullable = (ins->op != BI_OPCODE_BLEND);
cullable &= !bi_opcode_props[ins->op].sr_write;
unsigned nr = bi_count_write_registers(ins, d);
unsigned reg = ins->dest[d].value;
uint64_t mask = (BITFIELD64_MASK(nr) << reg);
bool cullable = (ins->op != BI_OPCODE_BLEND);
cullable &= !bi_opcode_props[ins->op].sr_write;
if (!(live & mask) && cullable)
ins->dest[d] = bi_null();
}
if (!(live & mask) && cullable)
ins->dest[d] = bi_null();
}
live = bi_postra_liveness_ins(live, ins);
}
}
live = bi_postra_liveness_ins(live, ins);
}
}
}


@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_builder.h"
#include "compiler.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"
@ -51,58 +51,60 @@
static inline bool
bi_can_fuse_dual_tex(bi_instr *I, bool fuse_zero_lod)
{
return (I->op == BI_OPCODE_TEXS_2D_F32 || I->op == BI_OPCODE_TEXS_2D_F16) &&
(I->texture_index < 4 && I->sampler_index < 4) &&
(I->lod_mode == fuse_zero_lod);
return (I->op == BI_OPCODE_TEXS_2D_F32 || I->op == BI_OPCODE_TEXS_2D_F16) &&
(I->texture_index < 4 && I->sampler_index < 4) &&
(I->lod_mode == fuse_zero_lod);
}
static enum bifrost_texture_format
bi_format_for_texs_2d(enum bi_opcode op)
{
switch (op) {
case BI_OPCODE_TEXS_2D_F32: return BIFROST_TEXTURE_FORMAT_F32;
case BI_OPCODE_TEXS_2D_F16: return BIFROST_TEXTURE_FORMAT_F16;
default: unreachable("Invalid TEXS_2D instruction");
}
switch (op) {
case BI_OPCODE_TEXS_2D_F32:
return BIFROST_TEXTURE_FORMAT_F32;
case BI_OPCODE_TEXS_2D_F16:
return BIFROST_TEXTURE_FORMAT_F16;
default:
unreachable("Invalid TEXS_2D instruction");
}
}
static void
bi_fuse_dual(bi_context *ctx, bi_instr *I1, bi_instr *I2)
{
/* Construct a texture operation descriptor for the dual texture */
struct bifrost_dual_texture_operation desc = {
.mode = BIFROST_TEXTURE_OPERATION_DUAL,
/* Construct a texture operation descriptor for the dual texture */
struct bifrost_dual_texture_operation desc = {
.mode = BIFROST_TEXTURE_OPERATION_DUAL,
.primary_texture_index = I1->texture_index,
.primary_sampler_index = I1->sampler_index,
.primary_format = bi_format_for_texs_2d(I1->op),
.primary_mask = 0xF,
.primary_texture_index = I1->texture_index,
.primary_sampler_index = I1->sampler_index,
.primary_format = bi_format_for_texs_2d(I1->op),
.primary_mask = 0xF,
.secondary_texture_index = I2->texture_index,
.secondary_sampler_index = I2->sampler_index,
.secondary_format = bi_format_for_texs_2d(I2->op),
.secondary_mask = 0xF,
};
.secondary_texture_index = I2->texture_index,
.secondary_sampler_index = I2->sampler_index,
.secondary_format = bi_format_for_texs_2d(I2->op),
.secondary_mask = 0xF,
};
/* LOD mode is implied in a shader stage */
assert(I1->lod_mode == I2->lod_mode);
/* LOD mode is implied in a shader stage */
assert(I1->lod_mode == I2->lod_mode);
/* Insert before the earlier instruction in case its result is consumed
* before the later instruction
*/
bi_builder b = bi_init_builder(ctx, bi_before_instr(I1));
/* Insert before the earlier instruction in case its result is consumed
* before the later instruction
*/
bi_builder b = bi_init_builder(ctx, bi_before_instr(I1));
bi_instr *I = bi_texc_dual_to(&b,
I1->dest[0], I2->dest[0], bi_null(), /* staging */
I1->src[0], I1->src[1], /* coordinates */
bi_imm_u32(bi_dual_tex_as_u32(desc)), I1->lod_mode,
bi_count_write_registers(I1, 0),
bi_count_write_registers(I2, 0));
bi_instr *I = bi_texc_dual_to(
&b, I1->dest[0], I2->dest[0], bi_null(), /* staging */
I1->src[0], I1->src[1], /* coordinates */
bi_imm_u32(bi_dual_tex_as_u32(desc)), I1->lod_mode,
bi_count_write_registers(I1, 0), bi_count_write_registers(I2, 0));
I->skip = I1->skip && I2->skip;
I->skip = I1->skip && I2->skip;
bi_remove_instruction(I1);
bi_remove_instruction(I2);
bi_remove_instruction(I1);
bi_remove_instruction(I2);
}
#define HASH(hash, data) XXH32(&(data), sizeof(data), hash)
@ -110,45 +112,45 @@ bi_fuse_dual(bi_context *ctx, bi_instr *I1, bi_instr *I2)
static uint32_t
coord_hash(const void *key)
{
const bi_instr *I = key;
const bi_instr *I = key;
return XXH32(&I->src[0], sizeof(I->src[0]) + sizeof(I->src[1]), 0);
return XXH32(&I->src[0], sizeof(I->src[0]) + sizeof(I->src[1]), 0);
}
static bool
coord_equal(const void *key1, const void *key2)
{
const bi_instr *I = key1;
const bi_instr *J = key2;
const bi_instr *I = key1;
const bi_instr *J = key2;
return memcmp(&I->src[0], &J->src[0],
sizeof(I->src[0]) + sizeof(I->src[1])) == 0;
return memcmp(&I->src[0], &J->src[0],
sizeof(I->src[0]) + sizeof(I->src[1])) == 0;
}
static void
bi_opt_fuse_dual_texture_block(bi_context *ctx, bi_block *block)
{
struct set *set = _mesa_set_create(ctx, coord_hash, coord_equal);
bool fuse_zero_lod = (ctx->stage != MESA_SHADER_FRAGMENT);
bool found = false;
struct set *set = _mesa_set_create(ctx, coord_hash, coord_equal);
bool fuse_zero_lod = (ctx->stage != MESA_SHADER_FRAGMENT);
bool found = false;
bi_foreach_instr_in_block_safe(block, I) {
if (!bi_can_fuse_dual_tex(I, fuse_zero_lod)) continue;
bi_foreach_instr_in_block_safe(block, I) {
if (!bi_can_fuse_dual_tex(I, fuse_zero_lod))
continue;
struct set_entry *ent = _mesa_set_search_or_add(set, I, &found);
struct set_entry *ent = _mesa_set_search_or_add(set, I, &found);
if (found) {
bi_fuse_dual(ctx, (bi_instr *) ent->key, I);
_mesa_set_remove(set, ent);
}
}
if (found) {
bi_fuse_dual(ctx, (bi_instr *)ent->key, I);
_mesa_set_remove(set, ent);
}
}
}
void
bi_opt_fuse_dual_texture(bi_context *ctx)
{
bi_foreach_block(ctx, block) {
bi_opt_fuse_dual_texture_block(ctx, block);
}
bi_foreach_block(ctx, block) {
bi_opt_fuse_dual_texture_block(ctx, block);
}
}


@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_builder.h"
#include "compiler.h"
/* Bifrost v7 can preload up to two messages of the form:
*
@ -35,8 +35,8 @@
static bool
bi_is_regfmt_float(enum bi_register_format regfmt)
{
return (regfmt == BI_REGISTER_FORMAT_F32) ||
(regfmt == BI_REGISTER_FORMAT_F16);
return (regfmt == BI_REGISTER_FORMAT_F32) ||
(regfmt == BI_REGISTER_FORMAT_F16);
}
/*
@ -46,107 +46,107 @@ bi_is_regfmt_float(enum bi_register_format regfmt)
static bool
bi_can_interp_at_sample(bi_instr *I)
{
/* .sample mode with r61 corresponds to per-sample interpolation */
if (I->sample == BI_SAMPLE_SAMPLE)
return bi_is_value_equiv(I->src[0], bi_register(61));
/* .sample mode with r61 corresponds to per-sample interpolation */
if (I->sample == BI_SAMPLE_SAMPLE)
return bi_is_value_equiv(I->src[0], bi_register(61));
/* If the shader runs with pixel-frequency shading, .sample is
* equivalent to .center, so allow .center
*
* If the shader runs with sample-frequency shading, .sample and .center
* are not equivalent. However, the ESSL 3.20 specification
* stipulates in section 4.5 ("Interpolation Qualifiers"):
*
* for fragment shader input variables qualified with neither
* centroid nor sample, the value of the assigned variable may be
* interpolated anywhere within the pixel and a single value may be
* assigned to each sample within the pixel, to the extent permitted
* by the OpenGL ES Specification.
*
* We only produce .center for variables qualified with neither centroid
* nor sample, so if .center is specified this section applies. This
* suggests that, although per-pixel interpolation is allowed, it is not
* mandated ("may" rather than "must" or "should"). Therefore it appears
* safe to substitute sample.
*/
return (I->sample == BI_SAMPLE_CENTER);
/* If the shader runs with pixel-frequency shading, .sample is
* equivalent to .center, so allow .center
*
* If the shader runs with sample-frequency shading, .sample and .center
* are not equivalent. However, the ESSL 3.20 specification
* stipulates in section 4.5 ("Interpolation Qualifiers"):
*
* for fragment shader input variables qualified with neither
* centroid nor sample, the value of the assigned variable may be
* interpolated anywhere within the pixel and a single value may be
* assigned to each sample within the pixel, to the extent permitted
* by the OpenGL ES Specification.
*
* We only produce .center for variables qualified with neither centroid
* nor sample, so if .center is specified this section applies. This
* suggests that, although per-pixel interpolation is allowed, it is not
* mandated ("may" rather than "must" or "should"). Therefore it appears
* safe to substitute sample.
*/
return (I->sample == BI_SAMPLE_CENTER);
}
static bool
bi_can_preload_ld_var(bi_instr *I)
{
return (I->op == BI_OPCODE_LD_VAR_IMM) &&
bi_can_interp_at_sample(I) &&
bi_is_regfmt_float(I->register_format);
return (I->op == BI_OPCODE_LD_VAR_IMM) && bi_can_interp_at_sample(I) &&
bi_is_regfmt_float(I->register_format);
}
static bool
bi_is_var_tex(enum bi_opcode op)
{
return (op == BI_OPCODE_VAR_TEX_F32) || (op == BI_OPCODE_VAR_TEX_F16);
return (op == BI_OPCODE_VAR_TEX_F32) || (op == BI_OPCODE_VAR_TEX_F16);
}
void
bi_opt_message_preload(bi_context *ctx)
{
unsigned nr_preload = 0;
unsigned nr_preload = 0;
/* We only preload from the first block */
bi_block *block = bi_start_block(&ctx->blocks);
bi_builder b = bi_init_builder(ctx, bi_before_nonempty_block(block));
/* We only preload from the first block */
bi_block *block = bi_start_block(&ctx->blocks);
bi_builder b = bi_init_builder(ctx, bi_before_nonempty_block(block));
bi_foreach_instr_in_block_safe(block, I) {
if (I->nr_dests != 1) continue;
bi_foreach_instr_in_block_safe(block, I) {
if (I->nr_dests != 1)
continue;
struct bifrost_message_preload msg;
struct bifrost_message_preload msg;
if (bi_can_preload_ld_var(I)) {
msg = (struct bifrost_message_preload) {
.enabled = true,
.varying_index = I->varying_index,
.fp16 = (I->register_format == BI_REGISTER_FORMAT_F16),
.num_components = I->vecsize + 1,
};
} else if (bi_is_var_tex(I->op)) {
msg = (struct bifrost_message_preload) {
.enabled = true,
.texture = true,
.varying_index = I->varying_index,
.texture_index = I->texture_index,
.fp16 = (I->op == BI_OPCODE_VAR_TEX_F16),
.skip = I->skip,
.zero_lod = I->lod_mode,
};
} else {
continue;
}
if (bi_can_preload_ld_var(I)) {
msg = (struct bifrost_message_preload){
.enabled = true,
.varying_index = I->varying_index,
.fp16 = (I->register_format == BI_REGISTER_FORMAT_F16),
.num_components = I->vecsize + 1,
};
} else if (bi_is_var_tex(I->op)) {
msg = (struct bifrost_message_preload){
.enabled = true,
.texture = true,
.varying_index = I->varying_index,
.texture_index = I->texture_index,
.fp16 = (I->op == BI_OPCODE_VAR_TEX_F16),
.skip = I->skip,
.zero_lod = I->lod_mode,
};
} else {
continue;
}
/* Report the preloading */
ctx->info.bifrost->messages[nr_preload] = msg;
/* Report the preloading */
ctx->info.bifrost->messages[nr_preload] = msg;
/* Replace with a collect of preloaded registers. The collect
* kills the moves, so the collect is free (it is coalesced).
*/
b.cursor = bi_before_instr(I);
/* Replace with a collect of preloaded registers. The collect
* kills the moves, so the collect is free (it is coalesced).
*/
b.cursor = bi_before_instr(I);
unsigned nr = bi_count_write_registers(I, 0);
bi_instr *collect = bi_collect_i32_to(&b, I->dest[0], nr);
unsigned nr = bi_count_write_registers(I, 0);
bi_instr *collect = bi_collect_i32_to(&b, I->dest[0], nr);
/* The registers themselves must be preloaded at the start of
* the program. Preloaded registers are coalesced, so these
* moves are free.
*/
b.cursor = bi_before_block(block);
bi_foreach_src(collect, i) {
unsigned reg = (nr_preload * 4) + i;
/* The registers themselves must be preloaded at the start of
* the program. Preloaded registers are coalesced, so these
* moves are free.
*/
b.cursor = bi_before_block(block);
bi_foreach_src(collect, i) {
unsigned reg = (nr_preload * 4) + i;
collect->src[i] = bi_mov_i32(&b, bi_register(reg));
}
collect->src[i] = bi_mov_i32(&b, bi_register(reg));
}
bi_remove_instruction(I);
bi_remove_instruction(I);
/* Maximum number of preloaded messages */
if ((++nr_preload) == 2)
break;
}
/* Maximum number of preloaded messages */
if ((++nr_preload) == 2)
break;
}
}


@ -22,8 +22,8 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_builder.h"
#include "compiler.h"
/*
* Due to a Bifrost encoding restriction, some instructions cannot have an abs
@ -33,76 +33,76 @@
static bool
bi_would_impact_abs(unsigned arch, bi_instr *I, bi_index repl, unsigned s)
{
return (arch <= 8) && I->src[1 - s].abs &&
bi_is_word_equiv(I->src[1 - s], repl);
return (arch <= 8) && I->src[1 - s].abs &&
bi_is_word_equiv(I->src[1 - s], repl);
}
static bool
bi_takes_fabs(unsigned arch, bi_instr *I, bi_index repl, unsigned s)
{
switch (I->op) {
case BI_OPCODE_FCMP_V2F16:
case BI_OPCODE_FMAX_V2F16:
case BI_OPCODE_FMIN_V2F16:
return !bi_would_impact_abs(arch, I, repl, s);
case BI_OPCODE_FADD_V2F16:
/*
* For FADD.v2f16, the FMA pipe has the abs encoding hazard,
* while the FADD pipe cannot encode a clamp. Either case in
* isolation can be worked around in the scheduler, but both
* together are impossible to encode. Avoid the hazard.
*/
return !(I->clamp && bi_would_impact_abs(arch, I, repl, s));
case BI_OPCODE_V2F32_TO_V2F16:
/* TODO: Needs both match or lower */
return false;
case BI_OPCODE_FLOG_TABLE_F32:
/* TODO: Need to check mode */
return false;
default:
return bi_opcode_props[I->op].abs & BITFIELD_BIT(s);
}
switch (I->op) {
case BI_OPCODE_FCMP_V2F16:
case BI_OPCODE_FMAX_V2F16:
case BI_OPCODE_FMIN_V2F16:
return !bi_would_impact_abs(arch, I, repl, s);
case BI_OPCODE_FADD_V2F16:
/*
* For FADD.v2f16, the FMA pipe has the abs encoding hazard,
* while the FADD pipe cannot encode a clamp. Either case in
* isolation can be worked around in the scheduler, but both
* together is impossible to encode. Avoid the hazard.
*/
return !(I->clamp && bi_would_impact_abs(arch, I, repl, s));
case BI_OPCODE_V2F32_TO_V2F16:
/* TODO: Needs both match or lower */
return false;
case BI_OPCODE_FLOG_TABLE_F32:
/* TODO: Need to check mode */
return false;
default:
return bi_opcode_props[I->op].abs & BITFIELD_BIT(s);
}
}
static bool
bi_takes_fneg(unsigned arch, bi_instr *I, unsigned s)
{
switch (I->op) {
case BI_OPCODE_CUBE_SSEL:
case BI_OPCODE_CUBE_TSEL:
case BI_OPCODE_CUBEFACE:
/* TODO: Bifrost encoding restriction: need to match or lower */
return arch >= 9;
case BI_OPCODE_FREXPE_F32:
case BI_OPCODE_FREXPE_V2F16:
case BI_OPCODE_FLOG_TABLE_F32:
/* TODO: Need to check mode */
return false;
default:
return bi_opcode_props[I->op].neg & BITFIELD_BIT(s);
}
switch (I->op) {
case BI_OPCODE_CUBE_SSEL:
case BI_OPCODE_CUBE_TSEL:
case BI_OPCODE_CUBEFACE:
/* TODO: Bifrost encoding restriction: need to match or lower */
return arch >= 9;
case BI_OPCODE_FREXPE_F32:
case BI_OPCODE_FREXPE_V2F16:
case BI_OPCODE_FLOG_TABLE_F32:
/* TODO: Need to check mode */
return false;
default:
return bi_opcode_props[I->op].neg & BITFIELD_BIT(s);
}
}
static bool
bi_is_fabsneg(enum bi_opcode op, enum bi_size size)
{
return (size == BI_SIZE_32 && op == BI_OPCODE_FABSNEG_F32) ||
(size == BI_SIZE_16 && op == BI_OPCODE_FABSNEG_V2F16);
return (size == BI_SIZE_32 && op == BI_OPCODE_FABSNEG_F32) ||
(size == BI_SIZE_16 && op == BI_OPCODE_FABSNEG_V2F16);
}
static enum bi_swizzle
bi_compose_swizzle_16(enum bi_swizzle a, enum bi_swizzle b)
{
assert(a <= BI_SWIZZLE_H11);
assert(b <= BI_SWIZZLE_H11);
assert(a <= BI_SWIZZLE_H11);
assert(b <= BI_SWIZZLE_H11);
bool al = (a & BI_SWIZZLE_H10);
bool ar = (a & BI_SWIZZLE_H01);
bool bl = (b & BI_SWIZZLE_H10);
bool br = (b & BI_SWIZZLE_H01);
bool al = (a & BI_SWIZZLE_H10);
bool ar = (a & BI_SWIZZLE_H01);
bool bl = (b & BI_SWIZZLE_H10);
bool br = (b & BI_SWIZZLE_H01);
return ((al ? br : bl) ? BI_SWIZZLE_H10 : 0) |
((ar ? br : bl) ? BI_SWIZZLE_H01 : 0);
return ((al ? br : bl) ? BI_SWIZZLE_H10 : 0) |
((ar ? br : bl) ? BI_SWIZZLE_H01 : 0);
}
/* Like bi_replace_index, but composes instead of overwrites */
@ -110,17 +110,17 @@ bi_compose_swizzle_16(enum bi_swizzle a, enum bi_swizzle b)
static inline bi_index
bi_compose_float_index(bi_index old, bi_index repl)
{
/* abs(-x) = abs(+x) so ignore repl.neg if old.abs is set, otherwise
* -(-x) = x but -(+x) = +(-x) so need to exclusive-or the negates */
repl.neg = old.neg ^ (repl.neg && !old.abs);
/* abs(-x) = abs(+x) so ignore repl.neg if old.abs is set, otherwise
* -(-x) = x but -(+x) = +(-x) so need to exclusive-or the negates */
repl.neg = old.neg ^ (repl.neg && !old.abs);
/* +/- abs(+/- abs(x)) = +/- abs(x), etc so just or the two */
repl.abs |= old.abs;
/* +/- abs(+/- abs(x)) = +/- abs(x), etc so just or the two */
repl.abs |= old.abs;
/* Use the old swizzle to select from the replacement swizzle */
repl.swizzle = bi_compose_swizzle_16(old.swizzle, repl.swizzle);
/* Use the old swizzle to select from the replacement swizzle */
repl.swizzle = bi_compose_swizzle_16(old.swizzle, repl.swizzle);
return repl;
return repl;
}
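The two composition rules above can be sanity-checked in isolation. A minimal standalone sketch, assuming a hypothetical float_mods struct that stands in for the abs/neg bits on an index (not driver code):

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the abs/neg modifier bits on an index. */
struct float_mods {
   bool abs;
   bool neg;
};

/* Compose as bi_compose_float_index does: the inner (repl) negation is
 * xor'd in unless the outer (old) abs already discards it; abs is sticky.
 */
static struct float_mods
compose_mods(struct float_mods old, struct float_mods repl)
{
   struct float_mods out = repl;
   out.neg = old.neg ^ (repl.neg && !old.abs);
   out.abs = repl.abs || old.abs;
   return out;
}

int
main(void)
{
   /* Enumerate all 16 combinations and print the composed modifiers. */
   for (int o = 0; o < 4; ++o) {
      for (int r = 0; r < 4; ++r) {
         struct float_mods old = {.abs = o & 1, .neg = o >> 1};
         struct float_mods repl = {.abs = r & 1, .neg = r >> 1};
         struct float_mods c = compose_mods(old, repl);

         printf("old(abs=%d neg=%d) repl(abs=%d neg=%d) -> abs=%d neg=%d\n",
                old.abs, old.neg, repl.abs, repl.neg, c.abs, c.neg);
      }
   }
   return 0;
}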
/* DISCARD.b32(FCMP.f(x, y)) --> DISCARD.f(x, y) */
@ -128,30 +128,35 @@ bi_compose_float_index(bi_index old, bi_index repl)
static inline bool
bi_fuse_discard_fcmp(bi_context *ctx, bi_instr *I, bi_instr *mod)
{
if (!mod) return false;
if (I->op != BI_OPCODE_DISCARD_B32) return false;
if (mod->op != BI_OPCODE_FCMP_F32 && mod->op != BI_OPCODE_FCMP_V2F16) return false;
if (mod->cmpf >= BI_CMPF_GTLT) return false;
if (!mod)
return false;
if (I->op != BI_OPCODE_DISCARD_B32)
return false;
if (mod->op != BI_OPCODE_FCMP_F32 && mod->op != BI_OPCODE_FCMP_V2F16)
return false;
if (mod->cmpf >= BI_CMPF_GTLT)
return false;
/* result_type doesn't matter */
/* result_type doesn't matter */
/* .abs and .neg modifiers allowed on Valhall DISCARD but not Bifrost */
bool absneg = mod->src[0].neg || mod->src[0].abs;
absneg |= mod->src[1].neg || mod->src[1].abs;
/* .abs and .neg modifiers allowed on Valhall DISCARD but not Bifrost */
bool absneg = mod->src[0].neg || mod->src[0].abs;
absneg |= mod->src[1].neg || mod->src[1].abs;
if (ctx->arch <= 8 && absneg) return false;
if (ctx->arch <= 8 && absneg)
return false;
enum bi_swizzle r = I->src[0].swizzle;
enum bi_swizzle r = I->src[0].swizzle;
bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
I = bi_discard_f32(&b, mod->src[0], mod->src[1], mod->cmpf);
bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
I = bi_discard_f32(&b, mod->src[0], mod->src[1], mod->cmpf);
if (mod->op == BI_OPCODE_FCMP_V2F16) {
I->src[0].swizzle = bi_compose_swizzle_16(r, I->src[0].swizzle);
I->src[1].swizzle = bi_compose_swizzle_16(r, I->src[1].swizzle);
}
if (mod->op == BI_OPCODE_FCMP_V2F16) {
I->src[0].swizzle = bi_compose_swizzle_16(r, I->src[0].swizzle);
I->src[1].swizzle = bi_compose_swizzle_16(r, I->src[1].swizzle);
}
return true;
return true;
}
/*
@ -159,80 +164,80 @@ bi_fuse_discard_fcmp(bi_context *ctx, bi_instr *I, bi_instr *mod)
* because all 8-bit and 16-bit integers may be represented exactly as fp32.
*/
struct {
enum bi_opcode inner;
enum bi_opcode outer;
enum bi_opcode replacement;
enum bi_opcode inner;
enum bi_opcode outer;
enum bi_opcode replacement;
} bi_small_int_patterns[] = {
{ BI_OPCODE_S8_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S8_TO_F32 },
{ BI_OPCODE_U8_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U8_TO_F32 },
{ BI_OPCODE_U8_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U8_TO_F32 },
{ BI_OPCODE_S16_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S16_TO_F32 },
{ BI_OPCODE_U16_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U16_TO_F32 },
{ BI_OPCODE_U16_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U16_TO_F32 },
{BI_OPCODE_S8_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S8_TO_F32},
{BI_OPCODE_U8_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U8_TO_F32},
{BI_OPCODE_U8_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U8_TO_F32},
{BI_OPCODE_S16_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S16_TO_F32},
{BI_OPCODE_U16_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U16_TO_F32},
{BI_OPCODE_U16_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U16_TO_F32},
};
static inline void
bi_fuse_small_int_to_f32(bi_instr *I, bi_instr *mod)
{
for (unsigned i = 0; i < ARRAY_SIZE(bi_small_int_patterns); ++i) {
if (I->op != bi_small_int_patterns[i].outer)
continue;
if (mod->op != bi_small_int_patterns[i].inner)
continue;
for (unsigned i = 0; i < ARRAY_SIZE(bi_small_int_patterns); ++i) {
if (I->op != bi_small_int_patterns[i].outer)
continue;
if (mod->op != bi_small_int_patterns[i].inner)
continue;
assert(I->src[0].swizzle == BI_SWIZZLE_H01);
I->src[0] = mod->src[0];
I->round = BI_ROUND_NONE;
I->op = bi_small_int_patterns[i].replacement;
}
assert(I->src[0].swizzle == BI_SWIZZLE_H01);
I->src[0] = mod->src[0];
I->round = BI_ROUND_NONE;
I->op = bi_small_int_patterns[i].replacement;
}
}
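A minimal standalone check of the premise stated above (8-bit and 16-bit integers are exact in fp32), which is what makes folding the widening integer conversion into the float conversion safe; not driver code:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
   /* fp32 has a 24-bit significand, so every 16-bit (and 8-bit) integer
    * survives the round trip; S16_TO_S32 followed by S32_TO_F32 therefore
    * gives the same result as a direct S16_TO_F32.
    */
   for (int32_t v = INT16_MIN; v <= INT16_MAX; ++v)
      assert((int32_t)(float)v == v);

   printf("all 16-bit integers are exact in fp32\n");
   return 0;
}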
void
bi_opt_mod_prop_forward(bi_context *ctx)
{
bi_instr **lut = calloc(sizeof(bi_instr *), ctx->ssa_alloc);
bi_instr **lut = calloc(sizeof(bi_instr *), ctx->ssa_alloc);
bi_foreach_instr_global_safe(ctx, I) {
/* Try fusing FCMP into DISCARD.b32, building a new DISCARD.f32
* instruction. As this is the only optimization DISCARD is
* involved in, this short-circuits other processing.
*/
if (I->op == BI_OPCODE_DISCARD_B32) {
if (bi_is_ssa(I->src[0]) &&
bi_fuse_discard_fcmp(ctx, I, lut[I->src[0].value])) {
bi_remove_instruction(I);
}
bi_foreach_instr_global_safe(ctx, I) {
/* Try fusing FCMP into DISCARD.b32, building a new DISCARD.f32
* instruction. As this is the only optimization DISCARD is
* involved in, this short-circuits other processing.
*/
if (I->op == BI_OPCODE_DISCARD_B32) {
if (bi_is_ssa(I->src[0]) &&
bi_fuse_discard_fcmp(ctx, I, lut[I->src[0].value])) {
bi_remove_instruction(I);
}
continue;
}
continue;
}
bi_foreach_dest(I, d) {
lut[I->dest[d].value] = I;
}
bi_foreach_dest(I, d) {
lut[I->dest[d].value] = I;
}
bi_foreach_ssa_src(I, s) {
bi_instr *mod = lut[I->src[s].value];
bi_foreach_ssa_src(I, s) {
bi_instr *mod = lut[I->src[s].value];
if (!mod)
continue;
if (!mod)
continue;
unsigned size = bi_opcode_props[I->op].size;
unsigned size = bi_opcode_props[I->op].size;
bi_fuse_small_int_to_f32(I, mod);
bi_fuse_small_int_to_f32(I, mod);
if (bi_is_fabsneg(mod->op, size)) {
if (mod->src[0].abs && !bi_takes_fabs(ctx->arch, I, mod->src[0], s))
continue;
if (bi_is_fabsneg(mod->op, size)) {
if (mod->src[0].abs && !bi_takes_fabs(ctx->arch, I, mod->src[0], s))
continue;
if (mod->src[0].neg && !bi_takes_fneg(ctx->arch, I, s))
continue;
if (mod->src[0].neg && !bi_takes_fneg(ctx->arch, I, s))
continue;
I->src[s] = bi_compose_float_index(I->src[s], mod->src[0]);
}
}
}
I->src[s] = bi_compose_float_index(I->src[s], mod->src[0]);
}
}
}
free(lut);
free(lut);
}
/* RSCALE has restrictions on how the clamp may be used, only used for
@ -241,199 +246,207 @@ bi_opt_mod_prop_forward(bi_context *ctx)
static bool
bi_takes_clamp(bi_instr *I)
{
switch (I->op) {
case BI_OPCODE_FMA_RSCALE_F32:
case BI_OPCODE_FMA_RSCALE_V2F16:
case BI_OPCODE_FADD_RSCALE_F32:
return false;
case BI_OPCODE_FADD_V2F16:
/* Encoding restriction */
return !(I->src[0].abs && I->src[1].abs &&
bi_is_word_equiv(I->src[0], I->src[1]));
default:
return bi_opcode_props[I->op].clamp;
}
switch (I->op) {
case BI_OPCODE_FMA_RSCALE_F32:
case BI_OPCODE_FMA_RSCALE_V2F16:
case BI_OPCODE_FADD_RSCALE_F32:
return false;
case BI_OPCODE_FADD_V2F16:
/* Encoding restriction */
return !(I->src[0].abs && I->src[1].abs &&
bi_is_word_equiv(I->src[0], I->src[1]));
default:
return bi_opcode_props[I->op].clamp;
}
}
static bool
bi_is_fclamp(enum bi_opcode op, enum bi_size size)
{
return (size == BI_SIZE_32 && op == BI_OPCODE_FCLAMP_F32) ||
(size == BI_SIZE_16 && op == BI_OPCODE_FCLAMP_V2F16);
return (size == BI_SIZE_32 && op == BI_OPCODE_FCLAMP_F32) ||
(size == BI_SIZE_16 && op == BI_OPCODE_FCLAMP_V2F16);
}
static bool
bi_optimizer_clamp(bi_instr *I, bi_instr *use)
{
if (!bi_is_fclamp(use->op, bi_opcode_props[I->op].size)) return false;
if (!bi_takes_clamp(I)) return false;
if (!bi_is_fclamp(use->op, bi_opcode_props[I->op].size))
return false;
if (!bi_takes_clamp(I))
return false;
/* Clamps are bitfields (clamp_m1_1/clamp_0_inf) so composition is OR */
I->clamp |= use->clamp;
I->dest[0] = use->dest[0];
return true;
/* Clamps are bitfields (clamp_m1_1/clamp_0_inf) so composition is OR */
I->clamp |= use->clamp;
I->dest[0] = use->dest[0];
return true;
}
static enum bi_opcode
bi_sized_mux_op(unsigned size)
{
switch (size) {
case 8: return BI_OPCODE_MUX_V4I8;
case 16: return BI_OPCODE_MUX_V2I16;
case 32: return BI_OPCODE_MUX_I32;
default: unreachable("invalid size");
}
switch (size) {
case 8:
return BI_OPCODE_MUX_V4I8;
case 16:
return BI_OPCODE_MUX_V2I16;
case 32:
return BI_OPCODE_MUX_I32;
default:
unreachable("invalid size");
}
}
static bool
bi_is_fixed_mux(bi_instr *I, unsigned size, bi_index v1)
{
return I->op == bi_sized_mux_op(size) &&
bi_is_value_equiv(I->src[0], bi_zero()) &&
bi_is_value_equiv(I->src[1], v1);
return I->op == bi_sized_mux_op(size) &&
bi_is_value_equiv(I->src[0], bi_zero()) &&
bi_is_value_equiv(I->src[1], v1);
}
static bool
bi_takes_int_result_type(enum bi_opcode op)
{
switch (op) {
case BI_OPCODE_ICMP_I32:
case BI_OPCODE_ICMP_S32:
case BI_OPCODE_ICMP_U32:
case BI_OPCODE_ICMP_V2I16:
case BI_OPCODE_ICMP_V2S16:
case BI_OPCODE_ICMP_V2U16:
case BI_OPCODE_ICMP_V4I8:
case BI_OPCODE_ICMP_V4S8:
case BI_OPCODE_ICMP_V4U8:
case BI_OPCODE_FCMP_F32:
case BI_OPCODE_FCMP_V2F16:
return true;
default:
return false;
}
switch (op) {
case BI_OPCODE_ICMP_I32:
case BI_OPCODE_ICMP_S32:
case BI_OPCODE_ICMP_U32:
case BI_OPCODE_ICMP_V2I16:
case BI_OPCODE_ICMP_V2S16:
case BI_OPCODE_ICMP_V2U16:
case BI_OPCODE_ICMP_V4I8:
case BI_OPCODE_ICMP_V4S8:
case BI_OPCODE_ICMP_V4U8:
case BI_OPCODE_FCMP_F32:
case BI_OPCODE_FCMP_V2F16:
return true;
default:
return false;
}
}
static bool
bi_takes_float_result_type(enum bi_opcode op)
{
return (op == BI_OPCODE_FCMP_F32) ||
(op == BI_OPCODE_FCMP_V2F16);
return (op == BI_OPCODE_FCMP_F32) || (op == BI_OPCODE_FCMP_V2F16);
}
/* CMP+MUX -> CMP with result type */
static bool
bi_optimizer_result_type(bi_instr *I, bi_instr *mux)
{
if (bi_opcode_props[I->op].size != bi_opcode_props[mux->op].size)
return false;
if (bi_opcode_props[I->op].size != bi_opcode_props[mux->op].size)
return false;
if (bi_is_fixed_mux(mux, 32, bi_imm_f32(1.0)) ||
bi_is_fixed_mux(mux, 16, bi_imm_f16(1.0))) {
if (bi_is_fixed_mux(mux, 32, bi_imm_f32(1.0)) ||
bi_is_fixed_mux(mux, 16, bi_imm_f16(1.0))) {
if (!bi_takes_float_result_type(I->op))
return false;
if (!bi_takes_float_result_type(I->op))
return false;
I->result_type = BI_RESULT_TYPE_F1;
} else if (bi_is_fixed_mux(mux, 32, bi_imm_u32(1)) ||
bi_is_fixed_mux(mux, 16, bi_imm_u16(1)) ||
bi_is_fixed_mux(mux, 8, bi_imm_u8(1))) {
I->result_type = BI_RESULT_TYPE_F1;
} else if (bi_is_fixed_mux(mux, 32, bi_imm_u32(1)) ||
bi_is_fixed_mux(mux, 16, bi_imm_u16(1)) ||
bi_is_fixed_mux(mux, 8, bi_imm_u8(1))) {
if (!bi_takes_int_result_type(I->op))
return false;
if (!bi_takes_int_result_type(I->op))
return false;
I->result_type = BI_RESULT_TYPE_I1;
} else {
return false;
}
I->result_type = BI_RESULT_TYPE_I1;
} else {
return false;
}
I->dest[0] = mux->dest[0];
return true;
I->dest[0] = mux->dest[0];
return true;
}
static bool
bi_is_var_tex(bi_instr *var, bi_instr *tex)
{
return (var->op == BI_OPCODE_LD_VAR_IMM) &&
(tex->op == BI_OPCODE_TEXS_2D_F16 || tex->op == BI_OPCODE_TEXS_2D_F32) &&
(var->register_format == BI_REGISTER_FORMAT_F32) &&
((var->sample == BI_SAMPLE_CENTER && var->update == BI_UPDATE_STORE) ||
(var->sample == BI_SAMPLE_NONE && var->update == BI_UPDATE_RETRIEVE)) &&
(tex->texture_index == tex->sampler_index) &&
(tex->texture_index < 4) &&
(var->index < 8);
return (var->op == BI_OPCODE_LD_VAR_IMM) &&
(tex->op == BI_OPCODE_TEXS_2D_F16 ||
tex->op == BI_OPCODE_TEXS_2D_F32) &&
(var->register_format == BI_REGISTER_FORMAT_F32) &&
((var->sample == BI_SAMPLE_CENTER &&
var->update == BI_UPDATE_STORE) ||
(var->sample == BI_SAMPLE_NONE &&
var->update == BI_UPDATE_RETRIEVE)) &&
(tex->texture_index == tex->sampler_index) &&
(tex->texture_index < 4) && (var->index < 8);
}
static bool
bi_optimizer_var_tex(bi_context *ctx, bi_instr *var, bi_instr *tex)
{
if (!bi_is_var_tex(var, tex)) return false;
if (!bi_is_var_tex(var, tex))
return false;
/* Construct the corresponding VAR_TEX instruction */
bi_builder b = bi_init_builder(ctx, bi_after_instr(var));
/* Construct the corresponding VAR_TEX instruction */
bi_builder b = bi_init_builder(ctx, bi_after_instr(var));
bi_instr *I = bi_var_tex_f32_to(&b, tex->dest[0], tex->lod_mode,
var->sample, var->update, tex->texture_index, var->index);
I->skip = tex->skip;
bi_instr *I = bi_var_tex_f32_to(&b, tex->dest[0], tex->lod_mode, var->sample,
var->update, tex->texture_index, var->index);
I->skip = tex->skip;
if (tex->op == BI_OPCODE_TEXS_2D_F16)
I->op = BI_OPCODE_VAR_TEX_F16;
if (tex->op == BI_OPCODE_TEXS_2D_F16)
I->op = BI_OPCODE_VAR_TEX_F16;
/* Dead code elimination will clean up for us */
return true;
/* Dead code elimination will clean up for us */
return true;
}
void
bi_opt_mod_prop_backward(bi_context *ctx)
{
unsigned count = ctx->ssa_alloc;
bi_instr **uses = calloc(count, sizeof(*uses));
BITSET_WORD *multiple = calloc(BITSET_WORDS(count), sizeof(*multiple));
unsigned count = ctx->ssa_alloc;
bi_instr **uses = calloc(count, sizeof(*uses));
BITSET_WORD *multiple = calloc(BITSET_WORDS(count), sizeof(*multiple));
bi_foreach_instr_global_rev(ctx, I) {
bi_foreach_ssa_src(I, s) {
unsigned v = I->src[s].value;
bi_foreach_instr_global_rev(ctx, I) {
bi_foreach_ssa_src(I, s) {
unsigned v = I->src[s].value;
if (uses[v] && uses[v] != I)
BITSET_SET(multiple, v);
else
uses[v] = I;
}
if (uses[v] && uses[v] != I)
BITSET_SET(multiple, v);
else
uses[v] = I;
}
if (!I->nr_dests)
continue;
if (!I->nr_dests)
continue;
bi_instr *use = uses[I->dest[0].value];
bi_instr *use = uses[I->dest[0].value];
if (!use || BITSET_TEST(multiple, I->dest[0].value))
continue;
if (!use || BITSET_TEST(multiple, I->dest[0].value))
continue;
/* Destination has a single use, try to propagate */
bool propagated =
bi_optimizer_clamp(I, use) ||
bi_optimizer_result_type(I, use);
/* Destination has a single use, try to propagate */
bool propagated =
bi_optimizer_clamp(I, use) || bi_optimizer_result_type(I, use);
if (!propagated && I->op == BI_OPCODE_LD_VAR_IMM && use->op == BI_OPCODE_SPLIT_I32) {
/* Need to see through the split in a
* ld_var_imm/split/var_tex sequence
*/
bi_instr *tex = uses[use->dest[0].value];
if (!propagated && I->op == BI_OPCODE_LD_VAR_IMM &&
use->op == BI_OPCODE_SPLIT_I32) {
/* Need to see through the split in a
* ld_var_imm/split/var_tex sequence
*/
bi_instr *tex = uses[use->dest[0].value];
if (!tex || BITSET_TEST(multiple, use->dest[0].value))
continue;
if (!tex || BITSET_TEST(multiple, use->dest[0].value))
continue;
use = tex;
propagated = bi_optimizer_var_tex(ctx, I, use);
}
use = tex;
propagated = bi_optimizer_var_tex(ctx, I, use);
}
if (propagated) {
bi_remove_instruction(use);
continue;
}
}
if (propagated) {
bi_remove_instruction(use);
continue;
}
}
free(uses);
free(multiple);
free(uses);
free(multiple);
}
/*
@ -443,37 +456,37 @@ bi_opt_mod_prop_backward(bi_context *ctx)
static bool
bi_lower_opt_instruction_helper(bi_builder *b, bi_instr *I)
{
bi_instr *repl;
bi_instr *repl;
switch (I->op) {
case BI_OPCODE_FABSNEG_F32:
case BI_OPCODE_FCLAMP_F32:
repl = bi_fadd_f32_to(b, I->dest[0], I->src[0], bi_negzero());
repl->clamp = I->clamp;
return true;
switch (I->op) {
case BI_OPCODE_FABSNEG_F32:
case BI_OPCODE_FCLAMP_F32:
repl = bi_fadd_f32_to(b, I->dest[0], I->src[0], bi_negzero());
repl->clamp = I->clamp;
return true;
case BI_OPCODE_FABSNEG_V2F16:
case BI_OPCODE_FCLAMP_V2F16:
repl = bi_fadd_v2f16_to(b, I->dest[0], I->src[0], bi_negzero());
repl->clamp = I->clamp;
return true;
case BI_OPCODE_FABSNEG_V2F16:
case BI_OPCODE_FCLAMP_V2F16:
repl = bi_fadd_v2f16_to(b, I->dest[0], I->src[0], bi_negzero());
repl->clamp = I->clamp;
return true;
case BI_OPCODE_DISCARD_B32:
bi_discard_f32(b, I->src[0], bi_zero(), BI_CMPF_NE);
return true;
case BI_OPCODE_DISCARD_B32:
bi_discard_f32(b, I->src[0], bi_zero(), BI_CMPF_NE);
return true;
default:
return false;
}
default:
return false;
}
}
void
bi_lower_opt_instructions(bi_context *ctx)
{
bi_foreach_instr_global_safe(ctx, I) {
bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
bi_foreach_instr_global_safe(ctx, I) {
bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
if (bi_lower_opt_instruction_helper(&b, I))
bi_remove_instruction(I);
}
if (bi_lower_opt_instruction_helper(&b, I))
bi_remove_instruction(I);
}
}
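A minimal standalone check, under the assumption that the lowering above relies on x + (-0.0) being an identity in round-to-nearest (so the replacement FADD only exists to carry the clamp and source modifiers); not driver code:

#include <math.h>
#include <stdio.h>

int
main(void)
{
   /* Adding -0.0 returns the other operand unchanged, including +0.0
    * (whose sign is preserved), so lowering FABSNEG/FCLAMP to an FADD
    * against negative zero does not perturb the value.
    */
   float samples[] = {0.0f, -0.0f, 1.5f, -2.25f, 1e-30f};

   for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); ++i) {
      float x = samples[i];
      float y = x + -0.0f;

      printf("%g + -0.0 = %g (signbit %d -> %d)\n", x, y, !!signbit(x),
             !!signbit(y));
   }
   return 0;
}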


@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_builder.h"
#include "compiler.h"
/* This optimization pass, intended to run once after code emission but before
* copy propagation, analyzes direct word-aligned UBO reads and promotes a
@ -32,17 +32,16 @@
static bool
bi_is_ubo(bi_instr *ins)
{
return (bi_opcode_props[ins->op].message == BIFROST_MESSAGE_LOAD) &&
(ins->seg == BI_SEG_UBO);
return (bi_opcode_props[ins->op].message == BIFROST_MESSAGE_LOAD) &&
(ins->seg == BI_SEG_UBO);
}
static bool
bi_is_direct_aligned_ubo(bi_instr *ins)
{
return bi_is_ubo(ins) &&
(ins->src[0].type == BI_INDEX_CONSTANT) &&
(ins->src[1].type == BI_INDEX_CONSTANT) &&
((ins->src[0].value & 0x3) == 0);
return bi_is_ubo(ins) && (ins->src[0].type == BI_INDEX_CONSTANT) &&
(ins->src[1].type == BI_INDEX_CONSTANT) &&
((ins->src[0].value & 0x3) == 0);
}
/* Represents use data for a single UBO */
@ -50,44 +49,46 @@ bi_is_direct_aligned_ubo(bi_instr *ins)
#define MAX_UBO_WORDS (65536 / 16)
struct bi_ubo_block {
BITSET_DECLARE(pushed, MAX_UBO_WORDS);
uint8_t range[MAX_UBO_WORDS];
BITSET_DECLARE(pushed, MAX_UBO_WORDS);
uint8_t range[MAX_UBO_WORDS];
};
struct bi_ubo_analysis {
/* Per block analysis */
unsigned nr_blocks;
struct bi_ubo_block *blocks;
/* Per block analysis */
unsigned nr_blocks;
struct bi_ubo_block *blocks;
};
static struct bi_ubo_analysis
bi_analyze_ranges(bi_context *ctx)
{
struct bi_ubo_analysis res = {
.nr_blocks = ctx->nir->info.num_ubos + 1,
};
struct bi_ubo_analysis res = {
.nr_blocks = ctx->nir->info.num_ubos + 1,
};
res.blocks = calloc(res.nr_blocks, sizeof(struct bi_ubo_block));
res.blocks = calloc(res.nr_blocks, sizeof(struct bi_ubo_block));
bi_foreach_instr_global(ctx, ins) {
if (!bi_is_direct_aligned_ubo(ins)) continue;
bi_foreach_instr_global(ctx, ins) {
if (!bi_is_direct_aligned_ubo(ins))
continue;
unsigned ubo = ins->src[1].value;
unsigned word = ins->src[0].value / 4;
unsigned channels = bi_opcode_props[ins->op].sr_count;
unsigned ubo = ins->src[1].value;
unsigned word = ins->src[0].value / 4;
unsigned channels = bi_opcode_props[ins->op].sr_count;
assert(ubo < res.nr_blocks);
assert(channels > 0 && channels <= 4);
assert(ubo < res.nr_blocks);
assert(channels > 0 && channels <= 4);
if (word >= MAX_UBO_WORDS) continue;
if (word >= MAX_UBO_WORDS)
continue;
/* Must use max if the same base is read with different channel
* counts, which is possible with nir_opt_shrink_vectors */
uint8_t *range = res.blocks[ubo].range;
range[word] = MAX2(range[word], channels);
}
/* Must use max if the same base is read with different channel
* counts, which is possible with nir_opt_shrink_vectors */
uint8_t *range = res.blocks[ubo].range;
range[word] = MAX2(range[word], channels);
}
return res;
return res;
}
/* Select UBO words to push. A sophisticated implementation would consider the
@ -97,92 +98,93 @@ bi_analyze_ranges(bi_context *ctx)
static void
bi_pick_ubo(struct panfrost_ubo_push *push, struct bi_ubo_analysis *analysis)
{
for (signed ubo = analysis->nr_blocks - 1; ubo >= 0; --ubo) {
struct bi_ubo_block *block = &analysis->blocks[ubo];
for (signed ubo = analysis->nr_blocks - 1; ubo >= 0; --ubo) {
struct bi_ubo_block *block = &analysis->blocks[ubo];
for (unsigned r = 0; r < MAX_UBO_WORDS; ++r) {
unsigned range = block->range[r];
for (unsigned r = 0; r < MAX_UBO_WORDS; ++r) {
unsigned range = block->range[r];
/* Don't push something we don't access */
if (range == 0) continue;
/* Don't push something we don't access */
if (range == 0)
continue;
/* Don't push more than possible */
if (push->count > PAN_MAX_PUSH - range)
return;
/* Don't push more than possible */
if (push->count > PAN_MAX_PUSH - range)
return;
for (unsigned offs = 0; offs < range; ++offs) {
struct panfrost_ubo_word word = {
.ubo = ubo,
.offset = (r + offs) * 4,
};
for (unsigned offs = 0; offs < range; ++offs) {
struct panfrost_ubo_word word = {
.ubo = ubo,
.offset = (r + offs) * 4,
};
push->words[push->count++] = word;
}
push->words[push->count++] = word;
}
/* Mark it as pushed so we can rewrite */
BITSET_SET(block->pushed, r);
}
}
/* Mark it as pushed so we can rewrite */
BITSET_SET(block->pushed, r);
}
}
}
void
bi_opt_push_ubo(bi_context *ctx)
{
struct bi_ubo_analysis analysis = bi_analyze_ranges(ctx);
bi_pick_ubo(ctx->info.push, &analysis);
struct bi_ubo_analysis analysis = bi_analyze_ranges(ctx);
bi_pick_ubo(ctx->info.push, &analysis);
ctx->ubo_mask = 0;
ctx->ubo_mask = 0;
bi_foreach_instr_global_safe(ctx, ins) {
if (!bi_is_ubo(ins)) continue;
bi_foreach_instr_global_safe(ctx, ins) {
if (!bi_is_ubo(ins))
continue;
unsigned ubo = ins->src[1].value;
unsigned offset = ins->src[0].value;
unsigned ubo = ins->src[1].value;
unsigned offset = ins->src[0].value;
if (!bi_is_direct_aligned_ubo(ins)) {
/* The load can't be pushed, so this UBO needs to be
* uploaded conventionally */
if (ins->src[1].type == BI_INDEX_CONSTANT)
ctx->ubo_mask |= BITSET_BIT(ubo);
else
ctx->ubo_mask = ~0;
if (!bi_is_direct_aligned_ubo(ins)) {
/* The load can't be pushed, so this UBO needs to be
* uploaded conventionally */
if (ins->src[1].type == BI_INDEX_CONSTANT)
ctx->ubo_mask |= BITSET_BIT(ubo);
else
ctx->ubo_mask = ~0;
continue;
}
continue;
}
/* Check if we decided to push this */
assert(ubo < analysis.nr_blocks);
if (!BITSET_TEST(analysis.blocks[ubo].pushed, offset / 4)) {
ctx->ubo_mask |= BITSET_BIT(ubo);
continue;
}
/* Check if we decided to push this */
assert(ubo < analysis.nr_blocks);
if (!BITSET_TEST(analysis.blocks[ubo].pushed, offset / 4)) {
ctx->ubo_mask |= BITSET_BIT(ubo);
continue;
}
/* Replace the UBO load with moves from FAU */
bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));
/* Replace the UBO load with moves from FAU */
bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));
unsigned nr = bi_opcode_props[ins->op].sr_count;
bi_instr *vec = bi_collect_i32_to(&b, ins->dest[0], nr);
unsigned nr = bi_opcode_props[ins->op].sr_count;
bi_instr *vec = bi_collect_i32_to(&b, ins->dest[0], nr);
bi_foreach_src(vec, w) {
/* FAU is grouped in pairs (2 x 4-byte) */
unsigned base =
pan_lookup_pushed_ubo(ctx->info.push, ubo,
(offset + 4 * w));
bi_foreach_src(vec, w) {
/* FAU is grouped in pairs (2 x 4-byte) */
unsigned base =
pan_lookup_pushed_ubo(ctx->info.push, ubo, (offset + 4 * w));
unsigned fau_idx = (base >> 1);
unsigned fau_hi = (base & 1);
unsigned fau_idx = (base >> 1);
unsigned fau_hi = (base & 1);
vec->src[w] = bi_fau(BIR_FAU_UNIFORM | fau_idx, fau_hi);
}
vec->src[w] = bi_fau(BIR_FAU_UNIFORM | fau_idx, fau_hi);
}
bi_remove_instruction(ins);
}
bi_remove_instruction(ins);
}
free(analysis.blocks);
free(analysis.blocks);
}
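The FAU addressing used in the rewrite above is simple: a push-constant word index maps to a 64-bit FAU slot plus a high/low half. A minimal standalone sketch with hypothetical names, not driver code:

#include <stdio.h>

/* Split a 32-bit push-constant word index into its FAU slot (a pair of
 * 32-bit words) and the half within that slot, mirroring the shift/mask
 * applied when a pushed UBO load is rewritten into moves from FAU.
 */
static void
fau_address(unsigned word, unsigned *slot, unsigned *half)
{
   *slot = word >> 1; /* which 64-bit FAU entry */
   *half = word & 1;  /* low (0) or high (1) 32-bit word */
}

int
main(void)
{
   for (unsigned word = 0; word < 6; ++word) {
      unsigned slot, half;
      fau_address(word, &slot, &half);
      printf("word %u -> fau slot %u, %s half\n", word, slot,
             half ? "high" : "low");
   }
   return 0;
}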
typedef struct {
BITSET_DECLARE(row, PAN_MAX_PUSH);
BITSET_DECLARE(row, PAN_MAX_PUSH);
} adjacency_row;
/* Find the connected component containing `node` with depth-first search */
@ -190,33 +192,32 @@ static void
bi_find_component(adjacency_row *adjacency, BITSET_WORD *visited,
unsigned *component, unsigned *size, unsigned node)
{
unsigned neighbour;
unsigned neighbour;
BITSET_SET(visited, node);
component[(*size)++] = node;
BITSET_SET(visited, node);
component[(*size)++] = node;
BITSET_FOREACH_SET(neighbour, adjacency[node].row, PAN_MAX_PUSH) {
if (!BITSET_TEST(visited, neighbour)) {
bi_find_component(adjacency, visited, component, size,
neighbour);
}
}
BITSET_FOREACH_SET(neighbour, adjacency[node].row, PAN_MAX_PUSH) {
if (!BITSET_TEST(visited, neighbour)) {
bi_find_component(adjacency, visited, component, size, neighbour);
}
}
}
static bool
bi_is_uniform(bi_index idx)
{
return (idx.type == BI_INDEX_FAU) && (idx.value & BIR_FAU_UNIFORM);
return (idx.type == BI_INDEX_FAU) && (idx.value & BIR_FAU_UNIFORM);
}
/* Get the index of a uniform in 32-bit words from the start of FAU-RAM */
static unsigned
bi_uniform_word(bi_index idx)
{
assert(bi_is_uniform(idx));
assert(idx.offset <= 1);
assert(bi_is_uniform(idx));
assert(idx.offset <= 1);
return ((idx.value & ~BIR_FAU_UNIFORM) << 1) | idx.offset;
return ((idx.value & ~BIR_FAU_UNIFORM) << 1) | idx.offset;
}
/*
@ -228,35 +229,35 @@ bi_uniform_word(bi_index idx)
static void
bi_create_fau_interference_graph(bi_context *ctx, adjacency_row *adjacency)
{
bi_foreach_instr_global(ctx, I) {
unsigned nodes[BI_MAX_SRCS] = {};
unsigned node_count = 0;
bi_foreach_instr_global(ctx, I) {
unsigned nodes[BI_MAX_SRCS] = {};
unsigned node_count = 0;
/* Set nodes[] to 32-bit uniforms accessed */
bi_foreach_src(I, s) {
if (bi_is_uniform(I->src[s])) {
unsigned word = bi_uniform_word(I->src[s]);
/* Set nodes[] to 32-bit uniforms accessed */
bi_foreach_src(I, s) {
if (bi_is_uniform(I->src[s])) {
unsigned word = bi_uniform_word(I->src[s]);
if (word >= ctx->info.push_offset)
nodes[node_count++] = word;
}
}
if (word >= ctx->info.push_offset)
nodes[node_count++] = word;
}
}
/* Create clique connecting nodes[] */
for (unsigned i = 0; i < node_count; ++i) {
for (unsigned j = 0; j < node_count; ++j) {
if (i == j)
continue;
/* Create clique connecting nodes[] */
for (unsigned i = 0; i < node_count; ++i) {
for (unsigned j = 0; j < node_count; ++j) {
if (i == j)
continue;
unsigned x = nodes[i], y = nodes[j];
assert(MAX2(x, y) < ctx->info.push->count);
unsigned x = nodes[i], y = nodes[j];
assert(MAX2(x, y) < ctx->info.push->count);
/* Add undirected edge between the nodes */
BITSET_SET(adjacency[x].row, y);
BITSET_SET(adjacency[y].row, x);
}
}
}
/* Add undirected edge between the nodes */
BITSET_SET(adjacency[x].row, y);
BITSET_SET(adjacency[y].row, x);
}
}
}
}
/*
@ -278,71 +279,72 @@ bi_create_fau_interference_graph(bi_context *ctx, adjacency_row *adjacency)
void
bi_opt_reorder_push(bi_context *ctx)
{
adjacency_row adjacency[PAN_MAX_PUSH] = { 0 };
BITSET_DECLARE(visited, PAN_MAX_PUSH) = { 0 };
adjacency_row adjacency[PAN_MAX_PUSH] = {0};
BITSET_DECLARE(visited, PAN_MAX_PUSH) = {0};
unsigned ordering[PAN_MAX_PUSH] = { 0 };
unsigned unpaired[PAN_MAX_PUSH] = { 0 };
unsigned pushed = 0, unpaired_count = 0;
unsigned ordering[PAN_MAX_PUSH] = {0};
unsigned unpaired[PAN_MAX_PUSH] = {0};
unsigned pushed = 0, unpaired_count = 0;
struct panfrost_ubo_push *push = ctx->info.push;
unsigned push_offset = ctx->info.push_offset;
struct panfrost_ubo_push *push = ctx->info.push;
unsigned push_offset = ctx->info.push_offset;
bi_create_fau_interference_graph(ctx, adjacency);
bi_create_fau_interference_graph(ctx, adjacency);
for (unsigned i = push_offset; i < push->count; ++i) {
if (BITSET_TEST(visited, i)) continue;
for (unsigned i = push_offset; i < push->count; ++i) {
if (BITSET_TEST(visited, i))
continue;
unsigned component[PAN_MAX_PUSH] = { 0 };
unsigned size = 0;
bi_find_component(adjacency, visited, component, &size, i);
unsigned component[PAN_MAX_PUSH] = {0};
unsigned size = 0;
bi_find_component(adjacency, visited, component, &size, i);
/* If there is an odd number of uses, at least one use must be
* unpaired. Arbitrarily take the last one.
*/
if (size % 2)
unpaired[unpaired_count++] = component[--size];
/* If there is an odd number of uses, at least one use must be
* unpaired. Arbitrarily take the last one.
*/
if (size % 2)
unpaired[unpaired_count++] = component[--size];
/* The rest of uses are paired */
assert((size % 2) == 0);
/* The rest of uses are paired */
assert((size % 2) == 0);
/* Push the paired uses */
memcpy(ordering + pushed, component, sizeof(unsigned) * size);
pushed += size;
}
/* Push the paired uses */
memcpy(ordering + pushed, component, sizeof(unsigned) * size);
pushed += size;
}
/* Push unpaired nodes at the end */
memcpy(ordering + pushed, unpaired, sizeof(unsigned) * unpaired_count);
pushed += unpaired_count;
/* Push unpaired nodes at the end */
memcpy(ordering + pushed, unpaired, sizeof(unsigned) * unpaired_count);
pushed += unpaired_count;
/* Ordering is a permutation. Invert it for O(1) lookup. */
unsigned old_to_new[PAN_MAX_PUSH] = { 0 };
/* Ordering is a permutation. Invert it for O(1) lookup. */
unsigned old_to_new[PAN_MAX_PUSH] = {0};
for (unsigned i = 0; i < push_offset; ++i) {
old_to_new[i] = i;
}
for (unsigned i = 0; i < push_offset; ++i) {
old_to_new[i] = i;
}
for (unsigned i = 0; i < pushed; ++i) {
assert(ordering[i] >= push_offset);
old_to_new[ordering[i]] = push_offset + i;
}
for (unsigned i = 0; i < pushed; ++i) {
assert(ordering[i] >= push_offset);
old_to_new[ordering[i]] = push_offset + i;
}
/* Use new ordering throughout the program */
bi_foreach_instr_global(ctx, I) {
bi_foreach_src(I, s) {
if (bi_is_uniform(I->src[s])) {
unsigned node = bi_uniform_word(I->src[s]);
unsigned new_node = old_to_new[node];
I->src[s].value = BIR_FAU_UNIFORM | (new_node >> 1);
I->src[s].offset = new_node & 1;
}
}
}
/* Use new ordering throughout the program */
bi_foreach_instr_global(ctx, I) {
bi_foreach_src(I, s) {
if (bi_is_uniform(I->src[s])) {
unsigned node = bi_uniform_word(I->src[s]);
unsigned new_node = old_to_new[node];
I->src[s].value = BIR_FAU_UNIFORM | (new_node >> 1);
I->src[s].offset = new_node & 1;
}
}
}
/* Use new ordering for push */
struct panfrost_ubo_push old = *push;
for (unsigned i = 0; i < pushed; ++i)
push->words[push_offset + i] = old.words[ordering[i]];
/* Use new ordering for push */
struct panfrost_ubo_push old = *push;
for (unsigned i = 0; i < pushed; ++i)
push->words[push_offset + i] = old.words[ordering[i]];
push->count = push_offset + pushed;
push->count = push_offset + pushed;
}
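A minimal standalone version of the component walk used above, over a plain boolean adjacency matrix instead of the bitset rows; names and sizes are hypothetical, not driver code:

#include <stdbool.h>
#include <stdio.h>

#define N 6

/* Depth-first search collecting one connected component, analogous to
 * bi_find_component but over a simple adjacency matrix.
 */
static void
find_component(bool adj[N][N], bool *visited, unsigned *component,
               unsigned *size, unsigned node)
{
   visited[node] = true;
   component[(*size)++] = node;

   for (unsigned n = 0; n < N; ++n) {
      if (adj[node][n] && !visited[n])
         find_component(adj, visited, component, size, n);
   }
}

int
main(void)
{
   /* Two components: {0, 1, 2} and {3, 4}; node 5 is isolated. */
   bool adj[N][N] = {{false}};
   adj[0][1] = adj[1][0] = true;
   adj[1][2] = adj[2][1] = true;
   adj[3][4] = adj[4][3] = true;

   bool visited[N] = {false};

   for (unsigned i = 0; i < N; ++i) {
      if (visited[i])
         continue;

      unsigned component[N], size = 0;
      find_component(adj, visited, component, &size, i);

      printf("component:");
      for (unsigned j = 0; j < size; ++j)
         printf(" %u", component[j]);
      printf("\n");
   }
   return 0;
}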

File diff suppressed because it is too large


@ -26,149 +26,148 @@
/* Bottom-up local scheduler to reduce register pressure */
#include "compiler.h"
#include "util/dag.h"
#include "compiler.h"
struct sched_ctx {
/* Dependency graph */
struct dag *dag;
/* Dependency graph */
struct dag *dag;
/* Live set */
BITSET_WORD *live;
/* Live set */
BITSET_WORD *live;
};
struct sched_node {
struct dag_node dag;
struct dag_node dag;
/* Instruction this node represents */
bi_instr *instr;
/* Instruction this node represents */
bi_instr *instr;
};
static void
add_dep(struct sched_node *a, struct sched_node *b)
{
if (a && b)
dag_add_edge(&a->dag, &b->dag, 0);
if (a && b)
dag_add_edge(&a->dag, &b->dag, 0);
}
static struct dag *
create_dag(bi_context *ctx, bi_block *block, void *memctx)
{
struct dag *dag = dag_create(ctx);
struct dag *dag = dag_create(ctx);
struct sched_node **last_write =
calloc(ctx->ssa_alloc, sizeof(struct sched_node *));
struct sched_node *coverage = NULL;
struct sched_node *preload = NULL;
struct sched_node **last_write =
calloc(ctx->ssa_alloc, sizeof(struct sched_node *));
struct sched_node *coverage = NULL;
struct sched_node *preload = NULL;
/* Last memory load, to serialize stores against */
struct sched_node *memory_load = NULL;
/* Last memory load, to serialize stores against */
struct sched_node *memory_load = NULL;
/* Last memory store, to serialize loads and stores against */
struct sched_node *memory_store = NULL;
/* Last memory store, to serialize loads and stores against */
struct sched_node *memory_store = NULL;
bi_foreach_instr_in_block(block, I) {
/* Leave branches at the end */
if (I->op == BI_OPCODE_JUMP || bi_opcode_props[I->op].branch)
break;
bi_foreach_instr_in_block(block, I) {
/* Leave branches at the end */
if (I->op == BI_OPCODE_JUMP || bi_opcode_props[I->op].branch)
break;
assert(I->branch_target == NULL);
assert(I->branch_target == NULL);
struct sched_node *node = rzalloc(memctx, struct sched_node);
node->instr = I;
dag_init_node(dag, &node->dag);
struct sched_node *node = rzalloc(memctx, struct sched_node);
node->instr = I;
dag_init_node(dag, &node->dag);
/* Reads depend on writes, no other hazards in SSA */
bi_foreach_ssa_src(I, s)
add_dep(node, last_write[I->src[s].value]);
/* Reads depend on writes, no other hazards in SSA */
bi_foreach_ssa_src(I, s)
add_dep(node, last_write[I->src[s].value]);
bi_foreach_dest(I, d)
last_write[I->dest[d].value] = node;
bi_foreach_dest(I, d)
last_write[I->dest[d].value] = node;
switch (bi_opcode_props[I->op].message) {
case BIFROST_MESSAGE_LOAD:
/* Regular memory loads need to be serialized against
* other memory access. However, UBO memory is read-only
* so it can be moved around freely.
*/
if (I->seg != BI_SEG_UBO) {
add_dep(node, memory_store);
memory_load = node;
}
switch (bi_opcode_props[I->op].message) {
case BIFROST_MESSAGE_LOAD:
/* Regular memory loads need to be serialized against
* other memory access. However, UBO memory is read-only
* so it can be moved around freely.
*/
if (I->seg != BI_SEG_UBO) {
add_dep(node, memory_store);
memory_load = node;
}
break;
break;
case BIFROST_MESSAGE_ATTRIBUTE:
/* Regular attribute loads can be reordered, but
* writeable attributes can't be. Our one use of
* writeable attributes is images.
*/
if ((I->op == BI_OPCODE_LD_TEX) ||
(I->op == BI_OPCODE_LD_TEX_IMM) ||
(I->op == BI_OPCODE_LD_ATTR_TEX)) {
add_dep(node, memory_store);
memory_load = node;
}
case BIFROST_MESSAGE_ATTRIBUTE:
/* Regular attribute loads can be reordered, but
* writeable attributes can't be. Our one use of
* writeable attributes is images.
*/
if ((I->op == BI_OPCODE_LD_TEX) || (I->op == BI_OPCODE_LD_TEX_IMM) ||
(I->op == BI_OPCODE_LD_ATTR_TEX)) {
add_dep(node, memory_store);
memory_load = node;
}
break;
break;
case BIFROST_MESSAGE_STORE:
assert(I->seg != BI_SEG_UBO);
add_dep(node, memory_load);
add_dep(node, memory_store);
memory_store = node;
break;
case BIFROST_MESSAGE_STORE:
assert(I->seg != BI_SEG_UBO);
add_dep(node, memory_load);
add_dep(node, memory_store);
memory_store = node;
break;
case BIFROST_MESSAGE_ATOMIC:
case BIFROST_MESSAGE_BARRIER:
add_dep(node, memory_load);
add_dep(node, memory_store);
memory_load = node;
memory_store = node;
break;
case BIFROST_MESSAGE_ATOMIC:
case BIFROST_MESSAGE_BARRIER:
add_dep(node, memory_load);
add_dep(node, memory_store);
memory_load = node;
memory_store = node;
break;
case BIFROST_MESSAGE_BLEND:
case BIFROST_MESSAGE_Z_STENCIL:
case BIFROST_MESSAGE_TILE:
add_dep(node, coverage);
coverage = node;
break;
case BIFROST_MESSAGE_BLEND:
case BIFROST_MESSAGE_Z_STENCIL:
case BIFROST_MESSAGE_TILE:
add_dep(node, coverage);
coverage = node;
break;
case BIFROST_MESSAGE_ATEST:
/* ATEST signals the end of shader side effects */
add_dep(node, memory_store);
memory_store = node;
case BIFROST_MESSAGE_ATEST:
/* ATEST signals the end of shader side effects */
add_dep(node, memory_store);
memory_store = node;
/* ATEST also updates coverage */
add_dep(node, coverage);
coverage = node;
break;
default:
break;
}
/* ATEST also updates coverage */
add_dep(node, coverage);
coverage = node;
break;
default:
break;
}
add_dep(node, preload);
add_dep(node, preload);
if (I->op == BI_OPCODE_DISCARD_F32) {
/* Serialize against ATEST */
add_dep(node, coverage);
coverage = node;
if (I->op == BI_OPCODE_DISCARD_F32) {
/* Serialize against ATEST */
add_dep(node, coverage);
coverage = node;
/* Also serialize against memory and barriers */
add_dep(node, memory_load);
add_dep(node, memory_store);
memory_load = node;
memory_store = node;
} else if ((I->op == BI_OPCODE_PHI) ||
(I->op == BI_OPCODE_MOV_I32 &&
I->src[0].type == BI_INDEX_REGISTER)) {
preload = node;
}
}
/* Also serialize against memory and barriers */
add_dep(node, memory_load);
add_dep(node, memory_store);
memory_load = node;
memory_store = node;
} else if ((I->op == BI_OPCODE_PHI) ||
(I->op == BI_OPCODE_MOV_I32 &&
I->src[0].type == BI_INDEX_REGISTER)) {
preload = node;
}
}
free(last_write);
free(last_write);
return dag;
return dag;
}
/*
@ -183,30 +182,30 @@ create_dag(bi_context *ctx, bi_block *block, void *memctx)
static signed
calculate_pressure_delta(bi_instr *I, BITSET_WORD *live)
{
signed delta = 0;
signed delta = 0;
/* Destinations must be unique */
bi_foreach_dest(I, d) {
if (BITSET_TEST(live, I->dest[d].value))
delta -= bi_count_write_registers(I, d);
}
/* Destinations must be unique */
bi_foreach_dest(I, d) {
if (BITSET_TEST(live, I->dest[d].value))
delta -= bi_count_write_registers(I, d);
}
bi_foreach_ssa_src(I, src) {
/* Filter duplicates */
bool dupe = false;
bi_foreach_ssa_src(I, src) {
/* Filter duplicates */
bool dupe = false;
for (unsigned i = 0; i < src; ++i) {
if (bi_is_equiv(I->src[i], I->src[src])) {
dupe = true;
break;
}
}
for (unsigned i = 0; i < src; ++i) {
if (bi_is_equiv(I->src[i], I->src[src])) {
dupe = true;
break;
}
}
if (!dupe && !BITSET_TEST(live, I->src[src].value))
delta += bi_count_read_registers(I, src);
}
if (!dupe && !BITSET_TEST(live, I->src[src].value))
delta += bi_count_read_registers(I, src);
}
return delta;
return delta;
}
/*
@ -216,87 +215,88 @@ calculate_pressure_delta(bi_instr *I, BITSET_WORD *live)
static struct sched_node *
choose_instr(struct sched_ctx *s)
{
int32_t min_delta = INT32_MAX;
struct sched_node *best = NULL;
int32_t min_delta = INT32_MAX;
struct sched_node *best = NULL;
list_for_each_entry(struct sched_node, n, &s->dag->heads, dag.link) {
int32_t delta = calculate_pressure_delta(n->instr, s->live);
list_for_each_entry(struct sched_node, n, &s->dag->heads, dag.link) {
int32_t delta = calculate_pressure_delta(n->instr, s->live);
if (delta < min_delta) {
best = n;
min_delta = delta;
}
}
if (delta < min_delta) {
best = n;
min_delta = delta;
}
}
return best;
return best;
}
static void
pressure_schedule_block(bi_context *ctx, bi_block *block, struct sched_ctx *s)
{
/* off by a constant, that's ok */
signed pressure = 0;
signed orig_max_pressure = 0;
unsigned nr_ins = 0;
/* off by a constant, that's ok */
signed pressure = 0;
signed orig_max_pressure = 0;
unsigned nr_ins = 0;
memcpy(s->live, block->ssa_live_out, BITSET_WORDS(ctx->ssa_alloc) * sizeof(BITSET_WORD));
memcpy(s->live, block->ssa_live_out,
BITSET_WORDS(ctx->ssa_alloc) * sizeof(BITSET_WORD));
bi_foreach_instr_in_block_rev(block, I) {
pressure += calculate_pressure_delta(I, s->live);
orig_max_pressure = MAX2(pressure, orig_max_pressure);
bi_liveness_ins_update_ssa(s->live, I);
nr_ins++;
}
bi_foreach_instr_in_block_rev(block, I) {
pressure += calculate_pressure_delta(I, s->live);
orig_max_pressure = MAX2(pressure, orig_max_pressure);
bi_liveness_ins_update_ssa(s->live, I);
nr_ins++;
}
memcpy(s->live, block->ssa_live_out, BITSET_WORDS(ctx->ssa_alloc) * sizeof(BITSET_WORD));
memcpy(s->live, block->ssa_live_out,
BITSET_WORDS(ctx->ssa_alloc) * sizeof(BITSET_WORD));
/* off by a constant, that's ok */
signed max_pressure = 0;
pressure = 0;
/* off by a constant, that's ok */
signed max_pressure = 0;
pressure = 0;
struct sched_node **schedule = calloc(nr_ins, sizeof(struct sched_node *));
nr_ins = 0;
struct sched_node **schedule = calloc(nr_ins, sizeof(struct sched_node *));
nr_ins = 0;
while (!list_is_empty(&s->dag->heads)) {
struct sched_node *node = choose_instr(s);
pressure += calculate_pressure_delta(node->instr, s->live);
max_pressure = MAX2(pressure, max_pressure);
dag_prune_head(s->dag, &node->dag);
while (!list_is_empty(&s->dag->heads)) {
struct sched_node *node = choose_instr(s);
pressure += calculate_pressure_delta(node->instr, s->live);
max_pressure = MAX2(pressure, max_pressure);
dag_prune_head(s->dag, &node->dag);
schedule[nr_ins++] = node;
bi_liveness_ins_update_ssa(s->live, node->instr);
}
schedule[nr_ins++] = node;
bi_liveness_ins_update_ssa(s->live, node->instr);
}
/* Bail if it looks like it's worse */
if (max_pressure >= orig_max_pressure) {
free(schedule);
return;
}
/* Bail if it looks like it's worse */
if (max_pressure >= orig_max_pressure) {
free(schedule);
return;
}
/* Apply the schedule */
for (unsigned i = 0; i < nr_ins; ++i) {
bi_remove_instruction(schedule[i]->instr);
list_add(&schedule[i]->instr->link, &block->instructions);
}
/* Apply the schedule */
for (unsigned i = 0; i < nr_ins; ++i) {
bi_remove_instruction(schedule[i]->instr);
list_add(&schedule[i]->instr->link, &block->instructions);
}
free(schedule);
free(schedule);
}
void
bi_pressure_schedule(bi_context *ctx)
{
bi_compute_liveness_ssa(ctx);
void *memctx = ralloc_context(ctx);
BITSET_WORD *live = ralloc_array(memctx, BITSET_WORD, BITSET_WORDS(ctx->ssa_alloc));
bi_compute_liveness_ssa(ctx);
void *memctx = ralloc_context(ctx);
BITSET_WORD *live =
ralloc_array(memctx, BITSET_WORD, BITSET_WORDS(ctx->ssa_alloc));
bi_foreach_block(ctx, block) {
struct sched_ctx sctx = {
.dag = create_dag(ctx, block, memctx),
.live = live
};
bi_foreach_block(ctx, block) {
struct sched_ctx sctx = {.dag = create_dag(ctx, block, memctx),
.live = live};
pressure_schedule_block(ctx, block, &sctx);
}
pressure_schedule_block(ctx, block, &sctx);
}
ralloc_free(memctx);
ralloc_free(memctx);
}
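A minimal standalone illustration of the pressure-delta heuristic the scheduler minimizes at each step: scheduling an instruction (bottom-up) removes its live destination from the live set and adds any sources that were not live yet. The toy_instr type and values below are hypothetical, not driver code:

#include <stdbool.h>
#include <stdio.h>

/* Toy instruction: one destination and up to two sources, each a small
 * SSA value index (or -1 for "no source").
 */
struct toy_instr {
   int dest;
   int src[2];
};

/* Change in live-value count if this instruction is scheduled next in a
 * bottom-up walk: a live dest stops being live, new sources become live.
 */
static int
pressure_delta(const struct toy_instr *I, const bool *live)
{
   int delta = 0;

   if (I->dest >= 0 && live[I->dest])
      delta -= 1;

   for (int s = 0; s < 2; ++s) {
      if (I->src[s] < 0)
         continue;

      /* Count duplicate sources only once. */
      if (s == 1 && I->src[1] == I->src[0])
         continue;

      if (!live[I->src[s]])
         delta += 1;
   }

   return delta;
}

int
main(void)
{
   bool live[4] = {false, false, true, false};
   struct toy_instr I = {.dest = 2, .src = {0, 1}};

   /* Kills one live dest, makes two new sources live: delta = +1. */
   printf("pressure delta = %d\n", pressure_delta(&I, live));
   return 0;
}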


@ -24,177 +24,179 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_print_common.h"
#include "compiler.h"
static const char *
bi_reg_op_name(enum bifrost_reg_op op)
{
switch (op) {
case BIFROST_OP_IDLE: return "idle";
case BIFROST_OP_READ: return "read";
case BIFROST_OP_WRITE: return "write";
case BIFROST_OP_WRITE_LO: return "write lo";
case BIFROST_OP_WRITE_HI: return "write hi";
default: return "invalid";
}
switch (op) {
case BIFROST_OP_IDLE:
return "idle";
case BIFROST_OP_READ:
return "read";
case BIFROST_OP_WRITE:
return "write";
case BIFROST_OP_WRITE_LO:
return "write lo";
case BIFROST_OP_WRITE_HI:
return "write hi";
default:
return "invalid";
}
}
void
bi_print_slots(bi_registers *regs, FILE *fp)
{
for (unsigned i = 0; i < 2; ++i) {
if (regs->enabled[i])
fprintf(fp, "slot %u: %u\n", i, regs->slot[i]);
}
for (unsigned i = 0; i < 2; ++i) {
if (regs->enabled[i])
fprintf(fp, "slot %u: %u\n", i, regs->slot[i]);
}
if (regs->slot23.slot2) {
fprintf(fp, "slot 2 (%s%s): %u\n",
bi_reg_op_name(regs->slot23.slot2),
regs->slot23.slot2 >= BIFROST_OP_WRITE ?
" FMA": "",
regs->slot[2]);
}
if (regs->slot23.slot2) {
fprintf(fp, "slot 2 (%s%s): %u\n", bi_reg_op_name(regs->slot23.slot2),
regs->slot23.slot2 >= BIFROST_OP_WRITE ? " FMA" : "",
regs->slot[2]);
}
if (regs->slot23.slot3) {
fprintf(fp, "slot 3 (%s %s): %u\n",
bi_reg_op_name(regs->slot23.slot3),
regs->slot23.slot3_fma ? "FMA" : "ADD",
regs->slot[3]);
}
if (regs->slot23.slot3) {
fprintf(fp, "slot 3 (%s %s): %u\n", bi_reg_op_name(regs->slot23.slot3),
regs->slot23.slot3_fma ? "FMA" : "ADD", regs->slot[3]);
}
}
void
bi_print_tuple(bi_tuple *tuple, FILE *fp)
{
bi_instr *ins[2] = { tuple->fma, tuple->add };
bi_instr *ins[2] = {tuple->fma, tuple->add};
for (unsigned i = 0; i < 2; ++i) {
fprintf(fp, (i == 0) ? "\t* " : "\t+ ");
for (unsigned i = 0; i < 2; ++i) {
fprintf(fp, (i == 0) ? "\t* " : "\t+ ");
if (ins[i])
bi_print_instr(ins[i], fp);
else
fprintf(fp, "NOP\n");
}
if (ins[i])
bi_print_instr(ins[i], fp);
else
fprintf(fp, "NOP\n");
}
}
void
bi_print_clause(bi_clause *clause, FILE *fp)
{
fprintf(fp, "id(%u)", clause->scoreboard_id);
fprintf(fp, "id(%u)", clause->scoreboard_id);
if (clause->dependencies) {
fprintf(fp, " wait(");
if (clause->dependencies) {
fprintf(fp, " wait(");
for (unsigned i = 0; i < 8; ++i) {
if (clause->dependencies & (1 << i))
fprintf(fp, "%u ", i);
}
for (unsigned i = 0; i < 8; ++i) {
if (clause->dependencies & (1 << i))
fprintf(fp, "%u ", i);
}
fprintf(fp, ")");
}
fprintf(fp, ")");
}
fprintf(fp, " %s", bi_flow_control_name(clause->flow_control));
fprintf(fp, " %s", bi_flow_control_name(clause->flow_control));
if (!clause->next_clause_prefetch)
fprintf(fp, " no_prefetch");
if (!clause->next_clause_prefetch)
fprintf(fp, " no_prefetch");
if (clause->staging_barrier)
fprintf(fp, " osrb");
if (clause->staging_barrier)
fprintf(fp, " osrb");
if (clause->td)
fprintf(fp, " td");
if (clause->td)
fprintf(fp, " td");
if (clause->pcrel_idx != ~0)
fprintf(fp, " pcrel(%u)", clause->pcrel_idx);
if (clause->pcrel_idx != ~0)
fprintf(fp, " pcrel(%u)", clause->pcrel_idx);
fprintf(fp, "\n");
fprintf(fp, "\n");
for (unsigned i = 0; i < clause->tuple_count; ++i)
bi_print_tuple(&clause->tuples[i], fp);
for (unsigned i = 0; i < clause->tuple_count; ++i)
bi_print_tuple(&clause->tuples[i], fp);
if (clause->constant_count) {
for (unsigned i = 0; i < clause->constant_count; ++i)
fprintf(fp, "%" PRIx64 " ", clause->constants[i]);
if (clause->constant_count) {
for (unsigned i = 0; i < clause->constant_count; ++i)
fprintf(fp, "%" PRIx64 " ", clause->constants[i]);
if (clause->branch_constant)
fprintf(fp, "*");
if (clause->branch_constant)
fprintf(fp, "*");
fprintf(fp, "\n");
}
fprintf(fp, "\n");
}
fprintf(fp, "\n");
fprintf(fp, "\n");
}
static void
bi_print_scoreboard_line(unsigned slot, const char *name, uint64_t mask, FILE *fp)
bi_print_scoreboard_line(unsigned slot, const char *name, uint64_t mask,
FILE *fp)
{
if (!mask)
return;
if (!mask)
return;
fprintf(fp, "slot %u %s:", slot, name);
fprintf(fp, "slot %u %s:", slot, name);
u_foreach_bit64(reg, mask)
fprintf(fp, " r%" PRId64, reg);
u_foreach_bit64(reg, mask) fprintf(fp, " r%" PRId64, reg);
fprintf(fp, "\n");
fprintf(fp, "\n");
}
static void
bi_print_scoreboard(struct bi_scoreboard_state *state, FILE *fp)
{
for (unsigned i = 0; i < BI_NUM_SLOTS; ++i) {
bi_print_scoreboard_line(i, "reads", state->read[i], fp);
bi_print_scoreboard_line(i, "writes", state->write[i], fp);
}
for (unsigned i = 0; i < BI_NUM_SLOTS; ++i) {
bi_print_scoreboard_line(i, "reads", state->read[i], fp);
bi_print_scoreboard_line(i, "writes", state->write[i], fp);
}
}
void
bi_print_block(bi_block *block, FILE *fp)
{
if (block->scheduled) {
bi_print_scoreboard(&block->scoreboard_in, fp);
fprintf(fp, "\n");
}
if (block->scheduled) {
bi_print_scoreboard(&block->scoreboard_in, fp);
fprintf(fp, "\n");
}
fprintf(fp, "block%u {\n", block->index);
fprintf(fp, "block%u {\n", block->index);
if (block->scheduled) {
bi_foreach_clause_in_block(block, clause)
bi_print_clause(clause, fp);
} else {
bi_foreach_instr_in_block(block, ins)
bi_print_instr((bi_instr *) ins, fp);
}
if (block->scheduled) {
bi_foreach_clause_in_block(block, clause)
bi_print_clause(clause, fp);
} else {
bi_foreach_instr_in_block(block, ins)
bi_print_instr((bi_instr *)ins, fp);
}
fprintf(fp, "}");
fprintf(fp, "}");
if (block->successors[0]) {
fprintf(fp, " -> ");
if (block->successors[0]) {
fprintf(fp, " -> ");
bi_foreach_successor((block), succ)
fprintf(fp, "block%u ", succ->index);
}
bi_foreach_successor((block), succ)
fprintf(fp, "block%u ", succ->index);
}
if (bi_num_predecessors(block)) {
fprintf(fp, " from");
if (bi_num_predecessors(block)) {
fprintf(fp, " from");
bi_foreach_predecessor(block, pred)
fprintf(fp, " block%u", (*pred)->index);
}
bi_foreach_predecessor(block, pred)
fprintf(fp, " block%u", (*pred)->index);
}
if (block->scheduled) {
fprintf(fp, "\n");
bi_print_scoreboard(&block->scoreboard_out, fp);
}
if (block->scheduled) {
fprintf(fp, "\n");
bi_print_scoreboard(&block->scoreboard_out, fp);
}
fprintf(fp, "\n\n");
fprintf(fp, "\n\n");
}
void
bi_print_shader(bi_context *ctx, FILE *fp)
{
bi_foreach_block(ctx, block)
bi_print_block(block, fp);
bi_foreach_block(ctx, block)
bi_print_block(block, fp);
}


@ -31,38 +31,63 @@
const char *
bi_message_type_name(enum bifrost_message_type T)
{
switch (T) {
case BIFROST_MESSAGE_NONE: return "";
case BIFROST_MESSAGE_VARYING: return "vary";
case BIFROST_MESSAGE_ATTRIBUTE: return "attr";
case BIFROST_MESSAGE_TEX: return "tex";
case BIFROST_MESSAGE_VARTEX: return "vartex";
case BIFROST_MESSAGE_LOAD: return "load";
case BIFROST_MESSAGE_STORE: return "store";
case BIFROST_MESSAGE_ATOMIC: return "atomic";
case BIFROST_MESSAGE_BARRIER: return "barrier";
case BIFROST_MESSAGE_BLEND: return "blend";
case BIFROST_MESSAGE_TILE: return "tile";
case BIFROST_MESSAGE_Z_STENCIL: return "z_stencil";
case BIFROST_MESSAGE_ATEST: return "atest";
case BIFROST_MESSAGE_JOB: return "job";
case BIFROST_MESSAGE_64BIT: return "64";
default: return "XXX reserved";
}
switch (T) {
case BIFROST_MESSAGE_NONE:
return "";
case BIFROST_MESSAGE_VARYING:
return "vary";
case BIFROST_MESSAGE_ATTRIBUTE:
return "attr";
case BIFROST_MESSAGE_TEX:
return "tex";
case BIFROST_MESSAGE_VARTEX:
return "vartex";
case BIFROST_MESSAGE_LOAD:
return "load";
case BIFROST_MESSAGE_STORE:
return "store";
case BIFROST_MESSAGE_ATOMIC:
return "atomic";
case BIFROST_MESSAGE_BARRIER:
return "barrier";
case BIFROST_MESSAGE_BLEND:
return "blend";
case BIFROST_MESSAGE_TILE:
return "tile";
case BIFROST_MESSAGE_Z_STENCIL:
return "z_stencil";
case BIFROST_MESSAGE_ATEST:
return "atest";
case BIFROST_MESSAGE_JOB:
return "job";
case BIFROST_MESSAGE_64BIT:
return "64";
default:
return "XXX reserved";
}
}
const char *
bi_flow_control_name(enum bifrost_flow mode)
{
switch (mode) {
case BIFROST_FLOW_END: return "eos";
case BIFROST_FLOW_NBTB_PC: return "nbb br_pc";
case BIFROST_FLOW_NBTB_UNCONDITIONAL: return "nbb r_uncond";
case BIFROST_FLOW_NBTB: return "nbb";
case BIFROST_FLOW_BTB_UNCONDITIONAL: return "bb r_uncond";
case BIFROST_FLOW_BTB_NONE: return "bb";
case BIFROST_FLOW_WE_UNCONDITIONAL: return "we r_uncond";
case BIFROST_FLOW_WE: return "we";
default: return "XXX";
}
switch (mode) {
case BIFROST_FLOW_END:
return "eos";
case BIFROST_FLOW_NBTB_PC:
return "nbb br_pc";
case BIFROST_FLOW_NBTB_UNCONDITIONAL:
return "nbb r_uncond";
case BIFROST_FLOW_NBTB:
return "nbb";
case BIFROST_FLOW_BTB_UNCONDITIONAL:
return "bb r_uncond";
case BIFROST_FLOW_BTB_NONE:
return "bb";
case BIFROST_FLOW_WE_UNCONDITIONAL:
return "we r_uncond";
case BIFROST_FLOW_WE:
return "we";
default:
return "XXX";
}
}


@ -30,7 +30,7 @@
#include <stdio.h>
#include "bifrost.h"
const char * bi_message_type_name(enum bifrost_message_type T);
const char * bi_flow_control_name(enum bifrost_flow mode);
const char *bi_message_type_name(enum bifrost_message_type T);
const char *bi_flow_control_name(enum bifrost_flow mode);
#endif


@ -44,15 +44,15 @@
static inline unsigned
bifrost_get_quirks(unsigned product_id)
{
switch (product_id >> 8) {
case 0x60: /* G71 */
return BIFROST_NO_FP32_TRANSCENDENTALS | BIFROST_LIMITED_CLPER;
case 0x62: /* G72 */
case 0x70: /* G31 */
return BIFROST_LIMITED_CLPER;
default:
return 0;
}
switch (product_id >> 8) {
case 0x60: /* G71 */
return BIFROST_NO_FP32_TRANSCENDENTALS | BIFROST_LIMITED_CLPER;
case 0x62: /* G72 */
case 0x70: /* G31 */
return BIFROST_LIMITED_CLPER;
default:
return 0;
}
}
#endif

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -54,9 +54,9 @@
*/
#define BI_NUM_GENERAL_SLOTS 6
#define BI_NUM_SLOTS 8
#define BI_NUM_REGISTERS 64
#define BI_SLOT_SERIAL 0 /* arbitrary */
#define BI_NUM_SLOTS 8
#define BI_NUM_REGISTERS 64
#define BI_SLOT_SERIAL 0 /* arbitrary */
/*
* Due to the crude scoreboarding we do, we need to serialize varying loads and
@ -65,26 +65,26 @@
static bool
bi_should_serialize(bi_instr *I)
{
/* For debug, serialize everything to disable scoreboard opts */
if (bifrost_debug & BIFROST_DBG_NOSB)
return true;
/* For debug, serialize everything to disable scoreboard opts */
if (bifrost_debug & BIFROST_DBG_NOSB)
return true;
/* Although nominally on the attribute unit, image loads have the same
* coherency requirements as general memory loads. Serialize them for
* now until we can do something more clever.
*/
if (I->op == BI_OPCODE_LD_ATTR_TEX)
return true;
/* Although nominally on the attribute unit, image loads have the same
* coherency requirements as general memory loads. Serialize them for
* now until we can do something more clever.
*/
if (I->op == BI_OPCODE_LD_ATTR_TEX)
return true;
switch (bi_opcode_props[I->op].message) {
case BIFROST_MESSAGE_VARYING:
case BIFROST_MESSAGE_LOAD:
case BIFROST_MESSAGE_STORE:
case BIFROST_MESSAGE_ATOMIC:
return true;
default:
return false;
}
switch (bi_opcode_props[I->op].message) {
case BIFROST_MESSAGE_VARYING:
case BIFROST_MESSAGE_LOAD:
case BIFROST_MESSAGE_STORE:
case BIFROST_MESSAGE_ATOMIC:
return true;
default:
return false;
}
}
/* Given a scoreboard model, choose a slot for a clause wrapping a given
@ -93,76 +93,77 @@ bi_should_serialize(bi_instr *I)
static unsigned
bi_choose_scoreboard_slot(bi_instr *message)
{
/* ATEST, ZS_EMIT must be issued with slot #0 */
if (message->op == BI_OPCODE_ATEST || message->op == BI_OPCODE_ZS_EMIT)
return 0;
/* ATEST, ZS_EMIT must be issued with slot #0 */
if (message->op == BI_OPCODE_ATEST || message->op == BI_OPCODE_ZS_EMIT)
return 0;
/* BARRIER must be issued with slot #7 */
if (message->op == BI_OPCODE_BARRIER)
return 7;
/* BARRIER must be issued with slot #7 */
if (message->op == BI_OPCODE_BARRIER)
return 7;
/* For now, make serialization easy */
if (bi_should_serialize(message))
return BI_SLOT_SERIAL;
/* For now, make serialization easy */
if (bi_should_serialize(message))
return BI_SLOT_SERIAL;
return 0;
return 0;
}
static uint64_t
bi_read_mask(bi_instr *I, bool staging_only)
{
uint64_t mask = 0;
uint64_t mask = 0;
if (staging_only && !bi_opcode_props[I->op].sr_read)
return mask;
if (staging_only && !bi_opcode_props[I->op].sr_read)
return mask;
bi_foreach_src(I, s) {
if (I->src[s].type == BI_INDEX_REGISTER) {
unsigned reg = I->src[s].value;
unsigned count = bi_count_read_registers(I, s);
bi_foreach_src(I, s) {
if (I->src[s].type == BI_INDEX_REGISTER) {
unsigned reg = I->src[s].value;
unsigned count = bi_count_read_registers(I, s);
mask |= (BITFIELD64_MASK(count) << reg);
}
mask |= (BITFIELD64_MASK(count) << reg);
}
if (staging_only)
break;
}
if (staging_only)
break;
}
return mask;
return mask;
}
static uint64_t
bi_write_mask(bi_instr *I)
{
uint64_t mask = 0;
uint64_t mask = 0;
bi_foreach_dest(I, d) {
if (bi_is_null(I->dest[d])) continue;
bi_foreach_dest(I, d) {
if (bi_is_null(I->dest[d]))
continue;
assert(I->dest[d].type == BI_INDEX_REGISTER);
assert(I->dest[d].type == BI_INDEX_REGISTER);
unsigned reg = I->dest[d].value;
unsigned count = bi_count_write_registers(I, d);
unsigned reg = I->dest[d].value;
unsigned count = bi_count_write_registers(I, d);
mask |= (BITFIELD64_MASK(count) << reg);
}
mask |= (BITFIELD64_MASK(count) << reg);
}
/* Instructions like AXCHG.i32 unconditionally both read and write
* staging registers. Even if we discard the result, the write still
* happens logically and needs to be included in our calculations.
* Obscurely, ATOM_CX is sr_write but can ignore the staging register in
* certain circumstances; this does not require consideration.
*/
if (bi_opcode_props[I->op].sr_write && I->nr_dests && I->nr_srcs &&
bi_is_null(I->dest[0]) && !bi_is_null(I->src[0])) {
/* Instructions like AXCHG.i32 unconditionally both read and write
* staging registers. Even if we discard the result, the write still
* happens logically and needs to be included in our calculations.
* Obscurely, ATOM_CX is sr_write but can ignore the staging register in
* certain circumstances; this does not require consideration.
*/
if (bi_opcode_props[I->op].sr_write && I->nr_dests && I->nr_srcs &&
bi_is_null(I->dest[0]) && !bi_is_null(I->src[0])) {
unsigned reg = I->src[0].value;
unsigned count = bi_count_write_registers(I, 0);
unsigned reg = I->src[0].value;
unsigned count = bi_count_write_registers(I, 0);
mask |= (BITFIELD64_MASK(count) << reg);
}
mask |= (BITFIELD64_MASK(count) << reg);
}
return mask;
return mask;
}
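As a quick sanity check of the mask arithmetic used here, a minimal standalone sketch; LOCAL_BITFIELD64_MASK is a simplified stand-in for Mesa's BITFIELD64_MASK macro, so treat the macro details as an assumption:

#include <assert.h>
#include <stdint.h>

/* Simplified local stand-in for Mesa's BITFIELD64_MASK. */
#define LOCAL_BITFIELD64_MASK(n) (((n) >= 64) ? ~0ull : ((1ull << (n)) - 1))

int main(void)
{
   /* An instruction writing 4 consecutive registers starting at r2
    * contributes bits 2..5 to the mask, exactly as in bi_write_mask. */
   unsigned reg = 2, count = 4;
   uint64_t mask = LOCAL_BITFIELD64_MASK(count) << reg;
   assert(mask == 0x3c);
   return 0;
}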
/* Update the scoreboard model to assign an instruction to a given slot */
@@ -170,140 +171,143 @@ bi_write_mask(bi_instr *I)
static void
bi_push_clause(struct bi_scoreboard_state *st, bi_clause *clause)
{
bi_instr *I = clause->message;
unsigned slot = clause->scoreboard_id;
bi_instr *I = clause->message;
unsigned slot = clause->scoreboard_id;
if (!I)
return;
if (!I)
return;
st->read[slot] |= bi_read_mask(I, true);
st->read[slot] |= bi_read_mask(I, true);
if (bi_opcode_props[I->op].sr_write)
st->write[slot] |= bi_write_mask(I);
if (bi_opcode_props[I->op].sr_write)
st->write[slot] |= bi_write_mask(I);
}
/* Adds a dependency on each slot writing any specified register */
static void
bi_depend_on_writers(bi_clause *clause, struct bi_scoreboard_state *st, uint64_t regmask)
bi_depend_on_writers(bi_clause *clause, struct bi_scoreboard_state *st,
uint64_t regmask)
{
for (unsigned slot = 0; slot < ARRAY_SIZE(st->write); ++slot) {
if (!(st->write[slot] & regmask))
continue;
for (unsigned slot = 0; slot < ARRAY_SIZE(st->write); ++slot) {
if (!(st->write[slot] & regmask))
continue;
st->write[slot] = 0;
st->read[slot] = 0;
st->write[slot] = 0;
st->read[slot] = 0;
clause->dependencies |= BITFIELD_BIT(slot);
}
clause->dependencies |= BITFIELD_BIT(slot);
}
}
static void
bi_set_staging_barrier(bi_clause *clause, struct bi_scoreboard_state *st, uint64_t regmask)
bi_set_staging_barrier(bi_clause *clause, struct bi_scoreboard_state *st,
uint64_t regmask)
{
for (unsigned slot = 0; slot < ARRAY_SIZE(st->read); ++slot) {
if (!(st->read[slot] & regmask))
continue;
for (unsigned slot = 0; slot < ARRAY_SIZE(st->read); ++slot) {
if (!(st->read[slot] & regmask))
continue;
st->read[slot] = 0;
clause->staging_barrier = true;
}
st->read[slot] = 0;
clause->staging_barrier = true;
}
}
/* Sets the dependencies for a given clause, updating the model */
static void
bi_set_dependencies(bi_block *block, bi_clause *clause, struct bi_scoreboard_state *st)
bi_set_dependencies(bi_block *block, bi_clause *clause,
struct bi_scoreboard_state *st)
{
bi_foreach_instr_in_clause(block, clause, I) {
uint64_t read = bi_read_mask(I, false);
uint64_t written = bi_write_mask(I);
bi_foreach_instr_in_clause(block, clause, I) {
uint64_t read = bi_read_mask(I, false);
uint64_t written = bi_write_mask(I);
/* Read-after-write; write-after-write */
bi_depend_on_writers(clause, st, read | written);
/* Read-after-write; write-after-write */
bi_depend_on_writers(clause, st, read | written);
/* Write-after-read */
bi_set_staging_barrier(clause, st, written);
}
/* Write-after-read */
bi_set_staging_barrier(clause, st, written);
}
/* LD_VAR instructions must be serialized per-quad. Just always depend
* on any LD_VAR instructions. This isn't optimal, but doing better
* requires divergence-aware data flow analysis.
*
* Similarly, memory loads/stores need to be synchronized. For now,
* force them to be serialized. This is not optimal.
*/
if (clause->message && bi_should_serialize(clause->message))
clause->dependencies |= BITFIELD_BIT(BI_SLOT_SERIAL);
/* LD_VAR instructions must be serialized per-quad. Just always depend
* on any LD_VAR instructions. This isn't optimal, but doing better
* requires divergence-aware data flow analysis.
*
* Similarly, memory loads/stores need to be synchronized. For now,
* force them to be serialized. This is not optimal.
*/
if (clause->message && bi_should_serialize(clause->message))
clause->dependencies |= BITFIELD_BIT(BI_SLOT_SERIAL);
/* Barriers must wait on all slots to flush existing work. It might be
* possible to skip this with more information about the barrier. For
* now, be conservative.
*/
if (clause->message && clause->message->op == BI_OPCODE_BARRIER)
clause->dependencies |= BITFIELD_MASK(BI_NUM_GENERAL_SLOTS);
/* Barriers must wait on all slots to flush existing work. It might be
* possible to skip this with more information about the barrier. For
* now, be conservative.
*/
if (clause->message && clause->message->op == BI_OPCODE_BARRIER)
clause->dependencies |= BITFIELD_MASK(BI_NUM_GENERAL_SLOTS);
}
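Concretely, the hazard handling above can be modelled on plain masks. This is a minimal standalone sketch with arbitrarily chosen register numbers, not the driver code itself:

#include <assert.h>
#include <stdint.h>

int main(void)
{
   /* State for one scoreboard slot with an outstanding message: it will
    * write r4..r7 and still has to read its staging registers r0..r3. */
   uint64_t slot_write = 0xf0;
   uint64_t slot_read = 0x0f;

   /* A later clause reads r6 and writes r0. */
   uint64_t clause_read = UINT64_C(1) << 6;
   uint64_t clause_write = UINT64_C(1) << 0;

   /* Read-after-write / write-after-write: must depend on the slot. */
   assert((slot_write & (clause_read | clause_write)) != 0);

   /* Write-after-read: the clause needs a staging barrier. */
   assert((slot_read & clause_write) != 0);
   return 0;
}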
static bool
scoreboard_block_update(bi_block *blk)
{
bool progress = false;
bool progress = false;
/* pending_in[s] = sum { p in pred[s] } ( pending_out[p] ) */
bi_foreach_predecessor(blk, pred) {
for (unsigned i = 0; i < BI_NUM_SLOTS; ++i) {
blk->scoreboard_in.read[i] |= (*pred)->scoreboard_out.read[i];
blk->scoreboard_in.write[i] |= (*pred)->scoreboard_out.write[i];
}
}
/* pending_in[s] = sum { p in pred[s] } ( pending_out[p] ) */
bi_foreach_predecessor(blk, pred) {
for (unsigned i = 0; i < BI_NUM_SLOTS; ++i) {
blk->scoreboard_in.read[i] |= (*pred)->scoreboard_out.read[i];
blk->scoreboard_in.write[i] |= (*pred)->scoreboard_out.write[i];
}
}
struct bi_scoreboard_state state = blk->scoreboard_in;
struct bi_scoreboard_state state = blk->scoreboard_in;
/* Assign locally */
/* Assign locally */
bi_foreach_clause_in_block(blk, clause) {
bi_set_dependencies(blk, clause, &state);
bi_push_clause(&state, clause);
}
bi_foreach_clause_in_block(blk, clause) {
bi_set_dependencies(blk, clause, &state);
bi_push_clause(&state, clause);
}
/* To figure out progress, diff scoreboard_out */
/* To figure out progress, diff scoreboard_out */
for (unsigned i = 0; i < BI_NUM_SLOTS; ++i)
progress |= !!memcmp(&state, &blk->scoreboard_out, sizeof(state));
for (unsigned i = 0; i < BI_NUM_SLOTS; ++i)
progress |= !!memcmp(&state, &blk->scoreboard_out, sizeof(state));
blk->scoreboard_out = state;
blk->scoreboard_out = state;
return progress;
return progress;
}
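The join used here is a bitwise OR, which is what makes the worklist iteration converge: per-slot state only ever grows, so each block can change (and be re-pushed) only a bounded number of times. A tiny standalone illustration of that monotonicity:

#include <assert.h>
#include <stdint.h>

int main(void)
{
   uint64_t state = 0;
   uint64_t incoming = 0xf0;
   uint64_t joined = state | incoming;

   assert(joined != state);               /* the first join makes progress */
   assert((joined | incoming) == joined); /* re-joining is a no-op */
   return 0;
}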
void
bi_assign_scoreboard(bi_context *ctx)
{
u_worklist worklist;
bi_worklist_init(ctx, &worklist);
u_worklist worklist;
bi_worklist_init(ctx, &worklist);
/* First, assign slots. */
bi_foreach_block(ctx, block) {
bi_foreach_clause_in_block(block, clause) {
if (clause->message) {
unsigned slot = bi_choose_scoreboard_slot(clause->message);
clause->scoreboard_id = slot;
}
}
/* First, assign slots. */
bi_foreach_block(ctx, block) {
bi_foreach_clause_in_block(block, clause) {
if (clause->message) {
unsigned slot = bi_choose_scoreboard_slot(clause->message);
clause->scoreboard_id = slot;
}
}
bi_worklist_push_tail(&worklist, block);
}
bi_worklist_push_tail(&worklist, block);
}
/* Next, perform forward data flow analysis to calculate dependencies */
while (!u_worklist_is_empty(&worklist)) {
/* Pop from the front for forward analysis */
bi_block *blk = bi_worklist_pop_head(&worklist);
/* Next, perform forward data flow analysis to calculate dependencies */
while (!u_worklist_is_empty(&worklist)) {
/* Pop from the front for forward analysis */
bi_block *blk = bi_worklist_pop_head(&worklist);
if (scoreboard_block_update(blk)) {
bi_foreach_successor(blk, succ)
bi_worklist_push_tail(&worklist, succ);
}
}
if (scoreboard_block_update(blk)) {
bi_foreach_successor(blk, succ)
bi_worklist_push_tail(&worklist, succ);
}
}
u_worklist_fini(&worklist);
u_worklist_fini(&worklist);
}


@@ -27,38 +27,38 @@
#ifndef __BI_TEST_H
#define __BI_TEST_H
#include <stdio.h>
#include <inttypes.h>
#include <stdio.h>
#include "compiler.h"
/* Helper to generate a bi_builder suitable for creating test instructions */
static inline bi_block *
bit_block(bi_context *ctx)
{
bi_block *blk = rzalloc(ctx, bi_block);
bi_block *blk = rzalloc(ctx, bi_block);
util_dynarray_init(&blk->predecessors, blk);
list_addtail(&blk->link, &ctx->blocks);
list_inithead(&blk->instructions);
util_dynarray_init(&blk->predecessors, blk);
list_addtail(&blk->link, &ctx->blocks);
list_inithead(&blk->instructions);
blk->index = ctx->num_blocks++;
blk->index = ctx->num_blocks++;
return blk;
return blk;
}
static inline bi_builder *
bit_builder(void *memctx)
{
bi_context *ctx = rzalloc(memctx, bi_context);
list_inithead(&ctx->blocks);
ctx->inputs = rzalloc(memctx, struct panfrost_compile_inputs);
bi_context *ctx = rzalloc(memctx, bi_context);
list_inithead(&ctx->blocks);
ctx->inputs = rzalloc(memctx, struct panfrost_compile_inputs);
bi_block *blk = bit_block(ctx);
bi_block *blk = bit_block(ctx);
bi_builder *b = rzalloc(memctx, bi_builder);
b->shader = ctx;
b->cursor = bi_after_block(blk);
return b;
bi_builder *b = rzalloc(memctx, bi_builder);
b->shader = ctx;
b->cursor = bi_after_block(blk);
return b;
}
/* Helper to compare for logical equality of instructions. Need to skip over
@@ -69,14 +69,15 @@ bit_instr_equal(bi_instr *A, bi_instr *B)
{
size_t skip = sizeof(struct list_head) + 2 * sizeof(bi_index *);
if (memcmp((uint8_t *) A + skip, (uint8_t *) B + skip, sizeof(bi_instr) - skip))
return false;
if (memcmp((uint8_t *)A + skip, (uint8_t *)B + skip,
sizeof(bi_instr) - skip))
return false;
if (memcmp(A->dest, B->dest, sizeof(bi_index) * A->nr_dests))
return false;
return false;
if (memcmp(A->src, B->src, sizeof(bi_index) * A->nr_srcs))
return false;
return false;
return true;
}
@@ -87,8 +88,9 @@ bit_block_equal(bi_block *A, bi_block *B)
if (list_length(&A->instructions) != list_length(&B->instructions))
return false;
list_pair_for_each_entry(bi_instr, insA, insB,
&A->instructions, &B->instructions, link) {
list_pair_for_each_entry(bi_instr, insA, insB, &A->instructions,
&B->instructions, link)
{
if (!bit_instr_equal(insA, insB))
return false;
}
@@ -102,8 +104,9 @@ bit_shader_equal(bi_context *A, bi_context *B)
if (list_length(&A->blocks) != list_length(&B->blocks))
return false;
list_pair_for_each_entry(bi_block, blockA, blockB,
&A->blocks, &B->blocks, link) {
list_pair_for_each_entry(bi_block, blockA, blockB, &A->blocks, &B->blocks,
link)
{
if (!bit_block_equal(blockA, blockB))
return false;
}
@@ -111,30 +114,31 @@ bit_shader_equal(bi_context *A, bi_context *B)
return true;
}
#define ASSERT_SHADER_EQUAL(A, B) \
if (!bit_shader_equal(A, B)) { \
ADD_FAILURE(); \
fprintf(stderr, "Pass produced unexpected results"); \
fprintf(stderr, " Actual:\n"); \
bi_print_shader(A, stderr); \
fprintf(stderr, " Expected:\n"); \
bi_print_shader(B, stderr); \
fprintf(stderr, "\n"); \
} \
#define ASSERT_SHADER_EQUAL(A, B) \
if (!bit_shader_equal(A, B)) { \
ADD_FAILURE(); \
fprintf(stderr, "Pass produced unexpected results"); \
fprintf(stderr, " Actual:\n"); \
bi_print_shader(A, stderr); \
fprintf(stderr, " Expected:\n"); \
bi_print_shader(B, stderr); \
fprintf(stderr, "\n"); \
}
#define INSTRUCTION_CASE(instr, expected, pass) do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
bi_builder *b = A; \
instr; \
} \
{ \
bi_builder *b = B; \
expected; \
} \
pass(A->shader); \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while(0)
#define INSTRUCTION_CASE(instr, expected, pass) \
do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
bi_builder *b = A; \
instr; \
} \
{ \
bi_builder *b = B; \
expected; \
} \
pass(A->shader); \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while (0)
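A hypothetical use of this macro inside a unit test (one that, like the existing tests, has a mem_ctx ralloc context in scope) might look like the following. The pass name bi_opt_example_pass is made up for illustration, and the generated builder signatures (bi_fadd_f32_to, bi_mov_i32_to) are assumed rather than quoted from the tree:

/* Check that a (hypothetical) pass folds "FADD.f32 r0, r1, 0.0" into a move.
 * Builder calls follow the usual bi_<op>_to(b, dest, srcs...) pattern. */
INSTRUCTION_CASE(
   bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_zero()),
   bi_mov_i32_to(b, bi_register(0), bi_register(1)),
   bi_opt_example_pass);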
#endif


@@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "util/u_memory.h"
#include "compiler.h"
/* Validation doesn't make sense in release builds */
#ifndef NDEBUG
@@ -35,21 +35,21 @@
bool
bi_validate_initialization(bi_context *ctx)
{
bool success = true;
bool success = true;
/* Calculate the live set */
bi_block *entry = bi_entry_block(ctx);
bi_compute_liveness_ssa(ctx);
/* Calculate the live set */
bi_block *entry = bi_entry_block(ctx);
bi_compute_liveness_ssa(ctx);
/* Validate that the live set is indeed empty */
for (unsigned i = 0; i < ctx->ssa_alloc; ++i) {
if (BITSET_TEST(entry->ssa_live_in, i)) {
fprintf(stderr, "%u\n", i);
success = false;
}
}
/* Validate that the live set is indeed empty */
for (unsigned i = 0; i < ctx->ssa_alloc; ++i) {
if (BITSET_TEST(entry->ssa_live_in, i)) {
fprintf(stderr, "%u\n", i);
success = false;
}
}
return success;
return success;
}
/*
@@ -60,47 +60,46 @@ bi_validate_initialization(bi_context *ctx)
static bool
bi_validate_preload(bi_context *ctx)
{
bool start = true;
uint64_t preloaded = 0;
bool start = true;
uint64_t preloaded = 0;
bi_foreach_block(ctx, block) {
bi_foreach_instr_in_block(block, I) {
/* No instruction should have a register destination */
bi_foreach_dest(I, d) {
if (I->dest[d].type == BI_INDEX_REGISTER)
return false;
}
bi_foreach_block(ctx, block) {
bi_foreach_instr_in_block(block, I) {
/* No instruction should have a register destination */
bi_foreach_dest(I, d) {
if (I->dest[d].type == BI_INDEX_REGISTER)
return false;
}
/* Preloads are register moves at the start */
bool is_preload =
start && I->op == BI_OPCODE_MOV_I32 &&
I->src[0].type == BI_INDEX_REGISTER;
/* Preloads are register moves at the start */
bool is_preload = start && I->op == BI_OPCODE_MOV_I32 &&
I->src[0].type == BI_INDEX_REGISTER;
/* After the first nonpreload, we're done preloading */
start &= is_preload;
/* After the first nonpreload, we're done preloading */
start &= is_preload;
/* Only preloads may have a register source */
bi_foreach_src(I, s) {
if (I->src[s].type == BI_INDEX_REGISTER && !is_preload)
return false;
}
/* Only preloads may have a register source */
bi_foreach_src(I, s) {
if (I->src[s].type == BI_INDEX_REGISTER && !is_preload)
return false;
}
/* Check uniqueness */
if (is_preload) {
unsigned r = I->src[0].value;
/* Check uniqueness */
if (is_preload) {
unsigned r = I->src[0].value;
if (preloaded & BITFIELD64_BIT(r))
return false;
if (preloaded & BITFIELD64_BIT(r))
return false;
preloaded |= BITFIELD64_BIT(r);
}
}
preloaded |= BITFIELD64_BIT(r);
}
}
/* Only the first block may preload */
start = false;
}
/* Only the first block may preload */
start = false;
}
return true;
return true;
}
/*
@@ -111,38 +110,37 @@ bi_validate_preload(bi_context *ctx)
static bool
bi_validate_width(bi_context *ctx)
{
bool succ = true;
uint8_t *width = calloc(ctx->ssa_alloc, sizeof(uint8_t));
bool succ = true;
uint8_t *width = calloc(ctx->ssa_alloc, sizeof(uint8_t));
bi_foreach_instr_global(ctx, I) {
bi_foreach_dest(I, d) {
assert(bi_is_ssa(I->dest[d]));
bi_foreach_instr_global(ctx, I) {
bi_foreach_dest(I, d) {
assert(bi_is_ssa(I->dest[d]));
unsigned v = I->dest[d].value;
assert(width[v] == 0 && "broken SSA");
unsigned v = I->dest[d].value;
assert(width[v] == 0 && "broken SSA");
width[v] = bi_count_write_registers(I, d);
}
}
width[v] = bi_count_write_registers(I, d);
}
}
bi_foreach_instr_global(ctx, I) {
bi_foreach_ssa_src(I, s) {
unsigned v = I->src[s].value;
unsigned n = bi_count_read_registers(I, s);
bi_foreach_instr_global(ctx, I) {
bi_foreach_ssa_src(I, s) {
unsigned v = I->src[s].value;
unsigned n = bi_count_read_registers(I, s);
if (width[v] != n) {
succ = false;
fprintf(stderr,
"source %u, expected width %u, got width %u\n",
s, n, width[v]);
bi_print_instr(I, stderr);
fprintf(stderr, "\n");
}
}
}
if (width[v] != n) {
succ = false;
fprintf(stderr, "source %u, expected width %u, got width %u\n", s,
n, width[v]);
bi_print_instr(I, stderr);
fprintf(stderr, "\n");
}
}
}
free(width);
return succ;
free(width);
return succ;
}
/*
@@ -151,20 +149,20 @@ bi_validate_width(bi_context *ctx)
static bool
bi_validate_dest(bi_context *ctx)
{
bool succ = true;
bool succ = true;
bi_foreach_instr_global(ctx, I) {
bi_foreach_dest(I, d) {
if (bi_is_null(I->dest[d])) {
succ = false;
fprintf(stderr, "expected dest %u", d);
bi_print_instr(I, stderr);
fprintf(stderr, "\n");
}
}
}
bi_foreach_instr_global(ctx, I) {
bi_foreach_dest(I, d) {
if (bi_is_null(I->dest[d])) {
succ = false;
fprintf(stderr, "expected dest %u", d);
bi_print_instr(I, stderr);
fprintf(stderr, "\n");
}
}
}
return succ;
return succ;
}
/*
@@ -173,57 +171,57 @@ bi_validate_dest(bi_context *ctx)
static bool
bi_validate_phi_ordering(bi_context *ctx)
{
bi_foreach_block(ctx, block) {
bool start = true;
bi_foreach_block(ctx, block) {
bool start = true;
bi_foreach_instr_in_block(block, I) {
if (start)
start = I->op == BI_OPCODE_PHI;
else if (I->op == BI_OPCODE_PHI)
return false;
}
}
bi_foreach_instr_in_block(block, I) {
if (start)
start = I->op == BI_OPCODE_PHI;
else if (I->op == BI_OPCODE_PHI)
return false;
}
}
return true;
return true;
}
void
bi_validate(bi_context *ctx, const char *after)
{
bool fail = false;
bool fail = false;
if (bifrost_debug & BIFROST_DBG_NOVALIDATE)
return;
if (bifrost_debug & BIFROST_DBG_NOVALIDATE)
return;
if (!bi_validate_initialization(ctx)) {
fprintf(stderr, "Uninitialized data read after %s\n", after);
fail = true;
}
if (!bi_validate_initialization(ctx)) {
fprintf(stderr, "Uninitialized data read after %s\n", after);
fail = true;
}
if (!bi_validate_preload(ctx)) {
fprintf(stderr, "Unexpected preload after %s\n", after);
fail = true;
}
if (!bi_validate_preload(ctx)) {
fprintf(stderr, "Unexpected preload after %s\n", after);
fail = true;
}
if (!bi_validate_width(ctx)) {
fprintf(stderr, "Unexpected vector with after %s\n", after);
fail = true;
}
if (!bi_validate_width(ctx)) {
fprintf(stderr, "Unexpected vector with after %s\n", after);
fail = true;
}
if (!bi_validate_dest(ctx)) {
fprintf(stderr, "Unexpected source/dest after %s\n", after);
fail = true;
}
if (!bi_validate_dest(ctx)) {
fprintf(stderr, "Unexpected source/dest after %s\n", after);
fail = true;
}
if (!bi_validate_phi_ordering(ctx)) {
fprintf(stderr, "Unexpected phi ordering after %s\n", after);
fail = true;
}
if (!bi_validate_phi_ordering(ctx)) {
fprintf(stderr, "Unexpected phi ordering after %s\n", after);
fail = true;
}
if (fail) {
bi_print_shader(ctx, stderr);
exit(1);
}
if (fail) {
bi_print_shader(ctx, stderr);
exit(1);
}
}
#endif /* NDEBUG */


@@ -26,63 +26,63 @@
#ifndef __bifrost_h__
#define __bifrost_h__
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#ifdef __cplusplus
extern "C" {
#endif
#define BIFROST_DBG_MSGS 0x0001
#define BIFROST_DBG_SHADERS 0x0002
#define BIFROST_DBG_SHADERDB 0x0004
#define BIFROST_DBG_VERBOSE 0x0008
#define BIFROST_DBG_INTERNAL 0x0010
#define BIFROST_DBG_NOSCHED 0x0020
#define BIFROST_DBG_INORDER 0x0040
#define BIFROST_DBG_NOVALIDATE 0x0080
#define BIFROST_DBG_NOOPT 0x0100
#define BIFROST_DBG_NOIDVS 0x0200
#define BIFROST_DBG_NOSB 0x0400
#define BIFROST_DBG_NOPRELOAD 0x0800
#define BIFROST_DBG_SPILL 0x1000
#define BIFROST_DBG_NOPSCHED 0x2000
#define BIFROST_DBG_MSGS 0x0001
#define BIFROST_DBG_SHADERS 0x0002
#define BIFROST_DBG_SHADERDB 0x0004
#define BIFROST_DBG_VERBOSE 0x0008
#define BIFROST_DBG_INTERNAL 0x0010
#define BIFROST_DBG_NOSCHED 0x0020
#define BIFROST_DBG_INORDER 0x0040
#define BIFROST_DBG_NOVALIDATE 0x0080
#define BIFROST_DBG_NOOPT 0x0100
#define BIFROST_DBG_NOIDVS 0x0200
#define BIFROST_DBG_NOSB 0x0400
#define BIFROST_DBG_NOPRELOAD 0x0800
#define BIFROST_DBG_SPILL 0x1000
#define BIFROST_DBG_NOPSCHED 0x2000
extern int bifrost_debug;
enum bifrost_message_type {
BIFROST_MESSAGE_NONE = 0,
BIFROST_MESSAGE_VARYING = 1,
BIFROST_MESSAGE_ATTRIBUTE = 2,
BIFROST_MESSAGE_TEX = 3,
BIFROST_MESSAGE_VARTEX = 4,
BIFROST_MESSAGE_LOAD = 5,
BIFROST_MESSAGE_STORE = 6,
BIFROST_MESSAGE_ATOMIC = 7,
BIFROST_MESSAGE_BARRIER = 8,
BIFROST_MESSAGE_BLEND = 9,
BIFROST_MESSAGE_TILE = 10,
/* type 11 reserved */
BIFROST_MESSAGE_Z_STENCIL = 12,
BIFROST_MESSAGE_ATEST = 13,
BIFROST_MESSAGE_JOB = 14,
BIFROST_MESSAGE_64BIT = 15
BIFROST_MESSAGE_NONE = 0,
BIFROST_MESSAGE_VARYING = 1,
BIFROST_MESSAGE_ATTRIBUTE = 2,
BIFROST_MESSAGE_TEX = 3,
BIFROST_MESSAGE_VARTEX = 4,
BIFROST_MESSAGE_LOAD = 5,
BIFROST_MESSAGE_STORE = 6,
BIFROST_MESSAGE_ATOMIC = 7,
BIFROST_MESSAGE_BARRIER = 8,
BIFROST_MESSAGE_BLEND = 9,
BIFROST_MESSAGE_TILE = 10,
/* type 11 reserved */
BIFROST_MESSAGE_Z_STENCIL = 12,
BIFROST_MESSAGE_ATEST = 13,
BIFROST_MESSAGE_JOB = 14,
BIFROST_MESSAGE_64BIT = 15
};
enum bifrost_ftz {
BIFROST_FTZ_DISABLE = 0,
BIFROST_FTZ_DX11 = 1,
BIFROST_FTZ_ALWAYS = 2,
BIFROST_FTZ_ABRUPT = 3
BIFROST_FTZ_DISABLE = 0,
BIFROST_FTZ_DX11 = 1,
BIFROST_FTZ_ALWAYS = 2,
BIFROST_FTZ_ABRUPT = 3
};
enum bifrost_exceptions {
BIFROST_EXCEPTIONS_ENABLED = 0,
BIFROST_EXCEPTIONS_DISABLED = 1,
BIFROST_EXCEPTIONS_PRECISE_DIVISION = 2,
BIFROST_EXCEPTIONS_PRECISE_SQRT = 3,
BIFROST_EXCEPTIONS_ENABLED = 0,
BIFROST_EXCEPTIONS_DISABLED = 1,
BIFROST_EXCEPTIONS_PRECISE_DIVISION = 2,
BIFROST_EXCEPTIONS_PRECISE_SQRT = 3,
};
/* Describes clause flow control, with respect to control flow and branch
@@ -102,182 +102,182 @@ enum bifrost_exceptions {
*/
enum bifrost_flow {
/* End-of-shader */
BIFROST_FLOW_END = 0,
/* End-of-shader */
BIFROST_FLOW_END = 0,
/* Non back-to-back, PC-encoded reconvergence */
BIFROST_FLOW_NBTB_PC = 1,
/* Non back-to-back, PC-encoded reconvergence */
BIFROST_FLOW_NBTB_PC = 1,
/* Non back-to-back, unconditional reconvergence */
BIFROST_FLOW_NBTB_UNCONDITIONAL = 2,
/* Non back-to-back, unconditional reconvergence */
BIFROST_FLOW_NBTB_UNCONDITIONAL = 2,
/* Non back-to-back, no reconvergence */
BIFROST_FLOW_NBTB = 3,
/* Non back-to-back, no reconvergence */
BIFROST_FLOW_NBTB = 3,
/* Back-to-back, unconditional reconvergence */
BIFROST_FLOW_BTB_UNCONDITIONAL = 4,
/* Back-to-back, unconditional reconvergence */
BIFROST_FLOW_BTB_UNCONDITIONAL = 4,
/* Back-to-back, no reconvergence */
BIFROST_FLOW_BTB_NONE = 5,
/* Back-to-back, no reconvergence */
BIFROST_FLOW_BTB_NONE = 5,
/* Write elision, unconditional reconvergence */
BIFROST_FLOW_WE_UNCONDITIONAL = 6,
/* Write elision, unconditional reconvergence */
BIFROST_FLOW_WE_UNCONDITIONAL = 6,
/* Write elision, no reconvergence */
BIFROST_FLOW_WE = 7,
/* Write elision, no reconvergence */
BIFROST_FLOW_WE = 7,
};
enum bifrost_slot {
/* 0-5 are general purpose */
BIFROST_SLOT_ELDEST_DEPTH = 6,
BIFROST_SLOT_ELDEST_COLOUR = 7,
/* 0-5 are general purpose */
BIFROST_SLOT_ELDEST_DEPTH = 6,
BIFROST_SLOT_ELDEST_COLOUR = 7,
};
struct bifrost_header {
/* Reserved */
unsigned zero1 : 5;
/* Reserved */
unsigned zero1 : 5;
/* Flush-to-zero mode, leave zero for GL */
enum bifrost_ftz flush_to_zero : 2;
/* Flush-to-zero mode, leave zero for GL */
enum bifrost_ftz flush_to_zero : 2;
/* Convert any infinite result of any floating-point operation to the
* biggest representable number */
unsigned suppress_inf: 1;
/* Convert any infinite result of any floating-point operation to the
* biggest representable number */
unsigned suppress_inf : 1;
/* Convert NaN to +0.0 */
unsigned suppress_nan : 1;
/* Convert NaN to +0.0 */
unsigned suppress_nan : 1;
/* Floating-point exception handling mode */
enum bifrost_exceptions float_exceptions : 2;
/* Floating-point exception handling mode */
enum bifrost_exceptions float_exceptions : 2;
/* Enum describing the flow control, which matters for handling
* divergence and reconvergence efficiently */
enum bifrost_flow flow_control : 3;
/* Enum describing the flow control, which matters for handling
* divergence and reconvergence efficiently */
enum bifrost_flow flow_control : 3;
/* Reserved */
unsigned zero2 : 1;
/* Reserved */
unsigned zero2 : 1;
/* Terminate discarded threads, rather than continuing execution. Set
* for fragment shaders for standard GL behaviour of DISCARD. Also in a
* fragment shader, this disables helper invocations, so cannot be used
* in a shader that requires derivatives or texture LOD computation */
unsigned terminate_discarded_threads : 1;
/* Terminate discarded threads, rather than continuing execution. Set
* for fragment shaders for standard GL behaviour of DISCARD. Also in a
* fragment shader, this disables helper invocations, so cannot be used
* in a shader that requires derivatives or texture LOD computation */
unsigned terminate_discarded_threads : 1;
/* If set, the hardware may prefetch the next clause. If false, the
* hardware may not. Clear for unconditional branches. */
unsigned next_clause_prefetch : 1;
/* If set, the hardware may prefetch the next clause. If false, the
* hardware may not. Clear for unconditional branches. */
unsigned next_clause_prefetch : 1;
/* If set, a barrier will be inserted after the clause waiting for all
* message passing instructions to read their staging registers, such
* that it is safe for the next clause to write them. */
unsigned staging_barrier: 1;
unsigned staging_register : 6;
/* If set, a barrier will be inserted after the clause waiting for all
* message passing instructions to read their staging registers, such
* that it is safe for the next clause to write them. */
unsigned staging_barrier : 1;
unsigned staging_register : 6;
/* Slots to wait on and slot to be used for message passing
* instructions respectively */
unsigned dependency_wait : 8;
unsigned dependency_slot : 3;
/* Slots to wait on and slot to be used for message passing
* instructions respectively */
unsigned dependency_wait : 8;
unsigned dependency_slot : 3;
enum bifrost_message_type message_type : 5;
enum bifrost_message_type next_message_type : 5;
enum bifrost_message_type message_type : 5;
enum bifrost_message_type next_message_type : 5;
} __attribute__((packed));
enum bifrost_packed_src {
BIFROST_SRC_PORT0 = 0,
BIFROST_SRC_PORT1 = 1,
BIFROST_SRC_PORT2 = 2,
BIFROST_SRC_STAGE = 3,
BIFROST_SRC_FAU_LO = 4,
BIFROST_SRC_FAU_HI = 5,
BIFROST_SRC_PASS_FMA = 6,
BIFROST_SRC_PASS_ADD = 7,
BIFROST_SRC_PORT0 = 0,
BIFROST_SRC_PORT1 = 1,
BIFROST_SRC_PORT2 = 2,
BIFROST_SRC_STAGE = 3,
BIFROST_SRC_FAU_LO = 4,
BIFROST_SRC_FAU_HI = 5,
BIFROST_SRC_PASS_FMA = 6,
BIFROST_SRC_PASS_ADD = 7,
};
struct bifrost_fma_inst {
unsigned src0 : 3;
unsigned op : 20;
unsigned src0 : 3;
unsigned op : 20;
} __attribute__((packed));
struct bifrost_add_inst {
unsigned src0 : 3;
unsigned op : 17;
unsigned src0 : 3;
unsigned op : 17;
} __attribute__((packed));
enum branch_bit_size {
BR_SIZE_32 = 0,
BR_SIZE_16XX = 1,
BR_SIZE_16YY = 2,
// For the above combinations of bitsize and location, an extra bit is
// encoded via comparing the sources. The only possible source of ambiguity
// would be if the sources were the same, but then the branch condition
// would be always true or always false anyways, so we can ignore it. But
// this no longer works when comparing the y component to the x component,
// since it's valid to compare the y component of a source against its own
// x component. Instead, the extra bit is encoded via an extra bitsize.
BR_SIZE_16YX0 = 3,
BR_SIZE_16YX1 = 4,
BR_SIZE_32_AND_16X = 5,
BR_SIZE_32_AND_16Y = 6,
// Used for comparisons with zero and always-true, see below. I think this
// only works for integer comparisons.
BR_SIZE_ZERO = 7,
BR_SIZE_32 = 0,
BR_SIZE_16XX = 1,
BR_SIZE_16YY = 2,
// For the above combinations of bitsize and location, an extra bit is
// encoded via comparing the sources. The only possible source of ambiguity
// would be if the sources were the same, but then the branch condition
// would be always true or always false anyways, so we can ignore it. But
// this no longer works when comparing the y component to the x component,
// since it's valid to compare the y component of a source against its own
// x component. Instead, the extra bit is encoded via an extra bitsize.
BR_SIZE_16YX0 = 3,
BR_SIZE_16YX1 = 4,
BR_SIZE_32_AND_16X = 5,
BR_SIZE_32_AND_16Y = 6,
// Used for comparisons with zero and always-true, see below. I think this
// only works for integer comparisons.
BR_SIZE_ZERO = 7,
};
struct bifrost_regs {
unsigned fau_idx : 8;
unsigned reg3 : 6;
unsigned reg2 : 6;
unsigned reg0 : 5;
unsigned reg1 : 6;
unsigned ctrl : 4;
unsigned fau_idx : 8;
unsigned reg3 : 6;
unsigned reg2 : 6;
unsigned reg0 : 5;
unsigned reg1 : 6;
unsigned ctrl : 4;
} __attribute__((packed));
#define BIFROST_FMTC_CONSTANTS 0b0011
#define BIFROST_FMTC_FINAL 0b0111
#define BIFROST_FMTC_CONSTANTS 0b0011
#define BIFROST_FMTC_FINAL 0b0111
struct bifrost_fmt_constant {
unsigned pos : 4;
unsigned tag : 4;
uint64_t imm_1 : 60;
uint64_t imm_2 : 60;
unsigned pos : 4;
unsigned tag : 4;
uint64_t imm_1 : 60;
uint64_t imm_2 : 60;
} __attribute__((packed));
/* Clause formats, encoded in a table */
enum bi_clause_subword {
/* Literal 3-bit values */
BI_CLAUSE_SUBWORD_LITERAL_0 = 0,
/* etc */
BI_CLAUSE_SUBWORD_LITERAL_7 = 7,
/* Literal 3-bit values */
BI_CLAUSE_SUBWORD_LITERAL_0 = 0,
/* etc */
BI_CLAUSE_SUBWORD_LITERAL_7 = 7,
/* The value of the corresponding tuple in the corresponding bits */
BI_CLAUSE_SUBWORD_TUPLE_0 = 8,
/* etc */
BI_CLAUSE_SUBWORD_TUPLE_7 = 15,
/* The value of the corresponding tuple in the corresponding bits */
BI_CLAUSE_SUBWORD_TUPLE_0 = 8,
/* etc */
BI_CLAUSE_SUBWORD_TUPLE_7 = 15,
/* Clause header */
BI_CLAUSE_SUBWORD_HEADER = 16,
/* Clause header */
BI_CLAUSE_SUBWORD_HEADER = 16,
/* Leave zero, but semantically distinct from literal 0 */
BI_CLAUSE_SUBWORD_RESERVED = 17,
/* Leave zero, but semantically distinct from literal 0 */
BI_CLAUSE_SUBWORD_RESERVED = 17,
/* Embedded constant 0 */
BI_CLAUSE_SUBWORD_CONSTANT = 18,
/* Embedded constant 0 */
BI_CLAUSE_SUBWORD_CONSTANT = 18,
/* M bits controlling modifier for the constant */
BI_CLAUSE_SUBWORD_M = 19,
/* M bits controlling modifier for the constant */
BI_CLAUSE_SUBWORD_M = 19,
/* Z bit: 1 to begin encoding constants, 0 to terminate the clause */
BI_CLAUSE_SUBWORD_Z = 20,
/* Z bit: 1 to begin encoding constants, 0 to terminate the clause */
BI_CLAUSE_SUBWORD_Z = 20,
/* Upper 3-bits of a given tuple and zero extended */
BI_CLAUSE_SUBWORD_UPPER_0 = 32,
/* etc */
BI_CLAUSE_SUBWORD_UPPER_7 = BI_CLAUSE_SUBWORD_UPPER_0 + 7,
/* Upper 3-bits of a given tuple and zero extended */
BI_CLAUSE_SUBWORD_UPPER_0 = 32,
/* etc */
BI_CLAUSE_SUBWORD_UPPER_7 = BI_CLAUSE_SUBWORD_UPPER_0 + 7,
/* Upper 3-bits of two tuples, concatenated and zero-extended */
BI_CLAUSE_SUBWORD_UPPER_23 = BI_CLAUSE_SUBWORD_UPPER_0 + 23,
BI_CLAUSE_SUBWORD_UPPER_56 = BI_CLAUSE_SUBWORD_UPPER_0 + 56,
/* Upper 3-bits of two tuples, concatenated and zero-extended */
BI_CLAUSE_SUBWORD_UPPER_23 = BI_CLAUSE_SUBWORD_UPPER_0 + 23,
BI_CLAUSE_SUBWORD_UPPER_56 = BI_CLAUSE_SUBWORD_UPPER_0 + 56,
};
#define L(x) ((enum bi_clause_subword)(BI_CLAUSE_SUBWORD_LITERAL_0 + x))
@@ -290,15 +290,15 @@ enum bi_clause_subword {
#define R BI_CLAUSE_SUBWORD_RESERVED
struct bi_clause_format {
unsigned format; /* format number */
unsigned pos; /* index in the clause */
enum bi_clause_subword tag_1; /* 2-bits */
enum bi_clause_subword tag_2; /* 3-bits */
enum bi_clause_subword tag_3; /* 3-bits */
enum bi_clause_subword s0_s3; /* 60 bits */
enum bi_clause_subword s4; /* 15 bits */
enum bi_clause_subword s5_s6; /* 30 bits */
enum bi_clause_subword s7; /* 15 bits */
unsigned format; /* format number */
unsigned pos; /* index in the clause */
enum bi_clause_subword tag_1; /* 2-bits */
enum bi_clause_subword tag_2; /* 3-bits */
enum bi_clause_subword tag_3; /* 3-bits */
enum bi_clause_subword s0_s3; /* 60 bits */
enum bi_clause_subword s4; /* 15 bits */
enum bi_clause_subword s5_s6; /* 30 bits */
enum bi_clause_subword s7; /* 15 bits */
};
/* clang-format off */
@@ -341,46 +341,46 @@ static const struct bi_clause_format bi_clause_formats[] = {
* set (and ignored) as a placeholder to differentiate from reserved.
*/
enum bifrost_reg_mode {
BIFROST_R_WL_FMA = 1,
BIFROST_R_WH_FMA = 2,
BIFROST_R_W_FMA = 3,
BIFROST_R_WL_ADD = 4,
BIFROST_R_WH_ADD = 5,
BIFROST_R_W_ADD = 6,
BIFROST_WL_WL_ADD = 7,
BIFROST_WL_WH_ADD = 8,
BIFROST_WL_W_ADD = 9,
BIFROST_WH_WL_ADD = 10,
BIFROST_WH_WH_ADD = 11,
BIFROST_WH_W_ADD = 12,
BIFROST_W_WL_ADD = 13,
BIFROST_W_WH_ADD = 14,
BIFROST_W_W_ADD = 15,
BIFROST_IDLE_1 = 16,
BIFROST_I_W_FMA = 17,
BIFROST_I_WL_FMA = 18,
BIFROST_I_WH_FMA = 19,
BIFROST_R_I = 20,
BIFROST_I_W_ADD = 21,
BIFROST_I_WL_ADD = 22,
BIFROST_I_WH_ADD = 23,
BIFROST_WL_WH_MIX = 24,
BIFROST_WH_WL_MIX = 26,
BIFROST_IDLE = 27,
BIFROST_R_WL_FMA = 1,
BIFROST_R_WH_FMA = 2,
BIFROST_R_W_FMA = 3,
BIFROST_R_WL_ADD = 4,
BIFROST_R_WH_ADD = 5,
BIFROST_R_W_ADD = 6,
BIFROST_WL_WL_ADD = 7,
BIFROST_WL_WH_ADD = 8,
BIFROST_WL_W_ADD = 9,
BIFROST_WH_WL_ADD = 10,
BIFROST_WH_WH_ADD = 11,
BIFROST_WH_W_ADD = 12,
BIFROST_W_WL_ADD = 13,
BIFROST_W_WH_ADD = 14,
BIFROST_W_W_ADD = 15,
BIFROST_IDLE_1 = 16,
BIFROST_I_W_FMA = 17,
BIFROST_I_WL_FMA = 18,
BIFROST_I_WH_FMA = 19,
BIFROST_R_I = 20,
BIFROST_I_W_ADD = 21,
BIFROST_I_WL_ADD = 22,
BIFROST_I_WH_ADD = 23,
BIFROST_WL_WH_MIX = 24,
BIFROST_WH_WL_MIX = 26,
BIFROST_IDLE = 27,
};
enum bifrost_reg_op {
BIFROST_OP_IDLE = 0,
BIFROST_OP_READ = 1,
BIFROST_OP_WRITE = 2,
BIFROST_OP_WRITE_LO = 3,
BIFROST_OP_WRITE_HI = 4,
BIFROST_OP_IDLE = 0,
BIFROST_OP_READ = 1,
BIFROST_OP_WRITE = 2,
BIFROST_OP_WRITE_LO = 3,
BIFROST_OP_WRITE_HI = 4,
};
struct bifrost_reg_ctrl_23 {
enum bifrost_reg_op slot2;
enum bifrost_reg_op slot3;
bool slot3_fma;
enum bifrost_reg_op slot2;
enum bifrost_reg_op slot3;
bool slot3_fma;
};
/* clang-format off */
@@ -420,201 +420,201 @@ static const struct bifrost_reg_ctrl_23 bifrost_reg_ctrl_lut[32] = {
* compiler and stored as a constant */
enum bifrost_texture_operation_mode {
/* Dual texturing */
BIFROST_TEXTURE_OPERATION_DUAL = 1,
/* Dual texturing */
BIFROST_TEXTURE_OPERATION_DUAL = 1,
/* Single texturing */
BIFROST_TEXTURE_OPERATION_SINGLE = 3,
/* Single texturing */
BIFROST_TEXTURE_OPERATION_SINGLE = 3,
};
enum bifrost_index {
/* Both texture/sampler index immediate */
BIFROST_INDEX_IMMEDIATE_SHARED = 0,
/* Both texture/sampler index immediate */
BIFROST_INDEX_IMMEDIATE_SHARED = 0,
/* Sampler index immediate, texture index from staging */
BIFROST_INDEX_IMMEDIATE_SAMPLER = 1,
/* Sampler index immediate, texture index from staging */
BIFROST_INDEX_IMMEDIATE_SAMPLER = 1,
/* Texture index immediate, sampler index from staging */
BIFROST_INDEX_IMMEDIATE_TEXTURE = 2,
/* Texture index immediate, sampler index from staging */
BIFROST_INDEX_IMMEDIATE_TEXTURE = 2,
/* Both indices from (separate) staging registers */
BIFROST_INDEX_REGISTER = 3,
/* Both indices from (separate) staging registers */
BIFROST_INDEX_REGISTER = 3,
};
enum bifrost_tex_op {
/* Given explicit derivatives, compute a gradient descriptor */
BIFROST_TEX_OP_GRDESC_DER = 4,
/* Given explicit derivatives, compute a gradient descriptor */
BIFROST_TEX_OP_GRDESC_DER = 4,
/* Given implicit derivatives (texture coordinates in a fragment
* shader), compute a gradient descriptor */
BIFROST_TEX_OP_GRDESC = 5,
/* Given implicit derivatives (texture coordinates in a fragment
* shader), compute a gradient descriptor */
BIFROST_TEX_OP_GRDESC = 5,
/* Fetch a texel. Takes a staging register with LOD level / face index
* packed 16:16 */
BIFROST_TEX_OP_FETCH = 6,
/* Fetch a texel. Takes a staging register with LOD level / face index
* packed 16:16 */
BIFROST_TEX_OP_FETCH = 6,
/* Filtered texture */
BIFROST_TEX_OP_TEX = 7,
/* Filtered texture */
BIFROST_TEX_OP_TEX = 7,
};
enum bifrost_lod_mode {
/* Takes two staging registers forming a 64-bit gradient descriptor
* (computed by a previous GRDESC or GRDESC_DER operation) */
BIFROST_LOD_MODE_GRDESC = 3,
/* Takes two staging registers forming a 64-bit gradient descriptor
* (computed by a previous GRDESC or GRDESC_DER operation) */
BIFROST_LOD_MODE_GRDESC = 3,
/* Take a staging register with 8:8 fixed-point in bottom 16-bits
* specifying an explicit LOD */
BIFROST_LOD_MODE_EXPLICIT = 4,
/* Take a staging register with 8:8 fixed-point in bottom 16-bits
* specifying an explicit LOD */
BIFROST_LOD_MODE_EXPLICIT = 4,
/* Takes a staging register with bottom 16-bits as 8:8 fixed-point LOD
* bias and top 16-bit as 8:8 fixed-point lower bound (generally left
* zero), added and clamped to a computed LOD */
BIFROST_LOD_MODE_BIAS = 5,
/* Takes a staging register with bottom 16-bits as 8:8 fixed-point LOD
* bias and top 16-bit as 8:8 fixed-point lower bound (generally left
* zero), added and clamped to a computed LOD */
BIFROST_LOD_MODE_BIAS = 5,
/* Set LOD to zero */
BIFROST_LOD_MODE_ZERO = 6,
/* Set LOD to zero */
BIFROST_LOD_MODE_ZERO = 6,
/* Compute LOD */
BIFROST_LOD_MODE_COMPUTE = 7,
/* Compute LOD */
BIFROST_LOD_MODE_COMPUTE = 7,
};
enum bifrost_texture_format {
/* 16-bit floating point, with optional clamping */
BIFROST_TEXTURE_FORMAT_F16 = 0,
BIFROST_TEXTURE_FORMAT_F16_POS = 1,
BIFROST_TEXTURE_FORMAT_F16_PM1 = 2,
BIFROST_TEXTURE_FORMAT_F16_1 = 3,
/* 16-bit floating point, with optional clamping */
BIFROST_TEXTURE_FORMAT_F16 = 0,
BIFROST_TEXTURE_FORMAT_F16_POS = 1,
BIFROST_TEXTURE_FORMAT_F16_PM1 = 2,
BIFROST_TEXTURE_FORMAT_F16_1 = 3,
/* 32-bit floating point, with optional clamping */
BIFROST_TEXTURE_FORMAT_F32 = 4,
BIFROST_TEXTURE_FORMAT_F32_POS = 5,
BIFROST_TEXTURE_FORMAT_F32_PM1 = 6,
BIFROST_TEXTURE_FORMAT_F32_1 = 7,
/* 32-bit floating point, with optional clamping */
BIFROST_TEXTURE_FORMAT_F32 = 4,
BIFROST_TEXTURE_FORMAT_F32_POS = 5,
BIFROST_TEXTURE_FORMAT_F32_PM1 = 6,
BIFROST_TEXTURE_FORMAT_F32_1 = 7,
};
enum bifrost_texture_format_full {
/* Transclude bifrost_texture_format from above */
/* Transclude bifrost_texture_format from above */
/* Integers, unclamped */
BIFROST_TEXTURE_FORMAT_U16 = 12,
BIFROST_TEXTURE_FORMAT_S16 = 13,
BIFROST_TEXTURE_FORMAT_U32 = 14,
BIFROST_TEXTURE_FORMAT_S32 = 15,
/* Integers, unclamped */
BIFROST_TEXTURE_FORMAT_U16 = 12,
BIFROST_TEXTURE_FORMAT_S16 = 13,
BIFROST_TEXTURE_FORMAT_U32 = 14,
BIFROST_TEXTURE_FORMAT_S32 = 15,
};
enum bifrost_texture_fetch {
/* Default texelFetch */
BIFROST_TEXTURE_FETCH_TEXEL = 1,
/* Default texelFetch */
BIFROST_TEXTURE_FETCH_TEXEL = 1,
/* Deprecated, fetches 4x U32 of a U8 x 4 texture. Do not use. */
BIFROST_TEXTURE_FETCH_GATHER4_RGBA = 3,
/* Deprecated, fetches 4x U32 of a U8 x 4 texture. Do not use. */
BIFROST_TEXTURE_FETCH_GATHER4_RGBA = 3,
/* Gathers */
BIFROST_TEXTURE_FETCH_GATHER4_R = 4,
BIFROST_TEXTURE_FETCH_GATHER4_G = 5,
BIFROST_TEXTURE_FETCH_GATHER4_B = 6,
BIFROST_TEXTURE_FETCH_GATHER4_A = 7
/* Gathers */
BIFROST_TEXTURE_FETCH_GATHER4_R = 4,
BIFROST_TEXTURE_FETCH_GATHER4_G = 5,
BIFROST_TEXTURE_FETCH_GATHER4_B = 6,
BIFROST_TEXTURE_FETCH_GATHER4_A = 7
};
struct bifrost_texture_operation {
/* If immediate_indices is set:
* - immediate sampler index
* - index used as texture index
* Otherwise:
* - bifrost_single_index in lower 2 bits
* - 0x3 in upper 2 bits (single-texturing)
*/
unsigned sampler_index_or_mode : 4;
unsigned index : 7;
bool immediate_indices : 1;
enum bifrost_tex_op op : 3;
/* If immediate_indices is set:
* - immediate sampler index
* - index used as texture index
* Otherwise:
* - bifrost_single_index in lower 2 bits
* - 0x3 in upper 2 bits (single-texturing)
*/
unsigned sampler_index_or_mode : 4;
unsigned index : 7;
bool immediate_indices : 1;
enum bifrost_tex_op op : 3;
/* If set for TEX/FETCH, loads texel offsets and multisample index from
* a staging register containing offset_x:offset_y:offset_z:ms_index
* packed 8:8:8:8. Offsets must be in [-31, +31]. If set for
* GRDESC(_DER), disable LOD bias. */
bool offset_or_bias_disable : 1;
/* If set for TEX/FETCH, loads texel offsets and multisample index from
* a staging register containing offset_x:offset_y:offset_z:ms_index
* packed 8:8:8:8. Offsets must be in [-31, +31]. If set for
* GRDESC(_DER), disable LOD bias. */
bool offset_or_bias_disable : 1;
/* If set for TEX/FETCH, loads fp32 shadow comparison value from a
* staging register. Implies fetch_component = gather4_r. If set for
* GRDESC(_DER), disables LOD clamping. */
bool shadow_or_clamp_disable : 1;
/* If set for TEX/FETCH, loads fp32 shadow comparison value from a
* staging register. Implies fetch_component = gather4_r. If set for
* GRDESC(_DER), disables LOD clamping. */
bool shadow_or_clamp_disable : 1;
/* If set, loads an uint32 array index from a staging register. */
bool array : 1;
/* If set, loads an uint32 array index from a staging register. */
bool array : 1;
/* Texture dimension, or 0 for a cubemap */
unsigned dimension : 2;
/* Texture dimension, or 0 for a cubemap */
unsigned dimension : 2;
/* Method to compute LOD value or for a FETCH, the
* bifrost_texture_fetch component specification */
enum bifrost_lod_mode lod_or_fetch : 3;
/* Method to compute LOD value or for a FETCH, the
* bifrost_texture_fetch component specification */
enum bifrost_lod_mode lod_or_fetch : 3;
/* Reserved */
unsigned zero : 1;
/* Reserved */
unsigned zero : 1;
/* Register format for the result */
enum bifrost_texture_format_full format : 4;
/* Register format for the result */
enum bifrost_texture_format_full format : 4;
/* Write mask for the result */
unsigned mask : 4;
/* Write mask for the result */
unsigned mask : 4;
} __attribute__((packed));
struct bifrost_dual_texture_operation {
unsigned primary_sampler_index : 2;
unsigned mode : 2; /* 0x1 for dual */
unsigned primary_texture_index : 2;
unsigned secondary_sampler_index : 2;
unsigned secondary_texture_index : 2;
unsigned primary_sampler_index : 2;
unsigned mode : 2; /* 0x1 for dual */
unsigned primary_texture_index : 2;
unsigned secondary_sampler_index : 2;
unsigned secondary_texture_index : 2;
/* Leave zero for dual texturing */
unsigned reserved : 1;
unsigned index_mode_zero : 1;
/* Leave zero for dual texturing */
unsigned reserved : 1;
unsigned index_mode_zero : 1;
/* Base staging register to write the secondary results to */
unsigned secondary_register : 6;
/* Base staging register to write the secondary results to */
unsigned secondary_register : 6;
/* Format/mask for each texture */
enum bifrost_texture_format secondary_format : 3;
unsigned secondary_mask : 4;
/* Format/mask for each texture */
enum bifrost_texture_format secondary_format : 3;
unsigned secondary_mask : 4;
enum bifrost_texture_format primary_format : 3;
unsigned primary_mask : 4;
enum bifrost_texture_format primary_format : 3;
unsigned primary_mask : 4;
} __attribute__((packed));
static inline uint32_t
bi_dual_tex_as_u32(struct bifrost_dual_texture_operation desc)
{
uint32_t desc_u;
memcpy(&desc_u, &desc, sizeof(desc));
uint32_t desc_u;
memcpy(&desc_u, &desc, sizeof(desc));
return desc_u;
return desc_u;
}
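The memcpy here is the usual strict-aliasing-safe way to bitcast a packed struct to an integer; a pointer cast between unrelated types would be undefined behaviour. The same idiom with a plain float, as a standalone sketch (assuming IEEE-754 floats, which is the practical case everywhere Mesa runs):

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* memcpy-based bitcast, mirroring bi_dual_tex_as_u32 but for a float. */
static uint32_t f32_bits(float f)
{
   uint32_t u;
   memcpy(&u, &f, sizeof(u));
   return u;
}

int main(void)
{
   assert(f32_bits(1.0f) == 0x3f800000u); /* IEEE-754 encoding of 1.0f */
   return 0;
}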
#define BIFROST_MEGA_SAMPLE 128
#define BIFROST_ALL_SAMPLES 255
#define BIFROST_MEGA_SAMPLE 128
#define BIFROST_ALL_SAMPLES 255
#define BIFROST_CURRENT_PIXEL 255
struct bifrost_pixel_indices {
unsigned sample : 8;
unsigned rt : 8;
unsigned x : 8;
unsigned y : 8;
unsigned sample : 8;
unsigned rt : 8;
unsigned x : 8;
unsigned y : 8;
} __attribute__((packed));
enum bi_constmod {
BI_CONSTMOD_NONE,
BI_CONSTMOD_PC_LO,
BI_CONSTMOD_PC_HI,
BI_CONSTMOD_PC_LO_HI
BI_CONSTMOD_NONE,
BI_CONSTMOD_PC_LO,
BI_CONSTMOD_PC_HI,
BI_CONSTMOD_PC_LO_HI
};
struct bi_constants {
/* Raw constant values */
uint64_t raw[6];
/* Raw constant values */
uint64_t raw[6];
/* Associated modifier derived from M values */
enum bi_constmod mods[6];
/* Associated modifier derived from M values */
enum bi_constmod mods[6];
};
/* FAU selectors for constants are out-of-order, construct the top bits
@@ -623,12 +623,10 @@ struct bi_constants {
static inline unsigned
bi_constant_field(unsigned idx)
{
const unsigned values[] = {
4, 5, 6, 7, 2, 3
};
const unsigned values[] = {4, 5, 6, 7, 2, 3};
assert(idx <= 5);
return values[idx] << 4;
assert(idx <= 5);
return values[idx] << 4;
}
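As a worked example of the out-of-order mapping, constant index 0 selects FAU field 4 << 4 = 0x40 and index 4 wraps around to 2 << 4 = 0x20. A minimal check, assuming this header is on the include path of the test program:

#include <assert.h>
#include "bifrost.h"

int main(void)
{
   /* Constant slots 0..5 map to the selectors {4, 5, 6, 7, 2, 3}, shifted
    * into the top bits of the 8-bit FAU index. */
   assert(bi_constant_field(0) == (4u << 4));
   assert(bi_constant_field(4) == (2u << 4));
   return 0;
}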
#ifdef __cplusplus

File diff suppressed because it is too large.


@@ -25,73 +25,73 @@
#define __BIFROST_PUBLIC_H_
#include "compiler/nir/nir.h"
#include "util/u_dynarray.h"
#include "panfrost/util/pan_ir.h"
#include "util/u_dynarray.h"
void
bifrost_compile_shader_nir(nir_shader *nir,
const struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info);
void bifrost_compile_shader_nir(nir_shader *nir,
const struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info);
static const nir_shader_compiler_options bifrost_nir_options = {
.lower_scmp = true,
.lower_flrp16 = true,
.lower_flrp32 = true,
.lower_flrp64 = true,
.lower_ffract = true,
.lower_fmod = true,
.lower_fdiv = true,
.lower_isign = true,
.lower_find_lsb = true,
.lower_ifind_msb = true,
.lower_fdph = true,
.lower_fsqrt = true,
.lower_scmp = true,
.lower_flrp16 = true,
.lower_flrp32 = true,
.lower_flrp64 = true,
.lower_ffract = true,
.lower_fmod = true,
.lower_fdiv = true,
.lower_isign = true,
.lower_find_lsb = true,
.lower_ifind_msb = true,
.lower_fdph = true,
.lower_fsqrt = true,
.lower_fsign = true,
.lower_fsign = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_extract_to_shifts = true,
.lower_insert_byte = true,
.lower_rotate = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_extract_to_shifts = true,
.lower_insert_byte = true,
.lower_rotate = true,
.lower_pack_half_2x16 = true,
.lower_pack_unorm_2x16 = true,
.lower_pack_snorm_2x16 = true,
.lower_pack_unorm_4x8 = true,
.lower_pack_snorm_4x8 = true,
.lower_unpack_half_2x16 = true,
.lower_unpack_unorm_2x16 = true,
.lower_unpack_snorm_2x16 = true,
.lower_unpack_unorm_4x8 = true,
.lower_unpack_snorm_4x8 = true,
.lower_pack_split = true,
.lower_pack_half_2x16 = true,
.lower_pack_unorm_2x16 = true,
.lower_pack_snorm_2x16 = true,
.lower_pack_unorm_4x8 = true,
.lower_pack_snorm_4x8 = true,
.lower_unpack_half_2x16 = true,
.lower_unpack_unorm_2x16 = true,
.lower_unpack_snorm_2x16 = true,
.lower_unpack_unorm_4x8 = true,
.lower_unpack_snorm_4x8 = true,
.lower_pack_split = true,
.lower_doubles_options = nir_lower_dmod,
/* TODO: Don't lower supported 64-bit operations */
.lower_int64_options = ~0,
/* TODO: Use IMULD on v7 */
.lower_mul_high = true,
.lower_fisnormal = true,
.lower_uadd_carry = true,
.lower_usub_borrow = true,
.lower_doubles_options = nir_lower_dmod,
/* TODO: Don't lower supported 64-bit operations */
.lower_int64_options = ~0,
/* TODO: Use IMULD on v7 */
.lower_mul_high = true,
.lower_fisnormal = true,
.lower_uadd_carry = true,
.lower_usub_borrow = true,
.has_fsub = true,
.has_isub = true,
.vectorize_io = true,
.vectorize_vec2_16bit = true,
.fuse_ffma16 = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.use_interpolated_input_intrinsics = true,
.has_fsub = true,
.has_isub = true,
.vectorize_io = true,
.vectorize_vec2_16bit = true,
.fuse_ffma16 = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.use_interpolated_input_intrinsics = true,
.lower_uniforms_to_ubo = true,
.lower_uniforms_to_ubo = true,
.has_cs_global_id = true,
.lower_cs_local_index_to_id = true,
.max_unroll_iterations = 32,
.force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
.force_indirect_unrolling_sampler = true,
.has_cs_global_id = true,
.lower_cs_local_index_to_id = true,
.max_unroll_iterations = 32,
.force_indirect_unrolling =
(nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
.force_indirect_unrolling_sampler = true,
};
#endif


@@ -24,21 +24,21 @@
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
*/
#include "compiler.h"
#include "bi_builder.h"
#include "compiler.h"
bool
bi_has_arg(const bi_instr *ins, bi_index arg)
{
if (!ins)
return false;
if (!ins)
return false;
bi_foreach_src(ins, s) {
if (bi_is_equiv(ins->src[s], arg))
return true;
}
bi_foreach_src(ins, s) {
if (bi_is_equiv(ins->src[s], arg))
return true;
}
return false;
return false;
}
/* Precondition: valid 16-bit or 32-bit register format. Returns whether it is
@@ -48,131 +48,131 @@ bi_has_arg(const bi_instr *ins, bi_index arg)
bool
bi_is_regfmt_16(enum bi_register_format fmt)
{
switch (fmt) {
case BI_REGISTER_FORMAT_F16:
case BI_REGISTER_FORMAT_S16:
case BI_REGISTER_FORMAT_U16:
return true;
case BI_REGISTER_FORMAT_F32:
case BI_REGISTER_FORMAT_S32:
case BI_REGISTER_FORMAT_U32:
case BI_REGISTER_FORMAT_AUTO:
return false;
default:
unreachable("Invalid register format");
}
switch (fmt) {
case BI_REGISTER_FORMAT_F16:
case BI_REGISTER_FORMAT_S16:
case BI_REGISTER_FORMAT_U16:
return true;
case BI_REGISTER_FORMAT_F32:
case BI_REGISTER_FORMAT_S32:
case BI_REGISTER_FORMAT_U32:
case BI_REGISTER_FORMAT_AUTO:
return false;
default:
unreachable("Invalid register format");
}
}
static unsigned
bi_count_staging_registers(const bi_instr *ins)
{
enum bi_sr_count count = bi_opcode_props[ins->op].sr_count;
unsigned vecsize = ins->vecsize + 1; /* XXX: off-by-one */
enum bi_sr_count count = bi_opcode_props[ins->op].sr_count;
unsigned vecsize = ins->vecsize + 1; /* XXX: off-by-one */
switch (count) {
case BI_SR_COUNT_0 ... BI_SR_COUNT_4:
return count;
case BI_SR_COUNT_FORMAT:
return bi_is_regfmt_16(ins->register_format) ?
DIV_ROUND_UP(vecsize, 2) : vecsize;
case BI_SR_COUNT_VECSIZE:
return vecsize;
case BI_SR_COUNT_SR_COUNT:
return ins->sr_count;
}
switch (count) {
case BI_SR_COUNT_0 ... BI_SR_COUNT_4:
return count;
case BI_SR_COUNT_FORMAT:
return bi_is_regfmt_16(ins->register_format) ? DIV_ROUND_UP(vecsize, 2)
: vecsize;
case BI_SR_COUNT_VECSIZE:
return vecsize;
case BI_SR_COUNT_SR_COUNT:
return ins->sr_count;
}
unreachable("Invalid sr_count");
unreachable("Invalid sr_count");
}
unsigned
bi_count_read_registers(const bi_instr *ins, unsigned s)
{
/* ATOM reads 1 but writes 2. Exception for ACMPXCHG */
if (s == 0 && ins->op == BI_OPCODE_ATOM_RETURN_I32)
return (ins->atom_opc == BI_ATOM_OPC_ACMPXCHG) ? 2 : 1;
else if (s == 0 && bi_opcode_props[ins->op].sr_read)
return bi_count_staging_registers(ins);
else if (s == 4 && ins->op == BI_OPCODE_BLEND)
return ins->sr_count_2; /* Dual source blending */
else if (s == 0 && ins->op == BI_OPCODE_SPLIT_I32)
return ins->nr_dests;
else
return 1;
/* ATOM reads 1 but writes 2. Exception for ACMPXCHG */
if (s == 0 && ins->op == BI_OPCODE_ATOM_RETURN_I32)
return (ins->atom_opc == BI_ATOM_OPC_ACMPXCHG) ? 2 : 1;
else if (s == 0 && bi_opcode_props[ins->op].sr_read)
return bi_count_staging_registers(ins);
else if (s == 4 && ins->op == BI_OPCODE_BLEND)
return ins->sr_count_2; /* Dual source blending */
else if (s == 0 && ins->op == BI_OPCODE_SPLIT_I32)
return ins->nr_dests;
else
return 1;
}
unsigned
bi_count_write_registers(const bi_instr *ins, unsigned d)
{
if (d == 0 && bi_opcode_props[ins->op].sr_write) {
switch (ins->op) {
case BI_OPCODE_TEXC:
case BI_OPCODE_TEXC_DUAL:
if (ins->sr_count_2)
return ins->sr_count;
else
return bi_is_regfmt_16(ins->register_format) ? 2 : 4;
if (d == 0 && bi_opcode_props[ins->op].sr_write) {
switch (ins->op) {
case BI_OPCODE_TEXC:
case BI_OPCODE_TEXC_DUAL:
if (ins->sr_count_2)
return ins->sr_count;
else
return bi_is_regfmt_16(ins->register_format) ? 2 : 4;
case BI_OPCODE_TEX_SINGLE:
case BI_OPCODE_TEX_FETCH:
case BI_OPCODE_TEX_GATHER: {
unsigned chans = util_bitcount(ins->write_mask);
case BI_OPCODE_TEX_SINGLE:
case BI_OPCODE_TEX_FETCH:
case BI_OPCODE_TEX_GATHER: {
unsigned chans = util_bitcount(ins->write_mask);
return bi_is_regfmt_16(ins->register_format) ?
DIV_ROUND_UP(chans, 2) : chans;
}
return bi_is_regfmt_16(ins->register_format) ? DIV_ROUND_UP(chans, 2)
: chans;
}
case BI_OPCODE_ACMPXCHG_I32:
/* Reads 2 but writes 1 */
return 1;
case BI_OPCODE_ACMPXCHG_I32:
/* Reads 2 but writes 1 */
return 1;
case BI_OPCODE_ATOM1_RETURN_I32:
/* Allow omitting the destination for plain ATOM1 */
return bi_is_null(ins->dest[0]) ? 0 : ins->sr_count;
default:
return bi_count_staging_registers(ins);
}
} else if (ins->op == BI_OPCODE_SEG_ADD_I64) {
return 2;
} else if (ins->op == BI_OPCODE_TEXC_DUAL && d == 1) {
return ins->sr_count_2;
} else if (ins->op == BI_OPCODE_COLLECT_I32 && d == 0) {
return ins->nr_srcs;
}
case BI_OPCODE_ATOM1_RETURN_I32:
/* Allow omitting the destination for plain ATOM1 */
return bi_is_null(ins->dest[0]) ? 0 : ins->sr_count;
default:
return bi_count_staging_registers(ins);
}
} else if (ins->op == BI_OPCODE_SEG_ADD_I64) {
return 2;
} else if (ins->op == BI_OPCODE_TEXC_DUAL && d == 1) {
return ins->sr_count_2;
} else if (ins->op == BI_OPCODE_COLLECT_I32 && d == 0) {
return ins->nr_srcs;
}
return 1;
return 1;
}
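For instance, the rounding in the TEX_* case works out as follows; a standalone sketch with a local stand-in for Mesa's DIV_ROUND_UP macro:

#include <assert.h>

/* Simplified local stand-in for Mesa's DIV_ROUND_UP. */
#define LOCAL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
   unsigned chans = 3; /* util_bitcount(0b0111): three channels enabled */

   /* 16-bit register formats pack two channels per staging register... */
   assert(LOCAL_DIV_ROUND_UP(chans, 2) == 2);

   /* ...while 32-bit formats need one register per channel. */
   assert(chans == 3);
   return 0;
}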
unsigned
bi_writemask(const bi_instr *ins, unsigned d)
{
unsigned mask = BITFIELD_MASK(bi_count_write_registers(ins, d));
unsigned shift = ins->dest[d].offset;
return (mask << shift);
unsigned mask = BITFIELD_MASK(bi_count_write_registers(ins, d));
unsigned shift = ins->dest[d].offset;
return (mask << shift);
}
bi_clause *
bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause)
{
if (!block && !clause)
return NULL;
if (!block && !clause)
return NULL;
/* Try the first clause in this block if we're starting from scratch */
if (!clause && !list_is_empty(&block->clauses))
return list_first_entry(&block->clauses, bi_clause, link);
/* Try the first clause in this block if we're starting from scratch */
if (!clause && !list_is_empty(&block->clauses))
return list_first_entry(&block->clauses, bi_clause, link);
/* Try the next clause in this block */
if (clause && clause->link.next != &block->clauses)
return list_first_entry(&(clause->link), bi_clause, link);
/* Try the next clause in this block */
if (clause && clause->link.next != &block->clauses)
return list_first_entry(&(clause->link), bi_clause, link);
/* Try the next block, or the one after that if it's empty, etc. */
bi_block *next_block = bi_next_block(block);
/* Try the next block, or the one after that if it's empty, etc. */
bi_block *next_block = bi_next_block(block);
bi_foreach_block_from(ctx, next_block, block) {
if (!list_is_empty(&block->clauses))
return list_first_entry(&block->clauses, bi_clause, link);
}
bi_foreach_block_from(ctx, next_block, block) {
if (!list_is_empty(&block->clauses))
return list_first_entry(&block->clauses, bi_clause, link);
}
return NULL;
return NULL;
}
/* Does an instruction have a side effect not captured by its register
@ -184,41 +184,41 @@ bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause)
bool
bi_side_effects(const bi_instr *I)
{
if (bi_opcode_props[I->op].last)
return true;
if (bi_opcode_props[I->op].last)
return true;
switch (I->op) {
case BI_OPCODE_DISCARD_F32:
case BI_OPCODE_DISCARD_B32:
return true;
default:
break;
}
switch (I->op) {
case BI_OPCODE_DISCARD_F32:
case BI_OPCODE_DISCARD_B32:
return true;
default:
break;
}
switch (bi_opcode_props[I->op].message) {
case BIFROST_MESSAGE_NONE:
case BIFROST_MESSAGE_VARYING:
case BIFROST_MESSAGE_ATTRIBUTE:
case BIFROST_MESSAGE_TEX:
case BIFROST_MESSAGE_VARTEX:
case BIFROST_MESSAGE_LOAD:
case BIFROST_MESSAGE_64BIT:
return false;
switch (bi_opcode_props[I->op].message) {
case BIFROST_MESSAGE_NONE:
case BIFROST_MESSAGE_VARYING:
case BIFROST_MESSAGE_ATTRIBUTE:
case BIFROST_MESSAGE_TEX:
case BIFROST_MESSAGE_VARTEX:
case BIFROST_MESSAGE_LOAD:
case BIFROST_MESSAGE_64BIT:
return false;
case BIFROST_MESSAGE_STORE:
case BIFROST_MESSAGE_ATOMIC:
case BIFROST_MESSAGE_BARRIER:
case BIFROST_MESSAGE_BLEND:
case BIFROST_MESSAGE_Z_STENCIL:
case BIFROST_MESSAGE_ATEST:
case BIFROST_MESSAGE_JOB:
return true;
case BIFROST_MESSAGE_STORE:
case BIFROST_MESSAGE_ATOMIC:
case BIFROST_MESSAGE_BARRIER:
case BIFROST_MESSAGE_BLEND:
case BIFROST_MESSAGE_Z_STENCIL:
case BIFROST_MESSAGE_ATEST:
case BIFROST_MESSAGE_JOB:
return true;
case BIFROST_MESSAGE_TILE:
return (I->op != BI_OPCODE_LD_TILE);
}
case BIFROST_MESSAGE_TILE:
return (I->op != BI_OPCODE_LD_TILE);
}
unreachable("Invalid message type");
unreachable("Invalid message type");
}
/* Branch reconvergence is required when the execution mask may change
@ -230,10 +230,10 @@ bi_side_effects(const bi_instr *I)
bool
bi_reconverge_branches(bi_block *block)
{
if (bi_num_successors(block) == 1)
return bi_num_predecessors(block->successors[0]) > 1;
else
return true;
if (bi_num_successors(block) == 1)
return bi_num_predecessors(block->successors[0]) > 1;
else
return true;
}
/*
@ -252,42 +252,41 @@ bi_reconverge_branches(bi_block *block)
bool
bi_can_replace_with_csel(bi_instr *I)
{
return ((I->op == BI_OPCODE_MUX_I32) || (I->op == BI_OPCODE_MUX_V2I16)) &&
(I->mux != BI_MUX_BIT) &&
(I->src[0].swizzle == BI_SWIZZLE_H01) &&
(I->src[1].swizzle == BI_SWIZZLE_H01) &&
(I->src[2].swizzle == BI_SWIZZLE_H01);
return ((I->op == BI_OPCODE_MUX_I32) || (I->op == BI_OPCODE_MUX_V2I16)) &&
(I->mux != BI_MUX_BIT) && (I->src[0].swizzle == BI_SWIZZLE_H01) &&
(I->src[1].swizzle == BI_SWIZZLE_H01) &&
(I->src[2].swizzle == BI_SWIZZLE_H01);
}
static enum bi_opcode
bi_csel_for_mux(bool must_sign, bool b32, enum bi_mux mux)
{
switch (mux) {
case BI_MUX_INT_ZERO:
if (must_sign)
return b32 ? BI_OPCODE_CSEL_U32 : BI_OPCODE_CSEL_V2U16;
else
return b32 ? BI_OPCODE_CSEL_I32 : BI_OPCODE_CSEL_V2I16;
case BI_MUX_NEG:
return b32 ? BI_OPCODE_CSEL_S32 : BI_OPCODE_CSEL_V2S16;
case BI_MUX_FP_ZERO:
return b32 ? BI_OPCODE_CSEL_F32 : BI_OPCODE_CSEL_V2F16;
default:
unreachable("No CSEL for MUX.bit");
}
switch (mux) {
case BI_MUX_INT_ZERO:
if (must_sign)
return b32 ? BI_OPCODE_CSEL_U32 : BI_OPCODE_CSEL_V2U16;
else
return b32 ? BI_OPCODE_CSEL_I32 : BI_OPCODE_CSEL_V2I16;
case BI_MUX_NEG:
return b32 ? BI_OPCODE_CSEL_S32 : BI_OPCODE_CSEL_V2S16;
case BI_MUX_FP_ZERO:
return b32 ? BI_OPCODE_CSEL_F32 : BI_OPCODE_CSEL_V2F16;
default:
unreachable("No CSEL for MUX.bit");
}
}
bi_instr *
bi_csel_from_mux(bi_builder *b, const bi_instr *I, bool must_sign)
{
assert(I->op == BI_OPCODE_MUX_I32 || I->op == BI_OPCODE_MUX_V2I16);
assert(I->op == BI_OPCODE_MUX_I32 || I->op == BI_OPCODE_MUX_V2I16);
/* Build a new CSEL */
enum bi_cmpf cmpf = (I->mux == BI_MUX_NEG) ? BI_CMPF_LT : BI_CMPF_EQ;
bi_instr *csel = bi_csel_u32_to(b, I->dest[0], I->src[2], bi_zero(),
I->src[0], I->src[1], cmpf);
/* Build a new CSEL */
enum bi_cmpf cmpf = (I->mux == BI_MUX_NEG) ? BI_CMPF_LT : BI_CMPF_EQ;
bi_instr *csel = bi_csel_u32_to(b, I->dest[0], I->src[2], bi_zero(),
I->src[0], I->src[1], cmpf);
/* Fixup the opcode and use it */
csel->op = bi_csel_for_mux(must_sign, I->op == BI_OPCODE_MUX_I32, I->mux);
return csel;
/* Fixup the opcode and use it */
csel->op = bi_csel_for_mux(must_sign, I->op == BI_OPCODE_MUX_I32, I->mux);
return csel;
}
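Editorial note (not part of the commit): the MUX-to-CSEL rewrite above depends on the operand order chosen in bi_csel_from_mux. The sketch below is a minimal host-side model of that selection, assuming the usual Bifrost CSEL convention result = cmpf(s0, s1) ? s2 : s3, so that the constructed csel(src[2], 0, src[0], src[1], cmpf) reads as "compare the selector against zero, pick src[0] on success, src[1] otherwise". The model_csel_* helpers are hypothetical names used only for illustration.

  /* Host-side model of the CSEL built in bi_csel_from_mux, under the operand
   * convention stated above (an assumption, not taken from the commit). */
  #include <stdint.h>
  #include <stdio.h>

  static uint32_t
  model_csel_eq_zero(uint32_t sel, uint32_t t, uint32_t f)
  {
     return (sel == 0) ? t : f; /* BI_CMPF_EQ against bi_zero() */
  }

  static uint32_t
  model_csel_lt_zero(int32_t sel, uint32_t t, uint32_t f)
  {
     return (sel < 0) ? t : f; /* BI_CMPF_LT, the BI_MUX_NEG case */
  }

  int
  main(void)
  {
     printf("%u\n", model_csel_eq_zero(0, 7, 9));  /* 7: zero selector picks src[0] */
     printf("%u\n", model_csel_lt_zero(-1, 7, 9)); /* 7: negative selector picks src[0] */
     printf("%u\n", model_csel_lt_zero(1, 7, 9));  /* 9: otherwise src[1] */
     return 0;
  }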
View file
@ -26,15 +26,15 @@
#include <getopt.h>
#include <string.h>
#include "disassemble.h"
#include "valhall/disassemble.h"
#include "compiler.h"
#include "disassemble.h"
#include "main/mtypes.h"
#include "compiler/glsl/standalone.h"
#include "compiler/glsl/glsl_to_nir.h"
#include "compiler/glsl/gl_nir.h"
#include "compiler/glsl/glsl_to_nir.h"
#include "compiler/glsl/standalone.h"
#include "compiler/nir_types.h"
#include "main/mtypes.h"
#include "util/u_dynarray.h"
#include "bifrost_compile.h"
@ -44,25 +44,25 @@ int verbose = 0;
static gl_shader_stage
filename_to_stage(const char *stage)
{
const char *ext = strrchr(stage, '.');
const char *ext = strrchr(stage, '.');
if (ext == NULL) {
fprintf(stderr, "No extension found in %s\n", stage);
exit(1);
}
if (ext == NULL) {
fprintf(stderr, "No extension found in %s\n", stage);
exit(1);
}
if (!strcmp(ext, ".cs") || !strcmp(ext, ".comp"))
return MESA_SHADER_COMPUTE;
else if (!strcmp(ext, ".vs") || !strcmp(ext, ".vert"))
return MESA_SHADER_VERTEX;
else if (!strcmp(ext, ".fs") || !strcmp(ext, ".frag"))
return MESA_SHADER_FRAGMENT;
else {
fprintf(stderr, "Invalid extension %s\n", ext);
exit(1);
}
if (!strcmp(ext, ".cs") || !strcmp(ext, ".comp"))
return MESA_SHADER_COMPUTE;
else if (!strcmp(ext, ".vs") || !strcmp(ext, ".vert"))
return MESA_SHADER_VERTEX;
else if (!strcmp(ext, ".fs") || !strcmp(ext, ".frag"))
return MESA_SHADER_FRAGMENT;
else {
fprintf(stderr, "Invalid extension %s\n", ext);
exit(1);
}
unreachable("Should've returned or bailed");
unreachable("Should've returned or bailed");
}
static int
@ -80,7 +80,7 @@ glsl_type_size(const struct glsl_type *type, bool bindless)
static void
insert_sorted(struct exec_list *var_list, nir_variable *new_var)
{
nir_foreach_variable_in_list (var, var_list) {
nir_foreach_variable_in_list(var, var_list) {
if (var->data.location > new_var->data.location) {
exec_node_insert_node_before(&var->node, &new_var->node);
return;
@ -94,7 +94,7 @@ sort_varyings(nir_shader *nir, nir_variable_mode mode)
{
struct exec_list new_list;
exec_list_make_empty(&new_list);
nir_foreach_variable_with_modes_safe (var, nir, mode) {
nir_foreach_variable_with_modes_safe(var, nir, mode) {
exec_node_remove(&var->node);
insert_sorted(&new_list, var);
}
@ -104,7 +104,7 @@ sort_varyings(nir_shader *nir, nir_variable_mode mode)
static void
fixup_varying_slots(nir_shader *nir, nir_variable_mode mode)
{
nir_foreach_variable_with_modes (var, nir, mode) {
nir_foreach_variable_with_modes(var, nir, mode) {
if (var->data.location >= VARYING_SLOT_VAR0) {
var->data.location += 9;
} else if ((var->data.location >= VARYING_SLOT_TEX0) &&
@ -117,228 +117,219 @@ fixup_varying_slots(nir_shader *nir, nir_variable_mode mode)
static void
compile_shader(int stages, char **files)
{
struct gl_shader_program *prog;
nir_shader *nir[MESA_SHADER_COMPUTE + 1];
unsigned shader_types[MESA_SHADER_COMPUTE + 1];
struct gl_shader_program *prog;
nir_shader *nir[MESA_SHADER_COMPUTE + 1];
unsigned shader_types[MESA_SHADER_COMPUTE + 1];
if (stages > MESA_SHADER_COMPUTE) {
fprintf(stderr, "Too many stages");
exit(1);
}
if (stages > MESA_SHADER_COMPUTE) {
fprintf(stderr, "Too many stages");
exit(1);
}
for (unsigned i = 0; i < stages; ++i)
shader_types[i] = filename_to_stage(files[i]);
for (unsigned i = 0; i < stages; ++i)
shader_types[i] = filename_to_stage(files[i]);
struct standalone_options options = {
.glsl_version = 300, /* ES - needed for precision */
.do_link = true,
.lower_precision = true
};
struct standalone_options options = {
.glsl_version = 300, /* ES - needed for precision */
.do_link = true,
.lower_precision = true};
static struct gl_context local_ctx;
static struct gl_context local_ctx;
prog = standalone_compile_shader(&options, stages, files, &local_ctx);
prog = standalone_compile_shader(&options, stages, files, &local_ctx);
for (unsigned i = 0; i < stages; ++i) {
gl_shader_stage stage = shader_types[i];
prog->_LinkedShaders[stage]->Program->info.stage = stage;
}
for (unsigned i = 0; i < stages; ++i) {
gl_shader_stage stage = shader_types[i];
prog->_LinkedShaders[stage]->Program->info.stage = stage;
}
struct util_dynarray binary;
struct util_dynarray binary;
util_dynarray_init(&binary, NULL);
util_dynarray_init(&binary, NULL);
for (unsigned i = 0; i < stages; ++i) {
nir[i] = glsl_to_nir(&local_ctx.Const, prog, shader_types[i], &bifrost_nir_options);
for (unsigned i = 0; i < stages; ++i) {
nir[i] = glsl_to_nir(&local_ctx.Const, prog, shader_types[i],
&bifrost_nir_options);
if (shader_types[i] == MESA_SHADER_VERTEX) {
nir_assign_var_locations(nir[i], nir_var_shader_in, &nir[i]->num_inputs,
glsl_type_size);
sort_varyings(nir[i], nir_var_shader_out);
nir_assign_var_locations(nir[i], nir_var_shader_out, &nir[i]->num_outputs,
glsl_type_size);
fixup_varying_slots(nir[i], nir_var_shader_out);
} else if (shader_types[i] == MESA_SHADER_FRAGMENT) {
sort_varyings(nir[i], nir_var_shader_in);
nir_assign_var_locations(nir[i], nir_var_shader_in, &nir[i]->num_inputs,
glsl_type_size);
fixup_varying_slots(nir[i], nir_var_shader_in);
nir_assign_var_locations(nir[i], nir_var_shader_out, &nir[i]->num_outputs,
glsl_type_size);
}
if (shader_types[i] == MESA_SHADER_VERTEX) {
nir_assign_var_locations(nir[i], nir_var_shader_in,
&nir[i]->num_inputs, glsl_type_size);
sort_varyings(nir[i], nir_var_shader_out);
nir_assign_var_locations(nir[i], nir_var_shader_out,
&nir[i]->num_outputs, glsl_type_size);
fixup_varying_slots(nir[i], nir_var_shader_out);
} else if (shader_types[i] == MESA_SHADER_FRAGMENT) {
sort_varyings(nir[i], nir_var_shader_in);
nir_assign_var_locations(nir[i], nir_var_shader_in,
&nir[i]->num_inputs, glsl_type_size);
fixup_varying_slots(nir[i], nir_var_shader_in);
nir_assign_var_locations(nir[i], nir_var_shader_out,
&nir[i]->num_outputs, glsl_type_size);
}
nir_assign_var_locations(nir[i], nir_var_uniform, &nir[i]->num_uniforms,
glsl_type_size);
nir_assign_var_locations(nir[i], nir_var_uniform, &nir[i]->num_uniforms,
glsl_type_size);
NIR_PASS_V(nir[i], nir_lower_global_vars_to_local);
NIR_PASS_V(nir[i], nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir[i]), true, i == 0);
NIR_PASS_V(nir[i], nir_opt_copy_prop_vars);
NIR_PASS_V(nir[i], nir_opt_combine_stores, nir_var_all);
NIR_PASS_V(nir[i], nir_lower_global_vars_to_local);
NIR_PASS_V(nir[i], nir_lower_io_to_temporaries,
nir_shader_get_entrypoint(nir[i]), true, i == 0);
NIR_PASS_V(nir[i], nir_opt_copy_prop_vars);
NIR_PASS_V(nir[i], nir_opt_combine_stores, nir_var_all);
NIR_PASS_V(nir[i], nir_lower_system_values);
NIR_PASS_V(nir[i], gl_nir_lower_samplers, prog);
NIR_PASS_V(nir[i], nir_split_var_copies);
NIR_PASS_V(nir[i], nir_lower_var_copies);
NIR_PASS_V(nir[i], nir_lower_system_values);
NIR_PASS_V(nir[i], gl_nir_lower_samplers, prog);
NIR_PASS_V(nir[i], nir_split_var_copies);
NIR_PASS_V(nir[i], nir_lower_var_copies);
NIR_PASS_V(nir[i], nir_lower_io, nir_var_uniform,
st_packed_uniforms_type_size,
(nir_lower_io_options)0);
NIR_PASS_V(nir[i], nir_lower_uniforms_to_ubo, true, false);
NIR_PASS_V(nir[i], nir_lower_io, nir_var_uniform,
st_packed_uniforms_type_size, (nir_lower_io_options)0);
NIR_PASS_V(nir[i], nir_lower_uniforms_to_ubo, true, false);
/* before buffers and vars_to_ssa */
NIR_PASS_V(nir[i], gl_nir_lower_images, true);
/* before buffers and vars_to_ssa */
NIR_PASS_V(nir[i], gl_nir_lower_images, true);
NIR_PASS_V(nir[i], gl_nir_lower_buffers, prog);
NIR_PASS_V(nir[i], nir_opt_constant_folding);
NIR_PASS_V(nir[i], gl_nir_lower_buffers, prog);
NIR_PASS_V(nir[i], nir_opt_constant_folding);
struct panfrost_compile_inputs inputs = {
.gpu_id = gpu_id,
.fixed_sysval_ubo = -1,
};
struct pan_shader_info info = { 0 };
struct panfrost_compile_inputs inputs = {
.gpu_id = gpu_id,
.fixed_sysval_ubo = -1,
};
struct pan_shader_info info = {0};
util_dynarray_clear(&binary);
bifrost_compile_shader_nir(nir[i], &inputs, &binary, &info);
util_dynarray_clear(&binary);
bifrost_compile_shader_nir(nir[i], &inputs, &binary, &info);
char *fn = NULL;
asprintf(&fn, "shader_%u.bin", i);
assert(fn != NULL);
FILE *fp = fopen(fn, "wb");
fwrite(binary.data, 1, binary.size, fp);
fclose(fp);
free(fn);
}
char *fn = NULL;
asprintf(&fn, "shader_%u.bin", i);
assert(fn != NULL);
FILE *fp = fopen(fn, "wb");
fwrite(binary.data, 1, binary.size, fp);
fclose(fp);
free(fn);
}
util_dynarray_fini(&binary);
util_dynarray_fini(&binary);
}
#define BI_FOURCC(ch0, ch1, ch2, ch3) ( \
(uint32_t)(ch0) | (uint32_t)(ch1) << 8 | \
(uint32_t)(ch2) << 16 | (uint32_t)(ch3) << 24)
#define BI_FOURCC(ch0, ch1, ch2, ch3) \
((uint32_t)(ch0) | (uint32_t)(ch1) << 8 | (uint32_t)(ch2) << 16 | \
(uint32_t)(ch3) << 24)
static void
disassemble(const char *filename)
{
FILE *fp = fopen(filename, "rb");
assert(fp);
FILE *fp = fopen(filename, "rb");
assert(fp);
fseek(fp, 0, SEEK_END);
unsigned filesize = ftell(fp);
rewind(fp);
fseek(fp, 0, SEEK_END);
unsigned filesize = ftell(fp);
rewind(fp);
uint32_t *code = malloc(filesize);
unsigned res = fread(code, 1, filesize, fp);
if (res != filesize) {
printf("Couldn't read full file\n");
}
uint32_t *code = malloc(filesize);
unsigned res = fread(code, 1, filesize, fp);
if (res != filesize) {
printf("Couldn't read full file\n");
}
fclose(fp);
fclose(fp);
void *entrypoint = code;
void *entrypoint = code;
if (filesize && code[0] == BI_FOURCC('M', 'B', 'S', '2')) {
for (int i = 0; i < filesize / 4; ++i) {
if (code[i] != BI_FOURCC('O', 'B', 'J', 'C'))
continue;
if (filesize && code[0] == BI_FOURCC('M', 'B', 'S', '2')) {
for (int i = 0; i < filesize / 4; ++i) {
if (code[i] != BI_FOURCC('O', 'B', 'J', 'C'))
continue;
unsigned size = code[i + 1];
unsigned offset = i + 2;
unsigned size = code[i + 1];
unsigned offset = i + 2;
entrypoint = code + offset;
filesize = size;
}
}
entrypoint = code + offset;
filesize = size;
}
}
if ((gpu_id >> 12) >= 9)
disassemble_valhall(stdout, entrypoint, filesize, verbose);
else
disassemble_bifrost(stdout, entrypoint, filesize, verbose);
if ((gpu_id >> 12) >= 9)
disassemble_valhall(stdout, entrypoint, filesize, verbose);
else
disassemble_bifrost(stdout, entrypoint, filesize, verbose);
free(code);
free(code);
}
int
main(int argc, char **argv)
{
int c;
int c;
if (argc < 2) {
printf("Pass a command\n");
exit(1);
}
if (argc < 2) {
printf("Pass a command\n");
exit(1);
}
static struct option longopts[] = {
{ "id", optional_argument, NULL, 'i' },
{ "gpu", optional_argument, NULL, 'g' },
{ "verbose", no_argument, &verbose, 'v' },
{ NULL, 0, NULL, 0 }
};
static struct option longopts[] = {{"id", optional_argument, NULL, 'i'},
{"gpu", optional_argument, NULL, 'g'},
{"verbose", no_argument, &verbose, 'v'},
{NULL, 0, NULL, 0}};
static struct {
const char *name;
unsigned major, minor;
} gpus[] = {
{ "G71", 6, 0 },
{ "G72", 6, 2 },
{ "G51", 7, 0 },
{ "G76", 7, 1 },
{ "G52", 7, 2 },
{ "G31", 7, 3 },
{ "G77", 9, 0 },
{ "G57", 9, 1 },
{ "G78", 9, 2 },
{ "G57", 9, 3 },
{ "G68", 9, 4 },
{ "G78AE", 9, 5 },
};
static struct {
const char *name;
unsigned major, minor;
} gpus[] = {
{"G71", 6, 0}, {"G72", 6, 2}, {"G51", 7, 0}, {"G76", 7, 1},
{"G52", 7, 2}, {"G31", 7, 3}, {"G77", 9, 0}, {"G57", 9, 1},
{"G78", 9, 2}, {"G57", 9, 3}, {"G68", 9, 4}, {"G78AE", 9, 5},
};
while ((c = getopt_long(argc, argv, "v:", longopts, NULL)) != -1) {
while ((c = getopt_long(argc, argv, "v:", longopts, NULL)) != -1) {
switch (c) {
case 'i':
gpu_id = atoi(optarg);
switch (c) {
case 'i':
gpu_id = atoi(optarg);
if (!gpu_id) {
fprintf(stderr, "Expected GPU ID, got %s\n", optarg);
return 1;
}
if (!gpu_id) {
fprintf(stderr, "Expected GPU ID, got %s\n", optarg);
return 1;
}
break;
case 'g':
gpu_id = 0;
break;
case 'g':
gpu_id = 0;
/* Compatibility with the Arm compiler */
if (strncmp(optarg, "Mali-", 5) == 0) optarg += 5;
/* Compatibility with the Arm compiler */
if (strncmp(optarg, "Mali-", 5) == 0)
optarg += 5;
for (unsigned i = 0; i < ARRAY_SIZE(gpus); ++i) {
if (strcmp(gpus[i].name, optarg)) continue;
for (unsigned i = 0; i < ARRAY_SIZE(gpus); ++i) {
if (strcmp(gpus[i].name, optarg))
continue;
unsigned major = gpus[i].major;
unsigned minor = gpus[i].minor;
unsigned major = gpus[i].major;
unsigned minor = gpus[i].minor;
gpu_id = (major << 12) | (minor << 8);
break;
}
gpu_id = (major << 12) | (minor << 8);
break;
}
if (!gpu_id) {
fprintf(stderr, "Unknown GPU %s\n", optarg);
return 1;
}
if (!gpu_id) {
fprintf(stderr, "Unknown GPU %s\n", optarg);
return 1;
}
break;
default:
break;
}
}
break;
default:
break;
}
}
if (strcmp(argv[optind], "compile") == 0)
compile_shader(argc - optind - 1, &argv[optind + 1]);
else if (strcmp(argv[optind], "disasm") == 0)
disassemble(argv[optind + 1]);
else {
fprintf(stderr, "Unknown command. Valid: compile/disasm\n");
return 1;
}
if (strcmp(argv[optind], "compile") == 0)
compile_shader(argc - optind - 1, &argv[optind + 1]);
else if (strcmp(argv[optind], "disasm") == 0)
disassemble(argv[optind + 1]);
else {
fprintf(stderr, "Unknown command. Valid: compile/disasm\n");
return 1;
}
return 0;
return 0;
}
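Editorial note (not part of the commit): in the standalone tool above, --gpu maps a Mali product name to the packed id gpu_id = (major << 12) | (minor << 8), and disassemble() then routes on (gpu_id >> 12) >= 9 to pick the Valhall rather than the Bifrost disassembler. Below is a small self-contained sketch of that packing, reusing a few rows of the table shown above; it is an illustration only, not code from the commit.

  /* Reproduces the gpu_id packing and disassembler routing used by the
   * standalone tool above. */
  #include <stdio.h>

  struct gpu {
     const char *name;
     unsigned major, minor;
  };

  static const struct gpu gpus[] = {
     {"G71", 6, 0}, {"G72", 6, 2}, {"G52", 7, 2}, {"G77", 9, 0},
  };

  int
  main(void)
  {
     for (unsigned i = 0; i < sizeof(gpus) / sizeof(gpus[0]); ++i) {
        unsigned id = (gpus[i].major << 12) | (gpus[i].minor << 8);

        /* e.g. G52 -> 0x7200 (Bifrost path), G77 -> 0x9000 (Valhall path) */
        printf("Mali-%s: 0x%x -> %s disassembler\n", gpus[i].name, id,
               (id >> 12) >= 9 ? "Valhall" : "Bifrost");
     }
     return 0;
  }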
File diff suppressed because it is too large

File diff suppressed because it is too large

View file
@ -34,14 +34,20 @@
void disassemble_bifrost(FILE *fp, uint8_t *code, size_t size, bool verbose);
void
bi_disasm_fma(FILE *fp, unsigned bits, struct bifrost_regs *srcs, struct bifrost_regs *next_regs, unsigned staging_register, unsigned branch_offset, struct bi_constants *consts, bool first);
void bi_disasm_fma(FILE *fp, unsigned bits, struct bifrost_regs *srcs,
struct bifrost_regs *next_regs, unsigned staging_register,
unsigned branch_offset, struct bi_constants *consts,
bool first);
void bi_disasm_add(FILE *fp, unsigned bits, struct bifrost_regs *srcs, struct bifrost_regs *next_regs, unsigned staging_register, unsigned branch_offset, struct bi_constants *consts, bool first);
void bi_disasm_add(FILE *fp, unsigned bits, struct bifrost_regs *srcs,
struct bifrost_regs *next_regs, unsigned staging_register,
unsigned branch_offset, struct bi_constants *consts,
bool first);
void bi_disasm_dest_fma(FILE *fp, struct bifrost_regs *next_regs, bool first);
void bi_disasm_dest_add(FILE *fp, struct bifrost_regs *next_regs, bool first);
void dump_src(FILE *fp, unsigned src, struct bifrost_regs srcs, unsigned branch_offset, struct bi_constants *consts, bool isFMA);
void dump_src(FILE *fp, unsigned src, struct bifrost_regs srcs,
unsigned branch_offset, struct bi_constants *consts, bool isFMA);
#endif
View file
@ -62,182 +62,187 @@ typedef uint16_t nodearray_value;
typedef uint64_t nodearray_sparse;
typedef struct {
union {
nodearray_sparse *sparse;
nodearray_value *dense;
};
unsigned size;
unsigned sparse_capacity;
union {
nodearray_sparse *sparse;
nodearray_value *dense;
};
unsigned size;
unsigned sparse_capacity;
} nodearray;
/* Align sizes to 16-bytes for SIMD purposes */
#define NODEARRAY_DENSE_ALIGN(x) ALIGN_POT(x, 16)
#define nodearray_sparse_foreach(buf, elem) \
for (nodearray_sparse *elem = (buf)->sparse; \
#define nodearray_sparse_foreach(buf, elem) \
for (nodearray_sparse *elem = (buf)->sparse; \
elem < (buf)->sparse + (buf)->size; elem++)
#define nodearray_dense_foreach(buf, elem) \
for (nodearray_value *elem = (buf)->dense; \
#define nodearray_dense_foreach(buf, elem) \
for (nodearray_value *elem = (buf)->dense; \
elem < (buf)->dense + (buf)->size; elem++)
#define nodearray_dense_foreach_64(buf, elem) \
for (uint64_t *elem = (uint64_t *)(buf)->dense; \
#define nodearray_dense_foreach_64(buf, elem) \
for (uint64_t *elem = (uint64_t *)(buf)->dense; \
(nodearray_value *)elem < (buf)->dense + (buf)->size; elem++)
static inline bool
nodearray_is_sparse(const nodearray *a)
{
return a->sparse_capacity != ~0U;
return a->sparse_capacity != ~0U;
}
static inline void
nodearray_init(nodearray *a)
{
memset(a, 0, sizeof(nodearray));
memset(a, 0, sizeof(nodearray));
}
static inline void
nodearray_reset(nodearray *a)
{
free(a->sparse);
nodearray_init(a);
free(a->sparse);
nodearray_init(a);
}
static inline nodearray_sparse
nodearray_encode(unsigned key, nodearray_value value)
{
static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
return ((nodearray_sparse) key << 16) | value;
static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
return ((nodearray_sparse)key << 16) | value;
}
static inline unsigned
nodearray_sparse_key(const nodearray_sparse *elem)
{
static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
return *elem >> 16;
static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
return *elem >> 16;
}
static inline nodearray_value
nodearray_sparse_value(const nodearray_sparse *elem)
{
return *elem & NODEARRAY_MAX_VALUE;
return *elem & NODEARRAY_MAX_VALUE;
}
static inline unsigned
nodearray_sparse_search(const nodearray *a, nodearray_sparse key, nodearray_sparse **elem)
nodearray_sparse_search(const nodearray *a, nodearray_sparse key,
nodearray_sparse **elem)
{
assert(nodearray_is_sparse(a) && a->size);
assert(nodearray_is_sparse(a) && a->size);
nodearray_sparse *data = a->sparse;
nodearray_sparse *data = a->sparse;
/* Encode the key using the highest possible value, so that the
* matching node must be encoded lower than this
*/
nodearray_sparse skey = nodearray_encode(key, NODEARRAY_MAX_VALUE);
/* Encode the key using the highest possible value, so that the
* matching node must be encoded lower than this
*/
nodearray_sparse skey = nodearray_encode(key, NODEARRAY_MAX_VALUE);
unsigned left = 0;
unsigned right = a->size - 1;
unsigned left = 0;
unsigned right = a->size - 1;
if (data[right] <= skey)
left = right;
if (data[right] <= skey)
left = right;
while (left != right) {
/* No need to worry about overflow, we couldn't have more than
* 2^24 elements */
unsigned probe = (left + right + 1) / 2;
while (left != right) {
/* No need to worry about overflow, we couldn't have more than
* 2^24 elements */
unsigned probe = (left + right + 1) / 2;
if (data[probe] > skey)
right = probe - 1;
else
left = probe;
}
if (data[probe] > skey)
right = probe - 1;
else
left = probe;
}
*elem = data + left;
return left;
*elem = data + left;
return left;
}
static inline void
nodearray_orr(nodearray *a, unsigned key, nodearray_value value,
unsigned max_sparse, unsigned max)
{
assert(key < (1 << 24));
assert(key < max);
assert(key < (1 << 24));
assert(key < max);
if (!value)
return;
if (!value)
return;
if (nodearray_is_sparse(a)) {
unsigned size = a->size;
unsigned left = 0;
if (nodearray_is_sparse(a)) {
unsigned size = a->size;
unsigned left = 0;
if (size) {
/* First, binary search for key */
nodearray_sparse *elem;
left = nodearray_sparse_search(a, key, &elem);
if (size) {
/* First, binary search for key */
nodearray_sparse *elem;
left = nodearray_sparse_search(a, key, &elem);
if (nodearray_sparse_key(elem) == key) {
*elem |= value;
return;
}
if (nodearray_sparse_key(elem) == key) {
*elem |= value;
return;
}
/* We insert before `left`, so increment it if it's
* out of order */
if (nodearray_sparse_key(elem) < key)
++left;
}
/* We insert before `left`, so increment it if it's
* out of order */
if (nodearray_sparse_key(elem) < key)
++left;
}
if (size < max_sparse && (size + 1) < max / 4) {
/* We didn't find it, but we know where to insert it. */
if (size < max_sparse && (size + 1) < max / 4) {
/* We didn't find it, but we know where to insert it. */
nodearray_sparse *data = a->sparse;
nodearray_sparse *data_move = data + left;
nodearray_sparse *data = a->sparse;
nodearray_sparse *data_move = data + left;
bool realloc = (++a->size) > a->sparse_capacity;
bool realloc = (++a->size) > a->sparse_capacity;
if (realloc) {
a->sparse_capacity = MIN2(MAX2(a->sparse_capacity * 2, 64), max / 4);
if (realloc) {
a->sparse_capacity =
MIN2(MAX2(a->sparse_capacity * 2, 64), max / 4);
a->sparse = (nodearray_sparse *)malloc(a->sparse_capacity * sizeof(nodearray_sparse));
a->sparse = (nodearray_sparse *)malloc(a->sparse_capacity *
sizeof(nodearray_sparse));
if (left)
memcpy(a->sparse, data, left * sizeof(nodearray_sparse));
}
if (left)
memcpy(a->sparse, data, left * sizeof(nodearray_sparse));
}
nodearray_sparse *elem = a->sparse + left;
nodearray_sparse *elem = a->sparse + left;
if (left != size)
memmove(elem + 1, data_move, (size - left) * sizeof(nodearray_sparse));
if (left != size)
memmove(elem + 1, data_move,
(size - left) * sizeof(nodearray_sparse));
*elem = nodearray_encode(key, value);
*elem = nodearray_encode(key, value);
if (realloc)
free(data);
if (realloc)
free(data);
return;
}
return;
}
/* There are too many elements, so convert to a dense array */
nodearray old = *a;
/* There are too many elements, so convert to a dense array */
nodearray old = *a;
a->dense = (nodearray_value *)calloc(NODEARRAY_DENSE_ALIGN(max), sizeof(nodearray_value));
a->size = max;
a->sparse_capacity = ~0U;
a->dense = (nodearray_value *)calloc(NODEARRAY_DENSE_ALIGN(max),
sizeof(nodearray_value));
a->size = max;
a->sparse_capacity = ~0U;
nodearray_value *data = a->dense;
nodearray_value *data = a->dense;
nodearray_sparse_foreach(&old, x) {
unsigned key = nodearray_sparse_key(x);
nodearray_value value = nodearray_sparse_value(x);
nodearray_sparse_foreach(&old, x) {
unsigned key = nodearray_sparse_key(x);
nodearray_value value = nodearray_sparse_value(x);
assert(key < max);
data[key] = value;
}
assert(key < max);
data[key] = value;
}
free(old.sparse);
}
free(old.sparse);
}
a->dense[key] |= value;
a->dense[key] |= value;
}
#ifdef __cplusplus
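Editorial note (not part of the commit): the nodearray reformatted above is a hybrid map from node index to a 16-bit value. While small it is kept as a sorted array of 64-bit words encoded as (key << 16) | value, and nodearray_orr converts it to a plain dense array once it grows past the sparse thresholds. The sketch below exercises just that encoding; NODEARRAY_MAX_VALUE is assumed to be 0xffff (the full 16-bit value range), and the encode/entry_* names are local stand-ins for the nodearray_* helpers shown above.

  /* Demonstrates the sparse nodearray entry encoding, (key << 16) | value, and
   * why sorting the encoded words also sorts entries by key. */
  #include <assert.h>
  #include <stdint.h>
  #include <stdio.h>

  #define NODEARRAY_MAX_VALUE 0xffff /* assumed; matches a 16-bit value type */

  static uint64_t
  encode(unsigned key, uint16_t value)
  {
     return ((uint64_t)key << 16) | value;
  }

  static unsigned
  entry_key(uint64_t e)
  {
     return e >> 16;
  }

  static uint16_t
  entry_value(uint64_t e)
  {
     return e & NODEARRAY_MAX_VALUE;
  }

  int
  main(void)
  {
     uint64_t a = encode(3, 0x00f0);
     uint64_t b = encode(7, 0x0001);

     assert(entry_key(a) == 3 && entry_value(a) == 0x00f0);

     /* A larger key always encodes to a larger word, whatever the value, which
      * is what lets nodearray_sparse_search binary-search the raw uint64_t
      * array directly. */
     assert(a < b);
     assert(encode(3, NODEARRAY_MAX_VALUE) < encode(4, 0));

     printf("ok\n");
     return 0;
  }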
View file
@ -21,14 +21,15 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "compiler.h"
#include <gtest/gtest.h>
static std::string
to_string(const bi_instr *I) {
to_string(const bi_instr *I)
{
char *cstr = NULL;
size_t size = 0;
FILE *f = open_memstream(&cstr, &size);
@ -40,23 +41,21 @@ to_string(const bi_instr *I) {
}
static testing::AssertionResult
constant_fold_pred(const char *I_expr,
const char *expected_expr,
bi_instr *I,
constant_fold_pred(const char *I_expr, const char *expected_expr, bi_instr *I,
uint32_t expected)
{
bool unsupported = false;
uint32_t v = bi_fold_constant(I, &unsupported);
if (unsupported) {
return testing::AssertionFailure()
<< "Constant fold unsupported for instruction \n\n"
<< " " << to_string(I);
<< "Constant fold unsupported for instruction \n\n"
<< " " << to_string(I);
} else if (v != expected) {
return testing::AssertionFailure()
<< "Unexpected result when constant folding instruction\n\n"
<< " " << to_string(I) << "\n"
<< " Actual: " << v << "\n"
<< "Expected: " << expected << "\n";
<< "Unexpected result when constant folding instruction\n\n"
<< " " << to_string(I) << "\n"
<< " Actual: " << v << "\n"
<< "Expected: " << expected << "\n";
} else {
return testing::AssertionSuccess();
}
@ -64,7 +63,6 @@ constant_fold_pred(const char *I_expr,
#define EXPECT_FOLD(i, e) EXPECT_PRED_FORMAT2(constant_fold_pred, i, e)
static testing::AssertionResult
not_constant_fold_pred(const char *I_expr, bi_instr *I)
{
@ -74,22 +72,23 @@ not_constant_fold_pred(const char *I_expr, bi_instr *I)
return testing::AssertionSuccess();
} else {
return testing::AssertionFailure()
<< "Instruction\n\n"
<< " " << to_string(I) << "\n"
<< "shouldn't have constant folded, but folded to: " << v;
<< "Instruction\n\n"
<< " " << to_string(I) << "\n"
<< "shouldn't have constant folded, but folded to: " << v;
}
}
#define EXPECT_NOT_FOLD(i) EXPECT_PRED_FORMAT1(not_constant_fold_pred, i)
class ConstantFold : public testing::Test {
protected:
ConstantFold() {
protected:
ConstantFold()
{
mem_ctx = ralloc_context(NULL);
b = bit_builder(mem_ctx);
}
~ConstantFold() {
~ConstantFold()
{
ralloc_free(mem_ctx);
}
@ -101,9 +100,7 @@ TEST_F(ConstantFold, Swizzles)
{
bi_index reg = bi_register(0);
EXPECT_FOLD(
bi_swz_v2i16_to(b, reg, bi_imm_u32(0xCAFEBABE)),
0xCAFEBABE);
EXPECT_FOLD(bi_swz_v2i16_to(b, reg, bi_imm_u32(0xCAFEBABE)), 0xCAFEBABE);
EXPECT_FOLD(
bi_swz_v2i16_to(b, reg, bi_swz_16(bi_imm_u32(0xCAFEBABE), false, false)),
@ -123,18 +120,17 @@ TEST_F(ConstantFold, VectorConstructions2i16)
bi_index reg = bi_register(0);
EXPECT_FOLD(
bi_mkvec_v2i16_to(b, reg, bi_imm_u16(0xCAFE),
bi_imm_u16(0xBABE)),
bi_mkvec_v2i16_to(b, reg, bi_imm_u16(0xCAFE), bi_imm_u16(0xBABE)),
0xBABECAFE);
EXPECT_FOLD(
bi_mkvec_v2i16_to(b, reg, bi_swz_16(bi_imm_u32(0xCAFEBABE), true, true),
bi_imm_u16(0xBABE)),
bi_imm_u16(0xBABE)),
0xBABECAFE);
EXPECT_FOLD(
bi_mkvec_v2i16_to(b, reg, bi_swz_16(bi_imm_u32(0xCAFEBABE), true, true),
bi_swz_16(bi_imm_u32(0xCAFEBABE), false, false)),
bi_swz_16(bi_imm_u32(0xCAFEBABE), false, false)),
0xBABECAFE);
}
@ -173,17 +169,18 @@ TEST_F(ConstantFold, LimitedShiftsForTexturing)
{
bi_index reg = bi_register(0);
EXPECT_FOLD(
bi_lshift_or_i32_to(b, reg, bi_imm_u32(0xCAFE), bi_imm_u32(0xA0000), bi_imm_u8(4)),
(0xCAFE << 4) | 0xA0000);
EXPECT_FOLD(bi_lshift_or_i32_to(b, reg, bi_imm_u32(0xCAFE),
bi_imm_u32(0xA0000), bi_imm_u8(4)),
(0xCAFE << 4) | 0xA0000);
EXPECT_NOT_FOLD(
bi_lshift_or_i32_to(b, reg, bi_imm_u32(0xCAFE), bi_not(bi_imm_u32(0xA0000)), bi_imm_u8(4)));
EXPECT_NOT_FOLD(bi_lshift_or_i32_to(
b, reg, bi_imm_u32(0xCAFE), bi_not(bi_imm_u32(0xA0000)), bi_imm_u8(4)));
EXPECT_NOT_FOLD(
bi_lshift_or_i32_to(b, reg, bi_not(bi_imm_u32(0xCAFE)), bi_imm_u32(0xA0000), bi_imm_u8(4)));
EXPECT_NOT_FOLD(bi_lshift_or_i32_to(b, reg, bi_not(bi_imm_u32(0xCAFE)),
bi_imm_u32(0xA0000), bi_imm_u8(4)));
bi_instr *I = bi_lshift_or_i32_to(b, reg, bi_imm_u32(0xCAFE), bi_imm_u32(0xA0000), bi_imm_u8(4));
bi_instr *I = bi_lshift_or_i32_to(b, reg, bi_imm_u32(0xCAFE),
bi_imm_u32(0xA0000), bi_imm_u8(4));
I->not_result = true;
EXPECT_NOT_FOLD(I);
}
@ -193,9 +190,12 @@ TEST_F(ConstantFold, NonConstantSourcesCannotBeFolded)
bi_index reg = bi_register(0);
EXPECT_NOT_FOLD(bi_swz_v2i16_to(b, reg, bi_temp(b->shader)));
EXPECT_NOT_FOLD(bi_mkvec_v2i16_to(b, reg, bi_temp(b->shader), bi_temp(b->shader)));
EXPECT_NOT_FOLD(bi_mkvec_v2i16_to(b, reg, bi_temp(b->shader), bi_imm_u32(0xDEADBEEF)));
EXPECT_NOT_FOLD(bi_mkvec_v2i16_to(b, reg, bi_imm_u32(0xDEADBEEF), bi_temp(b->shader)));
EXPECT_NOT_FOLD(
bi_mkvec_v2i16_to(b, reg, bi_temp(b->shader), bi_temp(b->shader)));
EXPECT_NOT_FOLD(
bi_mkvec_v2i16_to(b, reg, bi_temp(b->shader), bi_imm_u32(0xDEADBEEF)));
EXPECT_NOT_FOLD(
bi_mkvec_v2i16_to(b, reg, bi_imm_u32(0xDEADBEEF), bi_temp(b->shader)));
}
TEST_F(ConstantFold, OtherOperationsShouldNotFold)
View file
@ -21,55 +21,57 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "compiler.h"
#include <gtest/gtest.h>
#define CASE(shader_stage, instr, expected) do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
bi_builder *b = A; \
bi_index u = bi_temp(b->shader); \
bi_index v = bi_temp(b->shader); \
A->shader->stage = MESA_SHADER_ ## shader_stage; \
instr; \
} \
{ \
bi_builder *b = B; \
bi_index u = bi_temp(b->shader); \
bi_index v = bi_temp(b->shader); \
B->shader->stage = MESA_SHADER_ ## shader_stage; \
expected; \
} \
bi_opt_fuse_dual_texture(A->shader); \
if (!bit_shader_equal(A->shader, B->shader)) { \
ADD_FAILURE(); \
fprintf(stderr, "Optimization produce unexpected result"); \
fprintf(stderr, " Actual:\n"); \
bi_print_shader(A->shader, stderr); \
fprintf(stderr, "Expected:\n"); \
bi_print_shader(B->shader, stderr); \
fprintf(stderr, "\n"); \
} \
} while(0)
#define CASE(shader_stage, instr, expected) \
do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
bi_builder *b = A; \
bi_index u = bi_temp(b->shader); \
bi_index v = bi_temp(b->shader); \
A->shader->stage = MESA_SHADER_##shader_stage; \
instr; \
} \
{ \
bi_builder *b = B; \
bi_index u = bi_temp(b->shader); \
bi_index v = bi_temp(b->shader); \
B->shader->stage = MESA_SHADER_##shader_stage; \
expected; \
} \
bi_opt_fuse_dual_texture(A->shader); \
if (!bit_shader_equal(A->shader, B->shader)) { \
ADD_FAILURE(); \
fprintf(stderr, "Optimization produce unexpected result"); \
fprintf(stderr, " Actual:\n"); \
bi_print_shader(A->shader, stderr); \
fprintf(stderr, "Expected:\n"); \
bi_print_shader(B->shader, stderr); \
fprintf(stderr, "\n"); \
} \
} while (0)
#define NEGCASE(stage, instr) CASE(stage, instr, instr)
class DualTexture : public testing::Test {
protected:
DualTexture() {
protected:
DualTexture()
{
mem_ctx = ralloc_context(NULL);
reg = bi_register(0);
x = bi_register(4);
y = bi_register(8);
reg = bi_register(0);
x = bi_register(4);
y = bi_register(8);
}
~DualTexture() {
~DualTexture()
{
ralloc_free(mem_ctx);
}
@ -78,134 +80,165 @@ protected:
bi_index reg, x, y;
};
TEST_F(DualTexture, FuseDualTexFragment)
{
CASE(FRAGMENT, {
CASE(
FRAGMENT,
{
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), false, 4, 4);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144),
false, 4, 4);
});
}
TEST_F(DualTexture, FuseDualTexKernel)
{
CASE(KERNEL, {
CASE(
KERNEL,
{
bi_texs_2d_f32_to(b, x, u, v, true, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, true, 1, 1);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), true, 4, 4);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), true,
4, 4);
});
}
TEST_F(DualTexture, FuseDualTexVertex)
{
CASE(VERTEX, {
CASE(
VERTEX,
{
bi_texs_2d_f32_to(b, x, u, v, true, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, true, 1, 1);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), true, 4, 4);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), true,
4, 4);
});
}
TEST_F(DualTexture, DontFuseDualTexWrongStage)
{
NEGCASE(FRAGMENT, {
bi_texs_2d_f32_to(b, x, u, v, true, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, true, 1, 1);
bi_texs_2d_f32_to(b, x, u, v, true, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, true, 1, 1);
});
NEGCASE(KERNEL, {
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
});
NEGCASE(VERTEX, {
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
});
}
TEST_F(DualTexture, FuseDualTexMaximumIndex)
{
CASE(FRAGMENT, {
CASE(
FRAGMENT,
{
bi_texs_2d_f32_to(b, x, u, v, false, 2, 2);
bi_texs_2d_f32_to(b, y, u, v, false, 3, 3);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F003E6), false, 4, 4);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F003E6),
false, 4, 4);
});
}
TEST_F(DualTexture, FuseDualTexMixedIndex)
{
CASE(FRAGMENT, {
CASE(
FRAGMENT,
{
bi_texs_2d_f32_to(b, x, u, v, false, 3, 2);
bi_texs_2d_f32_to(b, y, u, v, false, 2, 3);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F003A7), false, 4, 4);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F003A7),
false, 4, 4);
});
}
TEST_F(DualTexture, DontFuseDualTexOutOfBounds)
{
NEGCASE(FRAGMENT, {
bi_texs_2d_f32_to(b, x, u, v, false, 4, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
bi_texs_2d_f32_to(b, x, u, v, false, 4, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
});
NEGCASE(FRAGMENT, {
bi_texs_2d_f32_to(b, x, u, v, false, 0, 4);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
bi_texs_2d_f32_to(b, x, u, v, false, 0, 4);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
});
NEGCASE(FRAGMENT, {
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 4, 1);
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 4, 1);
});
NEGCASE(FRAGMENT, {
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 4);
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 4);
});
}
TEST_F(DualTexture, FuseDualTexFP16)
{
CASE(FRAGMENT, {
CASE(
FRAGMENT,
{
bi_texs_2d_f16_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f16_to(b, y, u, v, false, 1, 1);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF1E00144), false, 2, 2);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF1E00144),
false, 2, 2);
});
}
TEST_F(DualTexture, FuseDualTexMixedSize)
{
CASE(FRAGMENT, {
CASE(
FRAGMENT,
{
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f16_to(b, y, u, v, false, 1, 1);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0XF9E00144), false, 4, 2);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0XF9E00144),
false, 4, 2);
});
CASE(FRAGMENT, {
CASE(
FRAGMENT,
{
bi_texs_2d_f16_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF1F00144), false, 2, 4);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF1F00144),
false, 2, 4);
});
}
TEST_F(DualTexture, DontFuseMixedCoordinates)
{
NEGCASE(FRAGMENT, {
bi_texs_2d_f32_to(b, x, bi_neg(u), v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
bi_texs_2d_f32_to(b, x, bi_neg(u), v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
});
NEGCASE(FRAGMENT, {
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, v, u, false, 1, 1);
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, v, u, false, 1, 1);
});
}
View file
@ -21,31 +21,34 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "compiler.h"
#include <gtest/gtest.h>
#define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, bi_lower_swizzle)
#define CASE(instr, expected) \
INSTRUCTION_CASE(instr, expected, bi_lower_swizzle)
#define NEGCASE(instr) CASE(instr, instr)
class LowerSwizzle : public testing::Test {
protected:
LowerSwizzle() {
protected:
LowerSwizzle()
{
mem_ctx = ralloc_context(NULL);
reg = bi_register(0);
x = bi_register(1);
y = bi_register(2);
z = bi_register(3);
w = bi_register(4);
reg = bi_register(0);
x = bi_register(1);
y = bi_register(2);
z = bi_register(3);
w = bi_register(4);
x3210 = x;
x3210 = x;
x3210.swizzle = BI_SWIZZLE_B3210;
}
~LowerSwizzle() {
~LowerSwizzle()
{
ralloc_free(mem_ctx);
}
@ -58,7 +61,8 @@ protected:
TEST_F(LowerSwizzle, Csel16)
{
CASE(bi_csel_v2f16_to(b, reg, bi_half(x, 0), y, z, w, BI_CMPF_NE),
bi_csel_v2f16_to(b, reg, bi_swz_v2i16(b, bi_half(x, 0)), y, z, w, BI_CMPF_NE));
bi_csel_v2f16_to(b, reg, bi_swz_v2i16(b, bi_half(x, 0)), y, z, w,
BI_CMPF_NE));
}
TEST_F(LowerSwizzle, Fma16)
@ -79,23 +83,22 @@ TEST_F(LowerSwizzle, ClzHadd8)
TEST_F(LowerSwizzle, FirstShift8)
{
enum bi_opcode ops[] = {
BI_OPCODE_LSHIFT_AND_V4I8,
BI_OPCODE_LSHIFT_OR_V4I8,
BI_OPCODE_LSHIFT_XOR_V4I8,
BI_OPCODE_RSHIFT_AND_V4I8,
BI_OPCODE_RSHIFT_OR_V4I8,
BI_OPCODE_RSHIFT_XOR_V4I8,
BI_OPCODE_LSHIFT_AND_V4I8, BI_OPCODE_LSHIFT_OR_V4I8,
BI_OPCODE_LSHIFT_XOR_V4I8, BI_OPCODE_RSHIFT_AND_V4I8,
BI_OPCODE_RSHIFT_OR_V4I8, BI_OPCODE_RSHIFT_XOR_V4I8,
};
for (unsigned i = 0; i < ARRAY_SIZE(ops); ++i) {
CASE({
CASE(
{
bi_instr *I = bi_lshift_and_v4i8_to(b, reg, x3210, y, z);
I->op = ops[i];
},
{
bi_instr *I = bi_lshift_and_v4i8_to(b, reg, bi_swz_v4i8(b, x3210), y, z);
},
{
bi_instr *I =
bi_lshift_and_v4i8_to(b, reg, bi_swz_v4i8(b, x3210), y, z);
I->op = ops[i];
});
});
}
}
View file
@ -21,56 +21,58 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "compiler.h"
#include <gtest/gtest.h>
#define CASE(instr, expected) do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
A->shader->info.bifrost = rzalloc(mem_ctx, struct bifrost_shader_info); \
B->shader->info.bifrost = rzalloc(mem_ctx, struct bifrost_shader_info); \
{ \
bi_builder *b = A; \
bi_index u = bi_temp(b->shader); \
UNUSED bi_index v = bi_temp(b->shader); \
UNUSED bi_index w = bi_temp(b->shader); \
instr; \
} \
{ \
bi_builder *b = B; \
bi_index u = bi_temp(b->shader); \
UNUSED bi_index v = bi_temp(b->shader); \
UNUSED bi_index w = bi_temp(b->shader); \
expected; \
} \
bi_opt_message_preload(A->shader); \
if (!bit_shader_equal(A->shader, B->shader)) { \
ADD_FAILURE(); \
fprintf(stderr, "Optimization produce unexpected result"); \
fprintf(stderr, " Actual:\n"); \
bi_print_shader(A->shader, stderr); \
fprintf(stderr, "Expected:\n"); \
bi_print_shader(B->shader, stderr); \
fprintf(stderr, "\n"); \
} \
} while(0)
#define CASE(instr, expected) \
do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
A->shader->info.bifrost = rzalloc(mem_ctx, struct bifrost_shader_info); \
B->shader->info.bifrost = rzalloc(mem_ctx, struct bifrost_shader_info); \
{ \
bi_builder *b = A; \
bi_index u = bi_temp(b->shader); \
UNUSED bi_index v = bi_temp(b->shader); \
UNUSED bi_index w = bi_temp(b->shader); \
instr; \
} \
{ \
bi_builder *b = B; \
bi_index u = bi_temp(b->shader); \
UNUSED bi_index v = bi_temp(b->shader); \
UNUSED bi_index w = bi_temp(b->shader); \
expected; \
} \
bi_opt_message_preload(A->shader); \
if (!bit_shader_equal(A->shader, B->shader)) { \
ADD_FAILURE(); \
fprintf(stderr, "Optimization produce unexpected result"); \
fprintf(stderr, " Actual:\n"); \
bi_print_shader(A->shader, stderr); \
fprintf(stderr, "Expected:\n"); \
bi_print_shader(B->shader, stderr); \
fprintf(stderr, "\n"); \
} \
} while (0)
#define NEGCASE(instr) CASE(instr, instr)
class MessagePreload : public testing::Test {
protected:
MessagePreload() {
protected:
MessagePreload()
{
mem_ctx = ralloc_context(NULL);
x = bi_register(16);
y = bi_register(32);
x = bi_register(16);
y = bi_register(32);
}
~MessagePreload() {
~MessagePreload()
{
ralloc_free(mem_ctx);
}
@ -84,100 +86,117 @@ protected:
b->cursor = bi_before_block(bi_start_block(&b->shader->blocks));
bi_foreach_src(I, i)
I->src[i] = bi_mov_i32(b, bi_register(idx*4 + i));
I->src[i] = bi_mov_i32(b, bi_register(idx * 4 + i));
b->cursor = bi_after_instr(I);
}
};
TEST_F(MessagePreload, PreloadLdVarSample)
{
CASE({
CASE(
{
bi_ld_var_imm_to(b, u, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 0);
}, {
preload_moves(b, u, 4, 0);
});
},
{ preload_moves(b, u, 4, 0); });
}
TEST_F(MessagePreload, PreloadLdVarLdVar)
{
CASE({
CASE(
{
bi_ld_var_imm_to(b, u, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 2);
bi_ld_var_imm_to(b, v, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 1);
}, {
},
{
preload_moves(b, u, 4, 0);
preload_moves(b, v, 4, 1);
});
});
}
TEST_F(MessagePreload, MaxTwoMessages)
{
CASE({
CASE(
{
bi_ld_var_imm_to(b, u, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 2);
bi_ld_var_imm_to(b, v, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 1);
bi_ld_var_imm_to(b, w, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 0);
},
{
},
{
preload_moves(b, u, 4, 0);
preload_moves(b, v, 4, 1);
bi_ld_var_imm_to(b, w, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 0);
});
});
CASE({
bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
bi_var_tex_f16_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 1, 2);
bi_var_tex_f16_to(b, w, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 3, 3);
}, {
CASE(
{
bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0,
0);
bi_var_tex_f16_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 1,
2);
bi_var_tex_f16_to(b, w, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 3,
3);
},
{
preload_moves(b, u, 4, 0);
preload_moves(b, v, 2, 1);
bi_var_tex_f16_to(b, w, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 3, 3);
});
bi_var_tex_f16_to(b, w, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 3,
3);
});
}
TEST_F(MessagePreload, PreloadVartexF16)
{
CASE({
bi_var_tex_f16_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
}, {
preload_moves(b, u, 2, 0);
});
CASE(
{
bi_var_tex_f16_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0,
0);
},
{ preload_moves(b, u, 2, 0); });
}
TEST_F(MessagePreload, PreloadVartexF32)
{
CASE({
bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
}, {
preload_moves(b, u, 4, 0);
});
CASE(
{
bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0,
0);
},
{ preload_moves(b, u, 4, 0); });
}
TEST_F(MessagePreload, PreloadVartexF32VartexF16)
{
CASE({
bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
bi_var_tex_f16_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 1, 2);
}, {
CASE(
{
bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0,
0);
bi_var_tex_f16_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 1,
2);
},
{
preload_moves(b, u, 4, 0);
preload_moves(b, v, 2, 1);
});
});
}
TEST_F(MessagePreload, PreloadVartexLodModes)
{
CASE({
CASE(
{
bi_var_tex_f32_to(b, u, true, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
bi_var_tex_f32_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
}, {
bi_var_tex_f32_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0,
0);
},
{
preload_moves(b, u, 4, 0);
preload_moves(b, v, 4, 1);
});
});
}
View file
@ -21,9 +21,9 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "compiler.h"
#include <gtest/gtest.h>
@ -38,24 +38,35 @@ bi_optimizer(bi_context *ctx)
/* Define reg first so it has a consistent variable index, and pass it to an
* instruction that cannot be dead code eliminated so the program is nontrivial.
*/
#define CASE(instr, expected) INSTRUCTION_CASE(\
{ UNUSED bi_index reg = bi_temp(b->shader); instr; bi_kaboom(b, reg); }, \
{ UNUSED bi_index reg = bi_temp(b->shader); expected; bi_kaboom(b, reg); }, \
#define CASE(instr, expected) \
INSTRUCTION_CASE( \
{ \
UNUSED bi_index reg = bi_temp(b->shader); \
instr; \
bi_kaboom(b, reg); \
}, \
{ \
UNUSED bi_index reg = bi_temp(b->shader); \
expected; \
bi_kaboom(b, reg); \
}, \
bi_optimizer);
#define NEGCASE(instr) CASE(instr, instr)
class Optimizer : public testing::Test {
protected:
Optimizer() {
protected:
Optimizer()
{
mem_ctx = ralloc_context(NULL);
x = bi_register(1);
y = bi_register(2);
x = bi_register(1);
y = bi_register(2);
negabsx = bi_neg(bi_abs(x));
}
~Optimizer() {
~Optimizer()
{
ralloc_free(mem_ctx);
}
@ -95,91 +106,124 @@ TEST_F(Optimizer, FusedFABSNEGForFP16)
TEST_F(Optimizer, FuseFADD_F32WithEqualSourcesAbsAbsAndClamp)
{
CASE({
bi_instr *I = bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), bi_abs(x));
CASE(
{
bi_instr *I =
bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_1;
}, {
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
});
CASE({
bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_fabsneg_f32(b, bi_abs(x)));
CASE(
{
bi_instr *I =
bi_fadd_f32_to(b, reg, bi_abs(x), bi_fabsneg_f32(b, bi_abs(x)));
I->clamp = BI_CLAMP_CLAMP_0_1;
}, {
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
});
CASE({
bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), bi_abs(x)));
CASE(
{
bi_instr *I =
bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), bi_abs(x)));
I->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
});
}
TEST_F(Optimizer, FuseFADD_V2F16WithDifferentSourcesAbsAbsAndClamp)
{
CASE({
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(y));
CASE(
{
bi_instr *I =
bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(y));
I->clamp = BI_CLAMP_CLAMP_0_1;
}, {
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
});
CASE({
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(y)));
CASE(
{
bi_instr *I =
bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(y)));
I->clamp = BI_CLAMP_CLAMP_0_1;
}, {
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
});
CASE({
bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(y)));
CASE(
{
bi_instr *I =
bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(y)));
I->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y));
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
});
}
TEST_F(Optimizer, AvoidFADD_V2F16WithEqualSourcesAbsAbsAndClamp)
{
NEGCASE({
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_1;
bi_instr *I =
bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
NEGCASE({
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(x)));
I->clamp = BI_CLAMP_CLAMP_0_1;
bi_instr *I =
bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(x)));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
NEGCASE({
bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(x)));
bi_instr *I =
bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(x)));
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
}
TEST_F(Optimizer, SwizzlesComposedForFP16)
{
CASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), y),
CASE(bi_fadd_v2f16_to(
b, reg, bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), y),
bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y));
CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, negabsx), true, false), y),
CASE(bi_fadd_v2f16_to(
b, reg, bi_swz_16(bi_fabsneg_v2f16(b, negabsx), true, false), y),
bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y));
CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), true, false), y),
CASE(bi_fadd_v2f16_to(
b, reg,
bi_swz_16(bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), true,
false),
y),
bi_fadd_v2f16_to(b, reg, negabsx, y));
CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, false)), true, false), y),
CASE(bi_fadd_v2f16_to(
b, reg,
bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, false)), true, false),
y),
bi_fadd_v2f16_to(b, reg, bi_half(negabsx, false), y));
CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, true)), true, false), y),
CASE(bi_fadd_v2f16_to(
b, reg,
bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, true)), true, false),
y),
bi_fadd_v2f16_to(b, reg, bi_half(negabsx, true), y));
}
@ -192,7 +236,8 @@ TEST_F(Optimizer, PreserveWidens)
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(negabsx, true)), y),
bi_fadd_f32_to(b, reg, bi_half(negabsx, true), y));
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(x, true)), bi_fabsneg_f32(b, bi_half(x, false))),
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(x, true)),
bi_fabsneg_f32(b, bi_half(x, false))),
bi_fadd_f32_to(b, reg, bi_half(x, true), bi_half(x, false)));
}
@ -219,85 +264,100 @@ TEST_F(Optimizer, AvoidZeroAndFABSNEGFootguns)
TEST_F(Optimizer, ClampsPropagated)
{
CASE({
bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, x, y));
I->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
CASE(
{
bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, x, y));
I->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
CASE({
bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y));
I->clamp = BI_CLAMP_CLAMP_0_1;
}, {
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE(
{
bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y));
I->clamp = BI_CLAMP_CLAMP_0_1;
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
}
TEST_F(Optimizer, ClampsComposed)
{
CASE({
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_M1_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE(
{
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_M1_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE({
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE(
{
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE({
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_INF;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
CASE(
{
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_INF;
J->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
CASE({
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_M1_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE(
{
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_M1_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE({
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE(
{
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE({
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_INF;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
CASE(
{
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_INF;
J->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
}
TEST_F(Optimizer, DoNotMixSizesWhenClamping)
@ -341,21 +401,29 @@ TEST_F(Optimizer, FuseComparisonsWithDISCARD)
bi_discard_f32(b, x, y, BI_CMPF_EQ));
for (unsigned h = 0; h < 2; ++h) {
CASE(bi_discard_b32(b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1), h)),
CASE(bi_discard_b32(
b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1),
h)),
bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_LE));
CASE(bi_discard_b32(b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_NE, BI_RESULT_TYPE_I1), h)),
CASE(bi_discard_b32(
b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_NE, BI_RESULT_TYPE_I1),
h)),
bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_NE));
CASE(bi_discard_b32(b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_EQ, BI_RESULT_TYPE_M1), h)),
CASE(bi_discard_b32(
b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_EQ, BI_RESULT_TYPE_M1),
h)),
bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_EQ));
}
}
TEST_F(Optimizer, DoNotFuseSpecialComparisons)
{
NEGCASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_GTLT, BI_RESULT_TYPE_F1)));
NEGCASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_TOTAL, BI_RESULT_TYPE_F1)));
NEGCASE(
bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_GTLT, BI_RESULT_TYPE_F1)));
NEGCASE(bi_discard_b32(
b, bi_fcmp_f32(b, x, y, BI_CMPF_TOTAL, BI_RESULT_TYPE_F1)));
}
TEST_F(Optimizer, FuseResultType)
@ -365,25 +433,33 @@ TEST_F(Optimizer, FuseResultType)
BI_MUX_INT_ZERO),
bi_fcmp_f32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1));
CASE(bi_mux_i32_to(b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0),
bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_F1));
CASE(bi_mux_i32_to(
b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0),
bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE,
BI_RESULT_TYPE_F1));
CASE(bi_mux_i32_to(b, reg, bi_imm_u32(0), bi_imm_u32(1),
bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_I1));
CASE(bi_mux_i32_to(
b, reg, bi_imm_u32(0), bi_imm_u32(1),
bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE,
BI_RESULT_TYPE_I1));
CASE(bi_mux_v2i16_to(b, reg, bi_imm_f16(0.0), bi_imm_f16(1.0),
bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_F1));
bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE,
BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE,
BI_RESULT_TYPE_F1));
CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1),
bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_I1));
bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE,
BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE,
BI_RESULT_TYPE_I1));
CASE(bi_mux_i32_to(b, reg, bi_imm_u32(0), bi_imm_u32(1),
bi_icmp_u32(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
@ -391,13 +467,13 @@ TEST_F(Optimizer, FuseResultType)
bi_icmp_u32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));
CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1),
bi_icmp_v2u16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v2u16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v2u16_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));
CASE(bi_mux_v4i8_to(b, reg, bi_imm_u8(0), bi_imm_u8(1),
bi_icmp_v4u8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v4u8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v4u8_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));
CASE(bi_mux_i32_to(b, reg, bi_imm_u32(0), bi_imm_u32(1),
@ -406,31 +482,36 @@ TEST_F(Optimizer, FuseResultType)
bi_icmp_s32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));
CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1),
bi_icmp_v2s16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v2s16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v2s16_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));
CASE(bi_mux_v4i8_to(b, reg, bi_imm_u8(0), bi_imm_u8(1),
bi_icmp_v4s8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v4s8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v4s8_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));
}
TEST_F(Optimizer, DoNotFuseMixedSizeResultType)
{
NEGCASE(bi_mux_i32_to(b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0),
bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO));
NEGCASE(bi_mux_i32_to(
b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0),
bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO));
NEGCASE(bi_mux_v2i16_to(b, reg, bi_imm_f16(0.0), bi_imm_f16(1.0),
bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO));
NEGCASE(bi_mux_v2i16_to(
b, reg, bi_imm_f16(0.0), bi_imm_f16(1.0),
bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO));
}
TEST_F(Optimizer, VarTexCoord32)
{
CASE({
bi_index ld = bi_ld_var_imm(b, bi_null(), BI_REGISTER_FORMAT_F32, BI_SAMPLE_CENTER, BI_UPDATE_STORE, BI_VECSIZE_V2, 0);
CASE(
{
bi_index ld =
bi_ld_var_imm(b, bi_null(), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_CENTER, BI_UPDATE_STORE, BI_VECSIZE_V2, 0);
bi_index x = bi_temp(b->shader);
bi_index y = bi_temp(b->shader);
@@ -439,9 +520,11 @@ TEST_F(Optimizer, VarTexCoord32)
split->dest[1] = y;
bi_texs_2d_f32_to(b, reg, x, y, false, 0, 0);
}, {
bi_var_tex_f32_to(b, reg, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
});
},
{
bi_var_tex_f32_to(b, reg, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0,
0);
});
}
TEST_F(Optimizer, Int8ToFloat32)
@@ -458,7 +541,6 @@ TEST_F(Optimizer, Int8ToFloat32)
}
}
TEST_F(Optimizer, Int16ToFloat32)
{
for (unsigned i = 0; i < 2; ++i) {


@@ -21,23 +21,27 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_test.h"
#include "compiler.h"
#include <gtest/gtest.h>
#include "mesa-gtest-extras.h"
class PackFormats : public testing::Test
{
protected:
PackFormats() {
class PackFormats : public testing::Test {
protected:
PackFormats()
{
util_dynarray_init(&result, NULL);
}
~PackFormats() {
~PackFormats()
{
util_dynarray_fini(&result);
}
const uint64_t *result_as_u64_array() { return reinterpret_cast<uint64_t *>(result.data); }
const uint64_t *result_as_u64_array()
{
return reinterpret_cast<uint64_t *>(result.data);
}
struct util_dynarray result;
};
@@ -46,7 +50,7 @@ TEST_F(PackFormats, 1)
{
/* Test case from the blob */
struct bi_packed_tuple tuples[] = {
{ 0x2380cb1c02200000, 0x10e0 },
{0x2380cb1c02200000, 0x10e0},
};
uint64_t header = 0x021000011800;
@@ -65,8 +69,8 @@ TEST_F(PackFormats, 1)
TEST_F(PackFormats, 2)
{
struct bi_packed_tuple tuples[] = {
{ 0x9380cb6044000044, 0xf65 },
{ 0xaf8721a05c000081, 0x1831 },
{0x9380cb6044000044, 0xf65},
{0xaf8721a05c000081, 0x1831},
};
bi_pack_format(&result, 0, tuples, 2, 0x52800011800, 0, 0, false);
@@ -86,9 +90,9 @@ TEST_F(PackFormats, 2)
TEST_F(PackFormats, 3)
{
struct bi_packed_tuple tuples[] = {
{ 0x93805b8040000000, 0xf65 },
{ 0x93886db05c000000, 0xf65 },
{ 0xb380cb180c000080, 0x18b1 },
{0x93805b8040000000, 0xf65},
{0x93886db05c000000, 0xf65},
{0xb380cb180c000080, 0x18b1},
};
bi_pack_format(&result, 0, tuples, 3, 0x3100000000, 0, 0, true);
@@ -96,12 +100,8 @@ TEST_F(PackFormats, 3)
bi_pack_format(&result, 4, tuples, 3, 0x3100000000, 0, 0, true);
const uint64_t expected[] = {
0x805b804000000029,
0x0188000000076593,
0x886db05c00000021,
0x58c0600004076593,
0x0000000000000044,
0x60002c6ce0300000,
0x805b804000000029, 0x0188000000076593, 0x886db05c00000021,
0x58c0600004076593, 0x0000000000000044, 0x60002c6ce0300000,
};
ASSERT_EQ(result.size, 48);
@@ -111,10 +111,10 @@ TEST_F(PackFormats, 3)
TEST_F(PackFormats, 4)
{
struct bi_packed_tuple tuples[] = {
{ 0xad8c87004000005f, 0x2f18 },
{ 0xad8c87385c00004f, 0x2f18 },
{ 0xad8c87385c00006e, 0x2f18 },
{ 0xb380cb182c000080, 0x18b1 },
{0xad8c87004000005f, 0x2f18},
{0xad8c87385c00004f, 0x2f18},
{0xad8c87385c00006e, 0x2f18},
{0xb380cb182c000080, 0x18b1},
};
uint64_t EC0 = (0x10000001ff000000) >> 4;
@@ -124,12 +124,8 @@ TEST_F(PackFormats, 4)
bi_pack_format(&result, 6, tuples, 4, 0x3100000000, EC0, 0, false);
const uint64_t expected[] = {
0x8c87004000005f2d,
0x01880000000718ad,
0x8c87385c00004f25,
0x39c2e000037718ad,
0x80cb182c00008005,
0xac01c62b6320b1b3,
0x8c87004000005f2d, 0x01880000000718ad, 0x8c87385c00004f25,
0x39c2e000037718ad, 0x80cb182c00008005, 0xac01c62b6320b1b3,
};
ASSERT_EQ(result.size, 48);
@@ -139,11 +135,9 @@ TEST_F(PackFormats, 4)
TEST_F(PackFormats, 5)
{
struct bi_packed_tuple tuples[] = {
{ 0x9380688040000000, 0xf65 },
{ 0xd4057300c000040, 0xf26 },
{ 0x1f80cb1858000000, 0x19ab },
{ 0x937401f85c000000, 0xf65 },
{ 0xb380cb180c000080, 0x18a1 },
{0x9380688040000000, 0xf65}, {0xd4057300c000040, 0xf26},
{0x1f80cb1858000000, 0x19ab}, {0x937401f85c000000, 0xf65},
{0xb380cb180c000080, 0x18a1},
};
uint64_t EC0 = (0x183f800000) >> 4;
@@ -154,14 +148,9 @@ TEST_F(PackFormats, 5)
bi_pack_format(&result, 8, tuples, 5, 0x3100000000, EC0, 0, true);
const uint64_t expected[] = {
0x8068804000000029,
0x0188000000076593,
0x4057300c00004021,
0x58c2c0000007260d,
0x7401f85c0000008b,
0x00006ac7e0376593,
0x80cb180c00008053,
0x000000183f80a1b3,
0x8068804000000029, 0x0188000000076593, 0x4057300c00004021,
0x58c2c0000007260d, 0x7401f85c0000008b, 0x00006ac7e0376593,
0x80cb180c00008053, 0x000000183f80a1b3,
};
ASSERT_EQ(result.size, 64);
@@ -171,12 +160,9 @@ TEST_F(PackFormats, 5)
TEST_F(PackFormats, 6)
{
struct bi_packed_tuple tuples[] = {
{ 0xad8c870068000048, 0x2f18 },
{ 0xad8c87385c000050, 0x2f18 },
{ 0xad8c87385c00006a, 0x2f18 },
{ 0xad8c87385c000074, 0x2f18 },
{ 0xad8c87385c000020, 0x2f18 },
{ 0xad8c87385c000030, 0x2f18 },
{0xad8c870068000048, 0x2f18}, {0xad8c87385c000050, 0x2f18},
{0xad8c87385c00006a, 0x2f18}, {0xad8c87385c000074, 0x2f18},
{0xad8c87385c000020, 0x2f18}, {0xad8c87385c000030, 0x2f18},
};
uint64_t EC0 = (0x345678912345670) >> 4;
@@ -188,15 +174,9 @@ TEST_F(PackFormats, 6)
bi_pack_format(&result, 10, tuples, 6, 0x60000011800, EC0, 0, false);
const uint64_t expected[] = {
0x8c8700680000482d,
0x30000008c00718ad,
0x8c87385c00005025,
0x39c2e000035718ad,
0x8c87385c00007401,
0xb401c62b632718ad,
0x8c87385c00002065,
0x39c2e000018718ad,
0x3456789123456706,
0x8c8700680000482d, 0x30000008c00718ad, 0x8c87385c00005025,
0x39c2e000035718ad, 0x8c87385c00007401, 0xb401c62b632718ad,
0x8c87385c00002065, 0x39c2e000018718ad, 0x3456789123456706,
0xa001c62b63200000,
};
@@ -207,13 +187,10 @@ TEST_F(PackFormats, 6)
TEST_F(PackFormats, 7)
{
struct bi_packed_tuple tuples[] = {
{ 0x9020074040000083, 0xf65 },
{ 0x90000d4058100080, 0xf65 },
{ 0x90000a3058700082, 0xf65 },
{ 0x9020074008114581, 0xf65 },
{ 0x90000d0058000080, 0xf65 },
{ 0x9000083058700082, 0xf65 },
{ 0x2380cb199ac38400, 0x327a },
{0x9020074040000083, 0xf65}, {0x90000d4058100080, 0xf65},
{0x90000a3058700082, 0xf65}, {0x9020074008114581, 0xf65},
{0x90000d0058000080, 0xf65}, {0x9000083058700082, 0xf65},
{0x2380cb199ac38400, 0x327a},
};
bi_pack_format(&result, 0, tuples, 7, 0x3000100000, 0, 0, true);
@@ -223,15 +200,9 @@ TEST_F(PackFormats, 7)
bi_pack_format(&result, 11, tuples, 7, 0x3000100000, 0, 0, true);
const uint64_t expected[] = {
0x2007404000008329,
0x0180008000076590,
0x000d405810008021,
0x5182c38004176590,
0x2007400811458101,
0x2401d96400076590,
0x000d005800008061,
0x4182c38004176590,
0x80cb199ac3840047,
0x2007404000008329, 0x0180008000076590, 0x000d405810008021,
0x5182c38004176590, 0x2007400811458101, 0x2401d96400076590,
0x000d005800008061, 0x4182c38004176590, 0x80cb199ac3840047,
0x3801d96400027a23,
};
@@ -242,14 +213,10 @@ TEST_F(PackFormats, 7)
TEST_F(PackFormats, 8)
{
struct bi_packed_tuple tuples[] = {
{ 0x442087037a2f8643, 0x3021 },
{ 0x84008d0586100043, 0x200 },
{ 0x7c008d0028014543, 0x0 },
{ 0x1c00070058200081, 0x1980 },
{ 0x1600dd878320400, 0x200 },
{ 0x49709c1b08308900, 0x200 },
{ 0x6c2007807881ca00, 0x40 },
{ 0x8d70fc0d94900083, 0x800 },
{0x442087037a2f8643, 0x3021}, {0x84008d0586100043, 0x200},
{0x7c008d0028014543, 0x0}, {0x1c00070058200081, 0x1980},
{0x1600dd878320400, 0x200}, {0x49709c1b08308900, 0x200},
{0x6c2007807881ca00, 0x40}, {0x8d70fc0d94900083, 0x800},
};
uint64_t EC0 = (0x32e635d0) >> 4;
@@ -262,18 +229,10 @@ TEST_F(PackFormats, 8)
bi_pack_format(&result, 13, tuples, 8, 0x61001311800, EC0, 0, true);
const uint64_t expected[] = {
0x2087037a2f86432e,
0x30800988c0002144,
0x008d058610004320,
0x6801400a2a1a0084,
0x0007005820008101,
0x0c00001f0021801c,
0x600dd87832040060,
0xe0d8418448020001,
0x2007807881ca00c0,
0xc6ba80125c20406c,
0x70fc0d9490008359,
0x0000000032e0008d,
0x2087037a2f86432e, 0x30800988c0002144, 0x008d058610004320,
0x6801400a2a1a0084, 0x0007005820008101, 0x0c00001f0021801c,
0x600dd87832040060, 0xe0d8418448020001, 0x2007807881ca00c0,
0xc6ba80125c20406c, 0x70fc0d9490008359, 0x0000000032e0008d,
};
ASSERT_EQ(result.size, 96);


@@ -39,14 +39,9 @@ TEST(Packing, PackLiteral)
TEST(Packing, PackUpper)
{
struct bi_packed_tuple tuples[] = {
{ 0, 0x3 << (75 - 64) },
{ 0, 0x1 << (75 - 64) },
{ 0, 0x7 << (75 - 64) },
{ 0, 0x0 << (75 - 64) },
{ 0, 0x2 << (75 - 64) },
{ 0, 0x6 << (75 - 64) },
{ 0, 0x5 << (75 - 64) },
{ 0, 0x4 << (75 - 64) },
{0, 0x3 << (75 - 64)}, {0, 0x1 << (75 - 64)}, {0, 0x7 << (75 - 64)},
{0, 0x0 << (75 - 64)}, {0, 0x2 << (75 - 64)}, {0, 0x6 << (75 - 64)},
{0, 0x5 << (75 - 64)}, {0, 0x4 << (75 - 64)},
};
EXPECT_EQ(bi_pack_upper(U(0), tuples, 8), 3);
@@ -62,9 +57,9 @@ TEST(Packing, PackUpper)
TEST(Packing, PackTupleBits)
{
struct bi_packed_tuple tuples[] = {
{ 0x1234567801234567, 0x3A },
{ 0x9876543299999999, 0x1B },
{ 0xABCDEF0101234567, 0x7C },
{0x1234567801234567, 0x3A},
{0x9876543299999999, 0x1B},
{0xABCDEF0101234567, 0x7C},
};
EXPECT_EQ(bi_pack_tuple_bits(T(0), tuples, 8, 0, 30), 0x01234567);
@@ -75,19 +70,14 @@ TEST(Packing, PackTupleBits)
TEST(Packing, PackSync)
{
struct bi_packed_tuple tuples[] = {
{ 0, 0x3 << (75 - 64) },
{ 0, 0x5 << (75 - 64) },
{ 0, 0x7 << (75 - 64) },
{ 0, 0x0 << (75 - 64) },
{ 0, 0x2 << (75 - 64) },
{ 0, 0x6 << (75 - 64) },
{ 0, 0x5 << (75 - 64) },
{ 0, 0x4 << (75 - 64) },
{0, 0x3 << (75 - 64)}, {0, 0x5 << (75 - 64)}, {0, 0x7 << (75 - 64)},
{0, 0x0 << (75 - 64)}, {0, 0x2 << (75 - 64)}, {0, 0x6 << (75 - 64)},
{0, 0x5 << (75 - 64)}, {0, 0x4 << (75 - 64)},
};
EXPECT_EQ(bi_pack_sync(L(3), L(1), L(7), tuples, 8, false), 0xCF);
EXPECT_EQ(bi_pack_sync(L(3), L(1), U(7), tuples, 8, false), 0xCC);
EXPECT_EQ(bi_pack_sync(L(3), U(1), U(7), tuples, 8, false), 0xEC);
EXPECT_EQ(bi_pack_sync(Z, U(1), U(7), tuples, 8, false), 0x2C);
EXPECT_EQ(bi_pack_sync(Z, U(1), U(7), tuples, 8, true) , 0x6C);
EXPECT_EQ(bi_pack_sync(Z, U(1), U(7), tuples, 8, false), 0x2C);
EXPECT_EQ(bi_pack_sync(Z, U(1), U(7), tuples, 8, true), 0x6C);
}


@@ -21,23 +21,28 @@
* SOFTWARE.
*/
#include "compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "compiler.h"
#include <gtest/gtest.h>
class SchedulerPredicates : public testing::Test {
protected:
SchedulerPredicates() {
protected:
SchedulerPredicates()
{
mem_ctx = ralloc_context(NULL);
b = bit_builder(mem_ctx);
}
~SchedulerPredicates() {
~SchedulerPredicates()
{
ralloc_free(mem_ctx);
}
bi_index TMP() { return bi_temp(b->shader); }
bi_index TMP()
{
return bi_temp(b->shader);
}
void *mem_ctx;
bi_builder *b;


@ -1,21 +1,21 @@
#ifndef __DISASM_H
#define __DISASM_H
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BIT(b) (1ull << (b))
#define MASK(count) ((1ull << (count)) - 1)
#define BIT(b) (1ull << (b))
#define MASK(count) ((1ull << (count)) - 1)
#define SEXT(b, count) ((b ^ BIT(count - 1)) - BIT(count - 1))
#define UNUSED __attribute__((unused))
#define UNUSED __attribute__((unused))
#define VA_SRC_UNIFORM_TYPE 0x2
#define VA_SRC_IMM_TYPE 0x3
#define VA_SRC_IMM_TYPE 0x3
static inline void
va_print_dest(FILE *fp, uint8_t dest, bool can_mask)
@@ -51,7 +51,7 @@ disassemble_valhall(FILE *fp, const uint64_t *code, unsigned size, bool verbose)
if (verbose) {
/* Print byte pattern */
for (unsigned j = 0; j < 8; ++j)
fprintf(fp, "%02x ", (uint8_t) (instr >> (j * 8)));
fprintf(fp, "%02x ", (uint8_t)(instr >> (j * 8)));
fprintf(fp, " ");
} else {


@@ -21,10 +21,10 @@
* SOFTWARE.
*/
#include "va_compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "util/u_cpu_detect.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"
#include <gtest/gtest.h>
@@ -37,102 +37,137 @@ add_imm(bi_context *ctx)
}
#define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, add_imm)
#define NEGCASE(instr) CASE(instr, instr)
#define NEGCASE(instr) CASE(instr, instr)
class AddImm : public testing::Test {
protected:
AddImm() {
protected:
AddImm()
{
mem_ctx = ralloc_context(NULL);
}
~AddImm() {
~AddImm()
{
ralloc_free(mem_ctx);
}
void *mem_ctx;
};
TEST_F(AddImm, Basic) {
TEST_F(AddImm, Basic)
{
CASE(bi_mov_i32_to(b, bi_register(63), bi_imm_u32(0xABAD1DEA)),
bi_iadd_imm_i32_to(b, bi_register(63), bi_zero(), 0xABAD1DEA));
CASE(bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0)),
bi_fadd_imm_f32_to(b, bi_register(1), bi_register(2), fui(42.0)));
CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_imm_f32(42.0)),
bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)), fui(42.0)));
CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)),
bi_imm_f32(42.0)),
bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)),
fui(42.0)));
CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_neg(bi_imm_f32(42.0))),
bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)), fui(-42.0)));
CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)),
bi_neg(bi_imm_f32(42.0))),
bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)),
fui(-42.0)));
}
TEST_F(AddImm, Commutativty) {
TEST_F(AddImm, Commutativty)
{
CASE(bi_fadd_f32_to(b, bi_register(1), bi_imm_f32(42.0), bi_register(2)),
bi_fadd_imm_f32_to(b, bi_register(1), bi_register(2), fui(42.0)));
}
TEST_F(AddImm, NoModifiers) {
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_abs(bi_register(2)), bi_imm_f32(42.0)));
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_neg(bi_register(2)), bi_imm_f32(42.0)));
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_f32(42.0)));
TEST_F(AddImm, NoModifiers)
{
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_abs(bi_register(2)),
bi_imm_f32(42.0)));
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_neg(bi_register(2)),
bi_imm_f32(42.0)));
NEGCASE(bi_fadd_f32_to(b, bi_register(1),
bi_swz_16(bi_register(2), false, false),
bi_imm_f32(42.0)));
}
TEST_F(AddImm, NoClamp) {
TEST_F(AddImm, NoClamp)
{
NEGCASE({
bi_instr *I = bi_fadd_f32_to(b, bi_register(1), bi_register(2),
bi_imm_f32(42.0));
bi_instr *I =
bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0));
I->clamp = BI_CLAMP_CLAMP_M1_1;
});
}
TEST_F(AddImm, OtherTypes) {
TEST_F(AddImm, OtherTypes)
{
CASE(bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0)),
bi_fadd_imm_v2f16_to(b, bi_register(1), bi_register(2), 0x51405140));
CASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false),
CASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), false),
bi_iadd_imm_i32_to(b, bi_register(1), bi_register(2), 0xDEADBEEF));
CASE(bi_iadd_v2u16_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false),
CASE(bi_iadd_v2u16_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), false),
bi_iadd_imm_v2i16_to(b, bi_register(1), bi_register(2), 0xDEADBEEF));
CASE(bi_iadd_v4u8_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false),
CASE(bi_iadd_v4u8_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), false),
bi_iadd_imm_v4i8_to(b, bi_register(1), bi_register(2), 0xDEADBEEF));
CASE(bi_iadd_s32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false),
CASE(bi_iadd_s32_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), false),
bi_iadd_imm_i32_to(b, bi_register(1), bi_register(2), 0xDEADBEEF));
CASE(bi_iadd_v2s16_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false),
CASE(bi_iadd_v2s16_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), false),
bi_iadd_imm_v2i16_to(b, bi_register(1), bi_register(2), 0xDEADBEEF));
CASE(bi_iadd_v4s8_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false),
CASE(bi_iadd_v4s8_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), false),
bi_iadd_imm_v4i8_to(b, bi_register(1), bi_register(2), 0xDEADBEEF));
NEGCASE(bi_iadd_u32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_v2u16_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), true));
NEGCASE(bi_iadd_s32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_v2s16_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_u32_to(b, bi_register(1),
bi_swz_16(bi_register(2), false, false),
bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_v2u16_to(b, bi_register(1),
bi_swz_16(bi_register(2), false, false),
bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), true));
NEGCASE(bi_iadd_s32_to(b, bi_register(1),
bi_swz_16(bi_register(2), false, false),
bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_v2s16_to(b, bi_register(1),
bi_swz_16(bi_register(2), false, false),
bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_s32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), true));
NEGCASE(bi_iadd_s32_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), true));
}
TEST_F(AddImm, Int8) {
TEST_F(AddImm, Int8)
{
bi_index idx = bi_register(2);
idx.swizzle = BI_SWIZZLE_B0000;
NEGCASE(bi_iadd_v4u8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_v4s8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false));
NEGCASE(
bi_iadd_v4u8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false));
NEGCASE(
bi_iadd_v4s8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false));
}
TEST_F(AddImm, OnlyRTE) {
TEST_F(AddImm, OnlyRTE)
{
NEGCASE({
bi_instr *I = bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0));
I->round = BI_ROUND_RTP;
bi_instr *I =
bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0));
I->round = BI_ROUND_RTP;
});
NEGCASE({
bi_instr *I = bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0));
I->round = BI_ROUND_RTZ;
bi_instr *I =
bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0));
I->round = BI_ROUND_RTZ;
});
}


@@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include <stdio.h>
#include <inttypes.h>
#include <stdio.h>
#include "disassemble.h"
static inline uint8_t
@@ -39,7 +39,7 @@ parse_hex(const char *in)
for (unsigned i = 0; i < 8; ++i) {
uint8_t byte = (parse_nibble(in[0]) << 4) | parse_nibble(in[1]);
v |= ((uint64_t) byte) << (8 * i);
v |= ((uint64_t)byte) << (8 * i);
/* Skip the space after the byte */
in += 3;


@@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"
#include "valhall_enums.h"
@@ -37,177 +37,190 @@ strip_nops(bi_context *ctx)
}
}
#define CASE(shader_stage, test) do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
UNUSED bi_builder *b = A; \
A->shader->stage = MESA_SHADER_ ## shader_stage; \
test; \
} \
strip_nops(A->shader); \
va_insert_flow_control_nops(A->shader); \
{ \
UNUSED bi_builder *b = B; \
B->shader->stage = MESA_SHADER_ ## shader_stage; \
test; \
} \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while(0)
#define CASE(shader_stage, test) \
do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
UNUSED bi_builder *b = A; \
A->shader->stage = MESA_SHADER_##shader_stage; \
test; \
} \
strip_nops(A->shader); \
va_insert_flow_control_nops(A->shader); \
{ \
UNUSED bi_builder *b = B; \
B->shader->stage = MESA_SHADER_##shader_stage; \
test; \
} \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while (0)
#define flow(f) bi_nop(b)->flow = VA_FLOW_ ## f
#define flow(f) bi_nop(b)->flow = VA_FLOW_##f
class InsertFlow : public testing::Test {
protected:
InsertFlow() {
protected:
InsertFlow()
{
mem_ctx = ralloc_context(NULL);
}
~InsertFlow() {
~InsertFlow()
{
ralloc_free(mem_ctx);
}
void *mem_ctx;
};
TEST_F(InsertFlow, PreserveEmptyShader) {
TEST_F(InsertFlow, PreserveEmptyShader)
{
CASE(FRAGMENT, {});
}
TEST_F(InsertFlow, TilebufferWait7) {
TEST_F(InsertFlow, TilebufferWait7)
{
CASE(FRAGMENT, {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT);
bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
flow(END);
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT);
bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
flow(END);
});
CASE(FRAGMENT, {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT);
bi_st_tile(b, bi_register(0), bi_register(4), bi_register(5),
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT);
bi_st_tile(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), BI_REGISTER_FORMAT_AUTO, BI_VECSIZE_V4);
flow(END);
});
CASE(FRAGMENT, {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT);
bi_ld_tile_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), BI_REGISTER_FORMAT_AUTO, BI_VECSIZE_V4);
flow(END);
});
CASE(FRAGMENT, {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT);
bi_ld_tile_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), BI_REGISTER_FORMAT_AUTO, BI_VECSIZE_V4);
flow(END);
flow(END);
});
}
TEST_F(InsertFlow, AtestWait6AndWait0After) {
TEST_F(InsertFlow, AtestWait6AndWait0After)
{
CASE(FRAGMENT, {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0126);
bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_fau(BIR_FAU_ATEST_PARAM, false));
flow(WAIT0);
flow(END);
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0126);
bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_fau(BIR_FAU_ATEST_PARAM, false));
flow(WAIT0);
flow(END);
});
}
TEST_F(InsertFlow, ZSEmitWait6) {
TEST_F(InsertFlow, ZSEmitWait6)
{
CASE(FRAGMENT, {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0126);
bi_zs_emit_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), true, true);
flow(END);
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0126);
bi_zs_emit_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), true, true);
flow(END);
});
}
TEST_F(InsertFlow, LoadThenUnrelatedThenUse) {
TEST_F(InsertFlow, LoadThenUnrelatedThenUse)
{
CASE(VERTEX, {
bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19));
flow(END);
bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19));
flow(END);
});
}
TEST_F(InsertFlow, SingleLdVar) {
TEST_F(InsertFlow, SingleLdVar)
{
CASE(FRAGMENT, {
flow(DISCARD);
bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16,
BI_UPDATE_RETRIEVE, BI_VECSIZE_V4, 0);
flow(WAIT0);
flow(END);
flow(DISCARD);
bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16, BI_UPDATE_RETRIEVE,
BI_VECSIZE_V4, 0);
flow(WAIT0);
flow(END);
});
}
TEST_F(InsertFlow, SerializeLdVars) {
TEST_F(InsertFlow, SerializeLdVars)
{
CASE(FRAGMENT, {
flow(DISCARD);
bi_ld_var_buf_imm_f16_to(b, bi_register(16), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16,
BI_UPDATE_STORE, BI_VECSIZE_V4, 0);
bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16,
BI_UPDATE_RETRIEVE, BI_VECSIZE_V4, 0);
flow(WAIT0);
bi_ld_var_buf_imm_f16_to(b, bi_register(8), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16,
BI_UPDATE_STORE, BI_VECSIZE_V4, 1);
flow(WAIT0);
flow(END);
flow(DISCARD);
bi_ld_var_buf_imm_f16_to(b, bi_register(16), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE,
BI_VECSIZE_V4, 0);
bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16, BI_UPDATE_RETRIEVE,
BI_VECSIZE_V4, 0);
flow(WAIT0);
bi_ld_var_buf_imm_f16_to(b, bi_register(8), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE,
BI_VECSIZE_V4, 1);
flow(WAIT0);
flow(END);
});
}
TEST_F(InsertFlow, Clper) {
TEST_F(InsertFlow, Clper)
{
CASE(FRAGMENT, {
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP4);
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(END);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP4);
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(END);
});
}
TEST_F(InsertFlow, TextureImplicit) {
TEST_F(InsertFlow, TextureImplicit)
{
CASE(FRAGMENT, {
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
bi_register(12), false, BI_DIMENSION_2D,
BI_REGISTER_FORMAT_F32, false, false,
BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4);
flow(DISCARD);
flow(WAIT0);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(END);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
bi_register(12), false, BI_DIMENSION_2D,
BI_REGISTER_FORMAT_F32, false, false,
BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4);
flow(DISCARD);
flow(WAIT0);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(END);
});
}
TEST_F(InsertFlow, TextureExplicit) {
TEST_F(InsertFlow, TextureExplicit)
{
CASE(FRAGMENT, {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
bi_register(12), false, BI_DIMENSION_2D,
BI_REGISTER_FORMAT_F32, false, false,
BI_VA_LOD_MODE_ZERO_LOD, BI_WRITE_MASK_RGBA, 4);
flow(WAIT0);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(END);
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
bi_register(12), false, BI_DIMENSION_2D,
BI_REGISTER_FORMAT_F32, false, false,
BI_VA_LOD_MODE_ZERO_LOD, BI_WRITE_MASK_RGBA, 4);
flow(WAIT0);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(END);
});
}
@@ -217,49 +230,52 @@ TEST_F(InsertFlow, TextureExplicit) {
* \ /
* D
*/
TEST_F(InsertFlow, DiamondCFG) {
TEST_F(InsertFlow, DiamondCFG)
{
CASE(FRAGMENT, {
bi_block *A = bi_start_block(&b->shader->blocks);
bi_block *B = bit_block(b->shader);
bi_block *C = bit_block(b->shader);
bi_block *D = bit_block(b->shader);
bi_block *A = bi_start_block(&b->shader->blocks);
bi_block *B = bit_block(b->shader);
bi_block *C = bit_block(b->shader);
bi_block *D = bit_block(b->shader);
bi_block_add_successor(A, B);
bi_block_add_successor(A, C);
bi_block_add_successor(A, B);
bi_block_add_successor(A, C);
bi_block_add_successor(B, D);
bi_block_add_successor(C, D);
bi_block_add_successor(B, D);
bi_block_add_successor(C, D);
/* B uses helper invocations, no other block does.
*
* That means B and C need to discard helpers.
*/
b->cursor = bi_after_block(B);
bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP4);
flow(DISCARD);
flow(RECONVERGE);
/* B uses helper invocations, no other block does.
*
* That means B and C need to discard helpers.
*/
b->cursor = bi_after_block(B);
bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP4);
flow(DISCARD);
flow(RECONVERGE);
b->cursor = bi_after_block(C);
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(RECONVERGE);
b->cursor = bi_after_block(C);
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(RECONVERGE);
b->cursor = bi_after_block(D);
flow(END);
b->cursor = bi_after_block(D);
flow(END);
});
}
TEST_F(InsertFlow, BarrierBug) {
TEST_F(InsertFlow, BarrierBug)
{
CASE(KERNEL, {
bi_instr *I = bi_store_i32(b, bi_register(0), bi_register(2), bi_register(4), BI_SEG_NONE, 0);
I->slot = 2;
bi_instr *I = bi_store_i32(b, bi_register(0), bi_register(2),
bi_register(4), BI_SEG_NONE, 0);
I->slot = 2;
bi_fadd_f32_to(b, bi_register(10), bi_register(10), bi_register(10));
flow(WAIT2);
bi_barrier(b);
flow(WAIT);
flow(END);
bi_fadd_f32_to(b, bi_register(10), bi_register(10), bi_register(10));
flow(WAIT2);
bi_barrier(b);
flow(WAIT);
flow(END);
});
}


@@ -21,9 +21,9 @@
* SOFTWARE.
*/
#include "va_compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"
#include <gtest/gtest.h>
@@ -38,19 +38,22 @@ add_imm(bi_context *ctx)
#define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, add_imm)
class LowerConstants : public testing::Test {
protected:
LowerConstants() {
protected:
LowerConstants()
{
mem_ctx = ralloc_context(NULL);
}
~LowerConstants() {
~LowerConstants()
{
ralloc_free(mem_ctx);
}
void *mem_ctx;
};
TEST_F(LowerConstants, Float32) {
TEST_F(LowerConstants, Float32)
{
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(0.0)),
bi_fadd_f32_to(b, bi_register(0), bi_register(0), va_lut(0)));
@@ -61,46 +64,59 @@ TEST_F(LowerConstants, Float32) {
bi_fadd_f32_to(b, bi_register(0), bi_register(0), va_lut(17)));
}
TEST_F(LowerConstants, WidenFloat16) {
TEST_F(LowerConstants, WidenFloat16)
{
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(0.5)),
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_half(va_lut(26), 1)));
bi_fadd_f32_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(26), 1)));
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(255.0)),
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_half(va_lut(23), 0)));
bi_fadd_f32_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(23), 0)));
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(256.0)),
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_half(va_lut(23), 1)));
bi_fadd_f32_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(23), 1)));
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(8.0)),
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_half(va_lut(30), 1)));
bi_fadd_f32_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(30), 1)));
}
TEST_F(LowerConstants, ReplicateFloat16) {
TEST_F(LowerConstants, ReplicateFloat16)
{
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(255.0)),
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_half(va_lut(23), 0)));
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(23), 0)));
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(4.0)),
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_half(va_lut(29), 1)));
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(29), 1)));
}
TEST_F(LowerConstants, NegateFloat32) {
TEST_F(LowerConstants, NegateFloat32)
{
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(-1.0)),
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_neg(va_lut(16))));
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(-255.0)),
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_neg(bi_half(va_lut(23), 0))));
bi_fadd_f32_to(b, bi_register(0), bi_register(0),
bi_neg(bi_half(va_lut(23), 0))));
}
TEST_F(LowerConstants, NegateReplicateFloat16)
{
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(-255.0)),
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_neg(bi_half(va_lut(23), 0))));
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0),
bi_neg(bi_half(va_lut(23), 0))));
}
TEST_F(LowerConstants, NegateVec2Float16)
{
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xBC008000)),
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_neg(va_lut(27))));
CASE(
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0),
bi_imm_u32(0xBC008000)),
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_neg(va_lut(27))));
}
TEST_F(LowerConstants, Int8InInt32)
@@ -117,87 +133,105 @@ TEST_F(LowerConstants, ZeroExtendForUnsigned)
CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xFF),
bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
bi_byte(va_lut(1), 0), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
bi_byte(va_lut(1), 0), bi_register(0), BI_CMPF_LT,
BI_RESULT_TYPE_I1));
CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
bi_imm_u32(0xFFFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(1), 0), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
CASE(
bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xFFFF),
bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(1), 0), bi_register(0), BI_CMPF_LT,
BI_RESULT_TYPE_I1));
}
TEST_F(LowerConstants, SignExtendPositiveForSigned)
{
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_imm_u32(0x7F), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0x7F),
bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_byte(va_lut(2), 3), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
bi_byte(va_lut(2), 3), bi_register(0), BI_CMPF_LT,
BI_RESULT_TYPE_I1));
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_imm_u32(0x7FFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(2), 1), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
CASE(
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0x7FFF),
bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(2), 1), bi_register(0), BI_CMPF_LT,
BI_RESULT_TYPE_I1));
}
TEST_F(LowerConstants, SignExtendNegativeForSigned)
{
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_imm_u32(0xFFFFFFF8), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_imm_u32(0xFFFFFFF8), bi_register(0), BI_CMPF_LT,
BI_RESULT_TYPE_I1),
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_byte(va_lut(23), 0), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
bi_byte(va_lut(23), 0), bi_register(0), BI_CMPF_LT,
BI_RESULT_TYPE_I1));
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_imm_u32(0xFFFFFAFC), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_imm_u32(0xFFFFFAFC), bi_register(0), BI_CMPF_LT,
BI_RESULT_TYPE_I1),
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(3), 1), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
bi_half(va_lut(3), 1), bi_register(0), BI_CMPF_LT,
BI_RESULT_TYPE_I1));
}
TEST_F(LowerConstants, DontZeroExtendForSigned)
{
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_imm_u32(0xFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xFF),
bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_iadd_imm_i32(b, va_lut(0), 0xFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_imm_u32(0xFFFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_iadd_imm_i32(b, va_lut(0), 0xFFFF), bi_register(0),
bi_iadd_imm_i32(b, va_lut(0), 0xFF), bi_register(0),
BI_CMPF_LT, BI_RESULT_TYPE_I1));
CASE(
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xFFFF),
bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
bi_iadd_imm_i32(b, va_lut(0), 0xFFFF), bi_register(0),
BI_CMPF_LT, BI_RESULT_TYPE_I1));
}
TEST_F(LowerConstants, DontZeroExtendNegative)
{
CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
bi_imm_u32(0xFFFFFFF8), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_imm_u32(0xFFFFFFF8), bi_register(0), BI_CMPF_LT,
BI_RESULT_TYPE_I1),
bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFFF8), bi_register(0),
BI_CMPF_LT, BI_RESULT_TYPE_I1));
bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFFF8),
bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
bi_imm_u32(0xFFFFFAFC), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
bi_imm_u32(0xFFFFFAFC), bi_register(0), BI_CMPF_LT,
BI_RESULT_TYPE_I1),
bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFAFC), bi_register(0),
BI_CMPF_LT, BI_RESULT_TYPE_I1));
bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFAFC),
bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
}
TEST_F(LowerConstants, HandleTrickyNegativesFP16)
{
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(-57216.0)),
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_half(va_lut(3), 1)));
CASE(
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(-57216.0)),
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0),
bi_half(va_lut(3), 1)));
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(57216.0)),
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_neg(bi_half(va_lut(3), 1))));
CASE(
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(57216.0)),
bi_fadd_v2f16_to(b, bi_register(0), bi_register(0),
bi_neg(bi_half(va_lut(3), 1))));
}
TEST_F(LowerConstants, MaintainMkvecRestrictedSwizzles)
{
CASE(bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0),
bi_imm_u8(0), bi_imm_u32(0)),
CASE(bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0), bi_imm_u8(0),
bi_imm_u32(0)),
bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0),
bi_byte(va_lut(0), 0), va_lut(0)));
CASE(bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0),
bi_imm_u8(14), bi_imm_u32(0)),
CASE(bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0), bi_imm_u8(14),
bi_imm_u32(0)),
bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0),
bi_byte(va_lut(11), 2), va_lut(0)));
}


@@ -21,18 +21,19 @@
* SOFTWARE.
*/
#include "va_compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"
#include <gtest/gtest.h>
#define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, va_lower_isel)
#define NEGCASE(instr) CASE(instr, instr)
#define NEGCASE(instr) CASE(instr, instr)
class LowerIsel : public testing::Test {
protected:
LowerIsel() {
protected:
LowerIsel()
{
mem_ctx = ralloc_context(NULL);
reg = bi_register(1);
x = bi_register(2);
@@ -40,7 +41,8 @@ protected:
z = bi_register(4);
}
~LowerIsel() {
~LowerIsel()
{
ralloc_free(mem_ctx);
}
@@ -48,14 +50,16 @@ protected:
bi_index reg, x, y, z;
};
TEST_F(LowerIsel, 8BitSwizzles) {
TEST_F(LowerIsel, 8BitSwizzles)
{
for (unsigned i = 0; i < 4; ++i) {
CASE(bi_swz_v4i8_to(b, reg, bi_byte(reg, i)),
bi_iadd_v4u8_to(b, reg, bi_byte(reg, i), bi_zero(), false));
}
}
TEST_F(LowerIsel, 16BitSwizzles) {
TEST_F(LowerIsel, 16BitSwizzles)
{
for (unsigned i = 0; i < 2; ++i) {
for (unsigned j = 0; j < 2; ++j) {
CASE(bi_swz_v2i16_to(b, reg, bi_swz_16(reg, i, j)),
@@ -64,24 +68,30 @@ TEST_F(LowerIsel, 16BitSwizzles) {
}
}
TEST_F(LowerIsel, JumpsLoweredToBranches) {
bi_block block = { };
TEST_F(LowerIsel, JumpsLoweredToBranches)
{
bi_block block = {};
CASE({
bi_instr *I = bi_jump(b, bi_imm_u32(0xDEADBEEF));
I->branch_target = &block;
}, {
bi_instr *I = bi_branchz_i16(b, bi_zero(), bi_imm_u32(0xDEADBEEF), BI_CMPF_EQ);
I->branch_target = &block;
});
CASE(
{
bi_instr *I = bi_jump(b, bi_imm_u32(0xDEADBEEF));
I->branch_target = &block;
},
{
bi_instr *I =
bi_branchz_i16(b, bi_zero(), bi_imm_u32(0xDEADBEEF), BI_CMPF_EQ);
I->branch_target = &block;
});
}
TEST_F(LowerIsel, IndirectJumpsLoweredToBranches) {
TEST_F(LowerIsel, IndirectJumpsLoweredToBranches)
{
CASE(bi_jump(b, bi_register(17)),
bi_branchzi(b, bi_zero(), bi_register(17), BI_CMPF_EQ));
}
TEST_F(LowerIsel, IntegerCSEL) {
TEST_F(LowerIsel, IntegerCSEL)
{
CASE(bi_csel_i32(b, reg, reg, reg, reg, BI_CMPF_EQ),
bi_csel_u32(b, reg, reg, reg, reg, BI_CMPF_EQ));
@@ -89,7 +99,8 @@ TEST_F(LowerIsel, IntegerCSEL) {
bi_csel_v2u16(b, reg, reg, reg, reg, BI_CMPF_EQ));
}
TEST_F(LowerIsel, AvoidSimpleMux) {
TEST_F(LowerIsel, AvoidSimpleMux)
{
CASE(bi_mux_i32(b, x, y, z, BI_MUX_INT_ZERO),
bi_csel_u32(b, z, bi_zero(), x, y, BI_CMPF_EQ));
CASE(bi_mux_i32(b, x, y, z, BI_MUX_NEG),
@@ -105,27 +116,32 @@ TEST_F(LowerIsel, AvoidSimpleMux) {
bi_csel_v2f16(b, z, bi_zero(), x, y, BI_CMPF_EQ));
}
TEST_F(LowerIsel, BitwiseMux) {
TEST_F(LowerIsel, BitwiseMux)
{
NEGCASE(bi_mux_i32(b, x, y, z, BI_MUX_BIT));
NEGCASE(bi_mux_v2i16(b, x, y, z, BI_MUX_BIT));
NEGCASE(bi_mux_v4i8(b, x, y, z, BI_MUX_BIT));
}
TEST_F(LowerIsel, MuxInt8) {
TEST_F(LowerIsel, MuxInt8)
{
NEGCASE(bi_mux_v4i8(b, x, y, z, BI_MUX_INT_ZERO));
NEGCASE(bi_mux_v4i8(b, x, y, z, BI_MUX_NEG));
NEGCASE(bi_mux_v4i8(b, x, y, z, BI_MUX_FP_ZERO));
}
TEST_F(LowerIsel, FaddRscale) {
CASE(bi_fadd_rscale_f32_to(b, reg, x, y, z, BI_SPECIAL_NONE),
bi_fma_rscale_f32_to(b, reg, x, bi_imm_f32(1.0), y, z, BI_SPECIAL_NONE));
TEST_F(LowerIsel, FaddRscale)
{
CASE(
bi_fadd_rscale_f32_to(b, reg, x, y, z, BI_SPECIAL_NONE),
bi_fma_rscale_f32_to(b, reg, x, bi_imm_f32(1.0), y, z, BI_SPECIAL_NONE));
CASE(bi_fadd_rscale_f32_to(b, reg, x, y, z, BI_SPECIAL_N),
bi_fma_rscale_f32_to(b, reg, x, bi_imm_f32(1.0), y, z, BI_SPECIAL_N));
}
TEST_F(LowerIsel, Smoke) {
TEST_F(LowerIsel, Smoke)
{
NEGCASE(bi_fadd_f32_to(b, reg, reg, reg));
NEGCASE(bi_csel_s32_to(b, reg, reg, reg, reg, reg, BI_CMPF_LT));
NEGCASE(bi_csel_u32_to(b, reg, reg, reg, reg, reg, BI_CMPF_LT));


@@ -21,14 +21,14 @@
* SOFTWARE.
*/
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"
#include "valhall_enums.h"
#include <gtest/gtest.h>
#define R(x) bi_register(x)
#define R(x) bi_register(x)
#define DR(x) bi_discard(R(x))
static void
@@ -40,105 +40,119 @@ strip_discard(bi_context *ctx)
}
}
#define CASE(test) do { \
void *mem_ctx = ralloc_context(NULL); \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
UNUSED bi_builder *b = A; \
test; \
} \
strip_discard(A->shader); \
va_mark_last(A->shader); \
{ \
UNUSED bi_builder *b = B; \
test; \
} \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
ralloc_free(mem_ctx); \
} while(0)
#define CASE(test) \
do { \
void *mem_ctx = ralloc_context(NULL); \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
UNUSED bi_builder *b = A; \
test; \
} \
strip_discard(A->shader); \
va_mark_last(A->shader); \
{ \
UNUSED bi_builder *b = B; \
test; \
} \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
ralloc_free(mem_ctx); \
} while (0)
TEST(MarkLast, Simple) {
TEST(MarkLast, Simple)
{
CASE(bi_fadd_f32_to(b, R(0), DR(0), DR(1)));
CASE({
bi_fadd_f32_to(b, R(2), R(0), DR(1));
bi_fadd_f32_to(b, R(0), DR(0), DR(2));
bi_fadd_f32_to(b, R(2), R(0), DR(1));
bi_fadd_f32_to(b, R(0), DR(0), DR(2));
});
}
TEST(MarkLast, SameSourceAndDestination) {
TEST(MarkLast, SameSourceAndDestination)
{
CASE({
bi_fadd_f32_to(b, R(0), DR(0), DR(0));
bi_fadd_f32_to(b, R(0), DR(0), DR(0));
bi_fadd_f32_to(b, R(0), DR(0), DR(0));
bi_fadd_f32_to(b, R(0), DR(0), DR(0));
bi_fadd_f32_to(b, R(0), DR(0), DR(0));
bi_fadd_f32_to(b, R(0), DR(0), DR(0));
});
}
TEST(MarkLast, StagingReadBefore) {
TEST(MarkLast, StagingReadBefore)
{
CASE({
bi_fadd_f32_to(b, R(9), R(2), DR(7));
bi_st_tile(b, R(0), DR(4), DR(5), DR(6), BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4);
bi_fadd_f32_to(b, R(9), R(2), DR(7));
bi_st_tile(b, R(0), DR(4), DR(5), DR(6), BI_REGISTER_FORMAT_F32,
BI_VECSIZE_V4);
});
}
TEST(MarkLast, StagingReadAfter) {
TEST(MarkLast, StagingReadAfter)
{
CASE({
bi_st_tile(b, R(0), DR(4), DR(5), DR(6), BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4);
bi_fadd_f32_to(b, R(9), R(2), DR(7));
bi_st_tile(b, R(0), DR(4), DR(5), DR(6), BI_REGISTER_FORMAT_F32,
BI_VECSIZE_V4);
bi_fadd_f32_to(b, R(9), R(2), DR(7));
});
}
TEST(MarkLast, NonstagingSourceToAsync) {
TEST(MarkLast, NonstagingSourceToAsync)
{
CASE({
bi_st_tile(b, R(0), R(4), R(5), DR(6), BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4);
bi_fadd_f32_to(b, R(9), DR(4), DR(5));
bi_st_tile(b, R(0), R(4), R(5), DR(6), BI_REGISTER_FORMAT_F32,
BI_VECSIZE_V4);
bi_fadd_f32_to(b, R(9), DR(4), DR(5));
});
}
TEST(MarkLast, Both64) {
TEST(MarkLast, Both64)
{
CASE(bi_load_i32_to(b, R(0), DR(8), DR(9), BI_SEG_NONE, 0));
}
TEST(MarkLast, Neither64ThenBoth) {
TEST(MarkLast, Neither64ThenBoth)
{
CASE({
bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0);
bi_load_i32_to(b, R(1), DR(8), DR(9), BI_SEG_NONE, 8);
bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0);
bi_load_i32_to(b, R(1), DR(8), DR(9), BI_SEG_NONE, 8);
});
}
TEST(MarkLast, Half64) {
TEST(MarkLast, Half64)
{
CASE({
bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0);
bi_fadd_f32_to(b, R(8), DR(8), DR(8));
bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0);
bi_fadd_f32_to(b, R(8), DR(8), DR(8));
});
CASE({
bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0);
bi_fadd_f32_to(b, R(9), DR(9), DR(9));
bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0);
bi_fadd_f32_to(b, R(9), DR(9), DR(9));
});
}
TEST(MarkLast, RegisterBlendDescriptor) {
TEST(MarkLast, RegisterBlendDescriptor)
{
CASE({
bi_blend_to(b, R(48), R(0), DR(60), DR(4), DR(5), bi_null(),
BI_REGISTER_FORMAT_F32, 4, 0);
bi_blend_to(b, R(48), R(0), DR(60), DR(4), DR(5), bi_null(),
BI_REGISTER_FORMAT_F32, 4, 0);
});
CASE({
bi_blend_to(b, R(48), R(0), DR(60), R(4), R(5), bi_null(),
BI_REGISTER_FORMAT_F32, 4, 0);
bi_fadd_f32_to(b, R(4), DR(4), DR(7));
bi_blend_to(b, R(48), R(0), DR(60), R(4), R(5), bi_null(),
BI_REGISTER_FORMAT_F32, 4, 0);
bi_fadd_f32_to(b, R(4), DR(4), DR(7));
});
CASE({
bi_blend_to(b, R(48), R(0), DR(60), R(4), R(5), bi_null(),
BI_REGISTER_FORMAT_F32, 4, 0);
bi_fadd_f32_to(b, R(4), DR(5), DR(7));
bi_blend_to(b, R(48), R(0), DR(60), R(4), R(5), bi_null(),
BI_REGISTER_FORMAT_F32, 4, 0);
bi_fadd_f32_to(b, R(4), DR(5), DR(7));
});
}
TEST(MarkLast, ControlFlowAllFeatures) {
TEST(MarkLast, ControlFlowAllFeatures)
{
/* A
* / \
* B C
@@ -153,9 +167,8 @@ TEST(MarkLast, ControlFlowAllFeatures) {
b->cursor = bi_after_block(A);
{
bi_instr *I =
bi_st_tile(b, R(10), DR(14), DR(15), DR(16),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4);
bi_instr *I = bi_st_tile(b, R(10), DR(14), DR(15), DR(16),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4);
I->slot = 2;
bi_load_i32_to(b, R(20), R(28), R(29), BI_SEG_NONE, 0);


@@ -21,42 +21,45 @@
* SOFTWARE.
*/
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"
#include "valhall_enums.h"
#include <gtest/gtest.h>
#define CASE(test, expected) do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
bi_builder *b = A; \
A->shader->stage = MESA_SHADER_FRAGMENT; \
test; \
} \
va_merge_flow(A->shader); \
{ \
bi_builder *b = B; \
B->shader->stage = MESA_SHADER_FRAGMENT; \
expected; \
} \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while(0)
#define CASE(test, expected) \
do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
bi_builder *b = A; \
A->shader->stage = MESA_SHADER_FRAGMENT; \
test; \
} \
va_merge_flow(A->shader); \
{ \
bi_builder *b = B; \
B->shader->stage = MESA_SHADER_FRAGMENT; \
expected; \
} \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while (0)
#define NEGCASE(test) CASE(test, test)
#define flow(f) bi_nop(b)->flow = VA_FLOW_ ## f
#define flow(f) bi_nop(b)->flow = VA_FLOW_##f
class MergeFlow : public testing::Test {
protected:
MergeFlow() {
protected:
MergeFlow()
{
mem_ctx = ralloc_context(NULL);
atest = bi_fau(BIR_FAU_ATEST_PARAM, false);
}
~MergeFlow() {
~MergeFlow()
{
ralloc_free(mem_ctx);
}
@@ -65,74 +68,84 @@ protected:
bi_index atest;
};
TEST_F(MergeFlow, End) {
CASE({
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
flow(END);
},
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
I->flow = VA_FLOW_END;
});
TEST_F(MergeFlow, End)
{
CASE(
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
flow(END);
},
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
I->flow = VA_FLOW_END;
});
}
TEST_F(MergeFlow, Reconverge) {
CASE({
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
flow(RECONVERGE);
},
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
I->flow = VA_FLOW_RECONVERGE;
});
TEST_F(MergeFlow, Reconverge)
{
CASE(
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
flow(RECONVERGE);
},
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
I->flow = VA_FLOW_RECONVERGE;
});
}
TEST_F(MergeFlow, TrivialWait) {
CASE({
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0126);
bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest);
},
{
I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I->flow = VA_FLOW_WAIT0126;
bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest);
});
TEST_F(MergeFlow, TrivialWait)
{
CASE(
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0126);
bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest);
},
{
I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I->flow = VA_FLOW_WAIT0126;
bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest);
});
}
TEST_F(MergeFlow, LoadThenUnrelatedThenUse) {
CASE({
TEST_F(MergeFlow, LoadThenUnrelatedThenUse)
{
CASE(
{
bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19));
flow(END);
},
{
},
{
bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I->flow = VA_FLOW_WAIT0;
I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19));
I->flow = VA_FLOW_END;
});
});
}
TEST_F(MergeFlow, TrivialDiscard) {
CASE({
TEST_F(MergeFlow, TrivialDiscard)
{
CASE(
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
@@ -140,31 +153,35 @@ TEST_F(MergeFlow, TrivialDiscard) {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(END);
},
{
},
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I = bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP4);
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP4);
I->flow = VA_FLOW_DISCARD;
I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I->flow = VA_FLOW_END;
});
});
}
TEST_F(MergeFlow, TrivialDiscardAtTheStart) {
CASE({
TEST_F(MergeFlow, TrivialDiscardAtTheStart)
{
CASE(
{
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
},
{
},
{
I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I->flow = VA_FLOW_DISCARD;
});
});
}
TEST_F(MergeFlow, MoveDiscardPastWait) {
CASE({
TEST_F(MergeFlow, MoveDiscardPastWait)
{
CASE(
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
@@ -172,20 +189,22 @@ TEST_F(MergeFlow, MoveDiscardPastWait) {
flow(DISCARD);
flow(WAIT0);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
},
{
},
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I = bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP4);
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP4);
I->flow = VA_FLOW_WAIT0;
I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I->flow = VA_FLOW_DISCARD;
});
});
}
TEST_F(MergeFlow, OccludedWaitsAndDiscard) {
CASE({
TEST_F(MergeFlow, OccludedWaitsAndDiscard)
{
CASE(
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
@@ -194,75 +213,84 @@ TEST_F(MergeFlow, OccludedWaitsAndDiscard) {
flow(DISCARD);
flow(WAIT2);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
},
{
},
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I = bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP4);
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP4);
I->flow = VA_FLOW_WAIT02;
I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I->flow = VA_FLOW_DISCARD;
});
});
}
TEST_F(MergeFlow, DeleteUselessWaits) {
CASE({
TEST_F(MergeFlow, DeleteUselessWaits)
{
CASE(
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0);
flow(WAIT2);
flow(END);
},
{
},
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I->flow = VA_FLOW_END;
});
});
}
TEST_F(MergeFlow, BlockFullOfUselessWaits) {
CASE({
TEST_F(MergeFlow, BlockFullOfUselessWaits)
{
CASE(
{
flow(WAIT0);
flow(WAIT2);
flow(DISCARD);
flow(END);
},
{
flow(END);
});
},
{ flow(END); });
}
TEST_F(MergeFlow, WaitWithMessage) {
CASE({
TEST_F(MergeFlow, WaitWithMessage)
{
CASE(
{
bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
flow(WAIT0);
},
{
I = bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
},
{
I = bi_ld_attr_imm_to(b, bi_register(16), bi_register(60),
bi_register(61), BI_REGISTER_FORMAT_F32,
BI_VECSIZE_V4, 1);
I->flow = VA_FLOW_WAIT0;
});
});
}
TEST_F(MergeFlow, CantMoveWaitPastMessage) {
TEST_F(MergeFlow, CantMoveWaitPastMessage)
{
NEGCASE({
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I = bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I =
bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
/* Pretend it's blocked for some reason. This doesn't actually happen
* with the current algorithm, but it's good to handle the special
* cases correctly in case we change later on.
*/
I->flow = VA_FLOW_DISCARD;
flow(WAIT0);
/* Pretend it's blocked for some reason. This doesn't actually happen
* with the current algorithm, but it's good to handle the special
* cases correctly in case we change later on.
*/
I->flow = VA_FLOW_DISCARD;
flow(WAIT0);
});
}
TEST_F(MergeFlow, DeletePointlessDiscard) {
CASE({
TEST_F(MergeFlow, DeletePointlessDiscard)
{
CASE(
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
bi_register(12), false, BI_DIMENSION_2D,
@ -277,31 +305,34 @@ TEST_F(MergeFlow, DeletePointlessDiscard) {
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
flow(END);
},
{
},
{
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
I = bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
bi_register(12), false, BI_DIMENSION_2D,
BI_REGISTER_FORMAT_F32, false, false,
BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4);
I = bi_tex_single_to(
b, bi_register(0), bi_register(4), bi_register(8), bi_register(12),
false, BI_DIMENSION_2D, BI_REGISTER_FORMAT_F32, false, false,
BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4);
I->flow = VA_FLOW_WAIT0126;
I = bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest);
I = bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5),
atest);
I->flow = VA_FLOW_WAIT;
I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
I->flow = VA_FLOW_END;
});
});
}
TEST_F(MergeFlow, PreserveTerminalBarriers) {
CASE({
TEST_F(MergeFlow, PreserveTerminalBarriers)
{
CASE(
{
bi_barrier(b);
flow(WAIT);
flow(END);
},
{
},
{
bi_barrier(b)->flow = VA_FLOW_WAIT;
flow(END);
});
});
}

View file

@ -21,34 +21,38 @@
* SOFTWARE.
*/
#include "va_compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"
#include <gtest/gtest.h>
#define CASE(instr, expected) do { \
uint64_t _value = va_pack_instr(instr); \
if (_value != expected) { \
fprintf(stderr, "Got %" PRIx64 ", expected %" PRIx64 "\n", _value, (uint64_t) expected); \
bi_print_instr(instr, stderr); \
fprintf(stderr, "\n"); \
ADD_FAILURE(); \
} \
} while(0)
#define CASE(instr, expected) \
do { \
uint64_t _value = va_pack_instr(instr); \
if (_value != expected) { \
fprintf(stderr, "Got %" PRIx64 ", expected %" PRIx64 "\n", _value, \
(uint64_t)expected); \
bi_print_instr(instr, stderr); \
fprintf(stderr, "\n"); \
ADD_FAILURE(); \
} \
} while (0)
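For orientation, the reformatted CASE macro above packs a single instruction and compares the resulting 64-bit encoding against a literal. A minimal standalone sketch of that check pattern, assuming a hypothetical pack_u64() stand-in rather than the real va_pack_instr() signature:
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
/* Hypothetical stand-in for va_pack_instr(); returns a fixed encoding. */
static uint64_t
pack_u64(void)
{
   return 0x0091c10000000002ULL;
}
int
main(void)
{
   uint64_t got = pack_u64();
   uint64_t expected = 0x0091c10000000002ULL;
   /* Same shape as the test macro: report both values on mismatch. */
   if (got != expected) {
      fprintf(stderr, "Got %" PRIx64 ", expected %" PRIx64 "\n", got, expected);
      return 1;
   }
   return 0;
}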
class ValhallPacking : public testing::Test {
protected:
ValhallPacking() {
protected:
ValhallPacking()
{
mem_ctx = ralloc_context(NULL);
b = bit_builder(mem_ctx);
zero = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 0), false);
one = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 8), false);
n4567 = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 4), true);
zero = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 0), false);
one = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 8), false);
n4567 = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 4), true);
}
~ValhallPacking() {
~ValhallPacking()
{
ralloc_free(mem_ctx);
}
@ -57,60 +61,67 @@ protected:
bi_index zero, one, n4567;
};
TEST_F(ValhallPacking, Moves) {
TEST_F(ValhallPacking, Moves)
{
CASE(bi_mov_i32_to(b, bi_register(1), bi_register(2)),
0x0091c10000000002ULL);
CASE(bi_mov_i32_to(b, bi_register(1), bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 5), false)),
0x0091c1000000008aULL);
0x0091c10000000002ULL);
CASE(bi_mov_i32_to(b, bi_register(1),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 5), false)),
0x0091c1000000008aULL);
}
TEST_F(ValhallPacking, Fadd) {
TEST_F(ValhallPacking, Fadd)
{
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_register(2)),
0x00a4c00000000201ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_abs(bi_register(2))),
0x00a4c02000000201ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(bi_register(2))),
0x00a4c01000000201ULL);
0x00a4c00000000201ULL);
CASE(
bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_abs(bi_register(2))),
0x00a4c02000000201ULL);
CASE(
bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(bi_register(2))),
0x00a4c01000000201ULL);
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_swz_16(bi_register(1), false, false),
CASE(bi_fadd_v2f16_to(b, bi_register(0),
bi_swz_16(bi_register(1), false, false),
bi_swz_16(bi_register(0), true, true)),
0x00a5c0000c000001ULL);
0x00a5c0000c000001ULL);
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(1), bi_register(0)),
0x00a5c00028000001ULL);
0x00a5c00028000001ULL);
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(1),
bi_swz_16(bi_register(0), true, false)),
0x00a5c00024000001ULL);
0x00a5c00024000001ULL);
CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_discard(bi_abs(bi_register(0))),
bi_neg(zero)),
0x00a5c0902800c040ULL);
0x00a5c0902800c040ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1),
zero),
0x00a4c0000000c001ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), zero),
0x00a4c0000000c001ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1),
bi_neg(zero)),
0x00a4c0100000c001ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(zero)),
0x00a4c0100000c001ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1),
bi_half(bi_register(0), true)),
0x00a4c00008000001ULL);
0x00a4c00008000001ULL);
CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1),
bi_half(bi_register(0), false)),
0x00a4c00004000001ULL);
0x00a4c00004000001ULL);
}
TEST_F(ValhallPacking, Clper) {
TEST_F(ValhallPacking, Clper)
{
CASE(bi_clper_i32_to(b, bi_register(0), bi_register(0), bi_byte(n4567, 0),
BI_INACTIVE_RESULT_F1, BI_LANE_OP_NONE, BI_SUBGROUP_SUBGROUP16),
0x00a0c030128fc900);
BI_INACTIVE_RESULT_F1, BI_LANE_OP_NONE,
BI_SUBGROUP_SUBGROUP16),
0x00a0c030128fc900);
}
TEST_F(ValhallPacking, Clamps) {
TEST_F(ValhallPacking, Clamps)
{
bi_instr *I = bi_fadd_f32_to(b, bi_register(0), bi_register(1),
bi_neg(bi_abs(bi_register(2))));
CASE(I, 0x00a4c03000000201ULL);
@ -119,209 +130,243 @@ TEST_F(ValhallPacking, Clamps) {
CASE(I, 0x00a4c03200000201ULL);
}
TEST_F(ValhallPacking, Misc) {
TEST_F(ValhallPacking, Misc)
{
CASE(bi_fma_f32_to(b, bi_register(1), bi_discard(bi_register(1)),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 4), false),
bi_neg(zero)),
0x00b2c10400c08841ULL);
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 4), false),
bi_neg(zero)),
0x00b2c10400c08841ULL);
CASE(bi_fround_f32_to(b, bi_register(2), bi_discard(bi_neg(bi_register(2))),
BI_ROUND_RTN),
0x0090c240800d0042ULL);
0x0090c240800d0042ULL);
CASE(bi_fround_v2f16_to(b, bi_half(bi_register(0), false), bi_register(0),
BI_ROUND_RTN),
0x00904000a00f0000ULL);
BI_ROUND_RTN),
0x00904000a00f0000ULL);
CASE(bi_fround_v2f16_to(b, bi_half(bi_register(0), false),
bi_swz_16(bi_register(1), true, false), BI_ROUND_RTN),
0x00904000900f0001ULL);
CASE(
bi_fround_v2f16_to(b, bi_half(bi_register(0), false),
bi_swz_16(bi_register(1), true, false), BI_ROUND_RTN),
0x00904000900f0001ULL);
}
TEST_F(ValhallPacking, FaddImm) {
CASE(bi_fadd_imm_f32_to(b, bi_register(2), bi_discard(bi_register(2)), 0x4847C6C0),
0x0114C24847C6C042ULL);
TEST_F(ValhallPacking, FaddImm)
{
CASE(bi_fadd_imm_f32_to(b, bi_register(2), bi_discard(bi_register(2)),
0x4847C6C0),
0x0114C24847C6C042ULL);
CASE(bi_fadd_imm_v2f16_to(b, bi_register(2), bi_discard(bi_register(2)), 0x70AC6784),
0x0115C270AC678442ULL);
CASE(bi_fadd_imm_v2f16_to(b, bi_register(2), bi_discard(bi_register(2)),
0x70AC6784),
0x0115C270AC678442ULL);
}
TEST_F(ValhallPacking, Comparions) {
TEST_F(ValhallPacking, Comparions)
{
CASE(bi_icmp_or_v2s16_to(b, bi_register(2),
bi_discard(bi_swz_16(bi_register(3), true, false)),
bi_discard(bi_swz_16(bi_register(2), true, false)),
zero, BI_CMPF_GT, BI_RESULT_TYPE_M1),
bi_discard(bi_swz_16(bi_register(3), true, false)),
bi_discard(bi_swz_16(bi_register(2), true, false)),
zero, BI_CMPF_GT, BI_RESULT_TYPE_M1),
0x00f9c21184c04243);
CASE(bi_fcmp_or_v2f16_to(b, bi_register(2),
bi_discard(bi_swz_16(bi_register(3), true, false)),
bi_discard(bi_swz_16(bi_register(2), false, false)),
zero, BI_CMPF_GT, BI_RESULT_TYPE_M1),
0x00f5c20190c04243);
bi_discard(bi_swz_16(bi_register(3), true, false)),
bi_discard(bi_swz_16(bi_register(2), false, false)),
zero, BI_CMPF_GT, BI_RESULT_TYPE_M1),
0x00f5c20190c04243);
}
TEST_F(ValhallPacking, Conversions) {
TEST_F(ValhallPacking, Conversions)
{
CASE(bi_v2s16_to_v2f16_to(b, bi_register(2), bi_discard(bi_register(2))),
0x0090c22000070042);
0x0090c22000070042);
}
TEST_F(ValhallPacking, BranchzI16) {
bi_instr *I = bi_branchz_i16(b, bi_half(bi_register(2), false), bi_null(), BI_CMPF_EQ);
TEST_F(ValhallPacking, BranchzI16)
{
bi_instr *I =
bi_branchz_i16(b, bi_half(bi_register(2), false), bi_null(), BI_CMPF_EQ);
I->branch_offset = 1;
CASE(I, 0x001fc03000000102);
}
TEST_F(ValhallPacking, BranchzI16Backwards) {
TEST_F(ValhallPacking, BranchzI16Backwards)
{
bi_instr *I = bi_branchz_i16(b, zero, bi_null(), BI_CMPF_EQ);
I->branch_offset = -8;
CASE(I, 0x001fc017fffff8c0);
}
TEST_F(ValhallPacking, Blend) {
CASE(bi_blend_to(b, bi_null(), bi_register(0), bi_register(60),
bi_fau(BIR_FAU_BLEND_0, false),
bi_fau(BIR_FAU_BLEND_0, true),
bi_null(), BI_REGISTER_FORMAT_F16, 2, 0),
0x007f4004333c00f0);
TEST_F(ValhallPacking, Blend)
{
CASE(
bi_blend_to(b, bi_null(), bi_register(0), bi_register(60),
bi_fau(BIR_FAU_BLEND_0, false), bi_fau(BIR_FAU_BLEND_0, true),
bi_null(), BI_REGISTER_FORMAT_F16, 2, 0),
0x007f4004333c00f0);
}
TEST_F(ValhallPacking, Mux) {
TEST_F(ValhallPacking, Mux)
{
CASE(bi_mux_i32_to(b, bi_register(0), bi_discard(bi_register(0)),
bi_discard(bi_register(4)),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 0), false), BI_MUX_BIT),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 0), false),
BI_MUX_BIT),
0x00b8c00300804440ull);
}
TEST_F(ValhallPacking, AtestFP16) {
TEST_F(ValhallPacking, AtestFP16)
{
CASE(bi_atest_to(b, bi_register(60), bi_register(60),
bi_half(bi_register(1), true),
bi_fau(BIR_FAU_ATEST_PARAM, false)),
0x007dbc0208ea013c);
}
TEST_F(ValhallPacking, AtestFP32) {
TEST_F(ValhallPacking, AtestFP32)
{
CASE(bi_atest_to(b, bi_register(60), bi_register(60), one,
bi_fau(BIR_FAU_ATEST_PARAM, false)),
0x007dbc0200ead03c);
}
TEST_F(ValhallPacking, Transcendentals) {
TEST_F(ValhallPacking, Transcendentals)
{
CASE(bi_frexpm_f32_to(b, bi_register(1), bi_register(0), false, true),
0x0099c10001000000);
CASE(bi_frexpe_f32_to(b, bi_register(0), bi_discard(bi_register(0)), false, true),
CASE(bi_frexpe_f32_to(b, bi_register(0), bi_discard(bi_register(0)), false,
true),
0x0099c00001020040);
CASE(bi_frsq_f32_to(b, bi_register(2), bi_register(1)),
0x009cc20000020001);
CASE(bi_frsq_f32_to(b, bi_register(2), bi_register(1)), 0x009cc20000020001);
CASE(bi_fma_rscale_f32_to(b, bi_register(0), bi_discard(bi_register(1)), bi_discard(bi_register(2)), bi_neg(zero), bi_discard(bi_register(0)), BI_SPECIAL_LEFT),
CASE(bi_fma_rscale_f32_to(b, bi_register(0), bi_discard(bi_register(1)),
bi_discard(bi_register(2)), bi_neg(zero),
bi_discard(bi_register(0)), BI_SPECIAL_LEFT),
0x0162c00440c04241);
}
TEST_F(ValhallPacking, Csel) {
TEST_F(ValhallPacking, Csel)
{
CASE(bi_csel_u32_to(b, bi_register(1), bi_discard(bi_register(2)),
bi_discard(bi_register(3)),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), false),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), true),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), false),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true),
BI_CMPF_EQ),
0x0150c10085844342);
CASE(bi_csel_u32_to(b, bi_register(1), bi_discard(bi_register(2)),
bi_discard(bi_register(3)),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), false),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), true),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), false),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true),
BI_CMPF_LT),
0x0150c10485844342);
CASE(bi_csel_s32_to(b, bi_register(1), bi_discard(bi_register(2)),
bi_discard(bi_register(3)),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), false),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), true),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), false),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true),
BI_CMPF_LT),
0x0158c10485844342);
}
TEST_F(ValhallPacking, LdAttrImm) {
bi_instr *I = bi_ld_attr_imm_to(b, bi_register(0),
bi_discard(bi_register(60)),
bi_discard(bi_register(61)),
BI_REGISTER_FORMAT_F16, BI_VECSIZE_V4, 1);
TEST_F(ValhallPacking, LdAttrImm)
{
bi_instr *I = bi_ld_attr_imm_to(
b, bi_register(0), bi_discard(bi_register(60)),
bi_discard(bi_register(61)), BI_REGISTER_FORMAT_F16, BI_VECSIZE_V4, 1);
I->table = 1;
CASE(I, 0x0066800433117d7c);
}
TEST_F(ValhallPacking, LdVarBufImmF16) {
TEST_F(ValhallPacking, LdVarBufImmF16)
{
CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16,
BI_UPDATE_RETRIEVE, BI_VECSIZE_V4, 0),
BI_SOURCE_FORMAT_F16, BI_UPDATE_RETRIEVE,
BI_VECSIZE_V4, 0),
0x005d82143300003d);
CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_SAMPLE,
BI_SOURCE_FORMAT_F16,
BI_UPDATE_STORE, BI_VECSIZE_V4, 0),
0x005d80843300003d);
BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE,
BI_VECSIZE_V4, 0),
0x005d80843300003d);
CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTROID,
BI_SOURCE_FORMAT_F16,
BI_UPDATE_STORE, BI_VECSIZE_V4, 8),
0x005d80443308003d);
BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE,
BI_VECSIZE_V4, 8),
0x005d80443308003d);
}
TEST_F(ValhallPacking, LeaBufImm) {
TEST_F(ValhallPacking, LeaBufImm)
{
CASE(bi_lea_buf_imm_to(b, bi_register(4), bi_discard(bi_register(59))),
0x005e840400000d7b);
}
TEST_F(ValhallPacking, StoreSegment) {
TEST_F(ValhallPacking, StoreSegment)
{
CASE(bi_store_i96(b, bi_register(0), bi_discard(bi_register(4)),
bi_discard(bi_register(5)), BI_SEG_VARY, 0),
bi_discard(bi_register(5)), BI_SEG_VARY, 0),
0x0061400632000044);
}
TEST_F(ValhallPacking, Convert16To32) {
CASE(bi_u16_to_u32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), false, false))),
0x0090c20000140077);
TEST_F(ValhallPacking, Convert16To32)
{
CASE(bi_u16_to_u32_to(b, bi_register(2),
bi_discard(bi_swz_16(bi_register(55), false, false))),
0x0090c20000140077);
CASE(bi_u16_to_u32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), true, false))),
0x0090c20010140077);
CASE(bi_u16_to_u32_to(b, bi_register(2),
bi_discard(bi_swz_16(bi_register(55), true, false))),
0x0090c20010140077);
CASE(bi_u16_to_f32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), false, false))),
0x0090c20000150077);
CASE(bi_u16_to_f32_to(b, bi_register(2),
bi_discard(bi_swz_16(bi_register(55), false, false))),
0x0090c20000150077);
CASE(bi_u16_to_f32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), true, false))),
0x0090c20010150077);
CASE(bi_u16_to_f32_to(b, bi_register(2),
bi_discard(bi_swz_16(bi_register(55), true, false))),
0x0090c20010150077);
CASE(bi_s16_to_s32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), false, false))),
0x0090c20000040077);
CASE(bi_s16_to_s32_to(b, bi_register(2),
bi_discard(bi_swz_16(bi_register(55), false, false))),
0x0090c20000040077);
CASE(bi_s16_to_s32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), true, false))),
0x0090c20010040077);
CASE(bi_s16_to_s32_to(b, bi_register(2),
bi_discard(bi_swz_16(bi_register(55), true, false))),
0x0090c20010040077);
}
TEST_F(ValhallPacking, Swizzle8) {
CASE(bi_icmp_or_v4u8_to(b, bi_register(1), bi_byte(bi_register(0), 0),
zero, zero, BI_CMPF_NE, BI_RESULT_TYPE_I1),
TEST_F(ValhallPacking, Swizzle8)
{
CASE(bi_icmp_or_v4u8_to(b, bi_register(1), bi_byte(bi_register(0), 0), zero,
zero, BI_CMPF_NE, BI_RESULT_TYPE_I1),
0x00f2c14300c0c000);
}
TEST_F(ValhallPacking, FauPage1) {
CASE(bi_mov_i32_to(b, bi_register(1), bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 32), false)),
0x0291c10000000080ULL);
TEST_F(ValhallPacking, FauPage1)
{
CASE(bi_mov_i32_to(b, bi_register(1),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 32), false)),
0x0291c10000000080ULL);
}
TEST_F(ValhallPacking, LdTileV3F16) {
TEST_F(ValhallPacking, LdTileV3F16)
{
CASE(bi_ld_tile_to(b, bi_register(4), bi_discard(bi_register(0)),
bi_register(60), bi_register(3),
BI_REGISTER_FORMAT_F16, BI_VECSIZE_V3),
bi_register(60), bi_register(3), BI_REGISTER_FORMAT_F16,
BI_VECSIZE_V3),
0x0078840423033c40);
}
TEST_F(ValhallPacking, Rhadd8) {
TEST_F(ValhallPacking, Rhadd8)
{
CASE(bi_hadd_v4s8_to(b, bi_register(0), bi_discard(bi_register(1)),
bi_discard(bi_register(0)), BI_ROUND_RTP),
0x00aac000400b4041);

View file

@ -21,41 +21,44 @@
* SOFTWARE.
*/
#include "va_compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"
#include <gtest/gtest.h>
#define CASE(instr, expected) do { \
if (va_validate_fau(instr) != expected) { \
fprintf(stderr, "Incorrect validation for:\n"); \
bi_print_instr(instr, stderr); \
fprintf(stderr, "\n"); \
ADD_FAILURE(); \
} \
} while(0)
#define CASE(instr, expected) \
do { \
if (va_validate_fau(instr) != expected) { \
fprintf(stderr, "Incorrect validation for:\n"); \
bi_print_instr(instr, stderr); \
fprintf(stderr, "\n"); \
ADD_FAILURE(); \
} \
} while (0)
#define VALID(instr) CASE(instr, true)
#define VALID(instr) CASE(instr, true)
#define INVALID(instr) CASE(instr, false)
class ValidateFau : public testing::Test {
protected:
ValidateFau() {
protected:
ValidateFau()
{
mem_ctx = ralloc_context(NULL);
b = bit_builder(mem_ctx);
zero = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 0), false);
imm1 = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 1), false);
imm2 = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 2), false);
unif = bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 5), false);
unif_hi = bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 5), true);
unif2 = bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 6), false);
zero = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 0), false);
imm1 = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 1), false);
imm2 = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 2), false);
unif = bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 5), false);
unif_hi = bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 5), true);
unif2 = bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 6), false);
core_id = bi_fau(BIR_FAU_CORE_ID, false);
lane_id = bi_fau(BIR_FAU_LANE_ID, false);
}
~ValidateFau() {
~ValidateFau()
{
ralloc_free(mem_ctx);
}
@ -66,8 +69,8 @@ protected:
TEST_F(ValidateFau, One64BitUniformSlot)
{
VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(3),
unif));
VALID(
bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(3), unif));
VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), unif_hi, unif));
VALID(bi_fma_f32_to(b, bi_register(1), unif, unif, unif_hi));
INVALID(bi_fma_f32_to(b, bi_register(1), unif, unif2, bi_register(1)));
@ -77,8 +80,8 @@ TEST_F(ValidateFau, One64BitUniformSlot)
* marked as valid in early versions of the validator.
*/
INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 0), false),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 1), true)));
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 0), false),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 1), true)));
}
TEST_F(ValidateFau, Combined64BitUniformsConstants)
@ -99,17 +102,16 @@ TEST_F(ValidateFau, UniformsOnlyInDefaultMode)
TEST_F(ValidateFau, SingleSpecialImmediate)
{
VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(2),
lane_id));
lane_id));
VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(2),
core_id));
INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), lane_id,
core_id));
core_id));
INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), lane_id, core_id));
}
TEST_F(ValidateFau, SmokeTests)
{
VALID(bi_mov_i32_to(b, bi_register(1), bi_register(2)));
VALID(bi_mov_i32_to(b, bi_register(1), unif));
VALID(bi_fma_f32_to(b, bi_register(1), bi_discard(bi_register(1)),
unif, bi_neg(zero)));
VALID(bi_fma_f32_to(b, bi_register(1), bi_discard(bi_register(1)), unif,
bi_neg(zero)));
}

View file

@ -79,7 +79,7 @@ va_select_fau_page(const bi_instr *I)
{
bi_foreach_src(I, s) {
if (I->src[s].type == BI_INDEX_FAU)
return va_fau_page((enum bir_fau) I->src[s].value);
return va_fau_page((enum bir_fau)I->src[s].value);
}
return 0;
@ -91,8 +91,7 @@ struct va_stats {
unsigned fma, cvt, sfu, v, ls, t;
};
void
va_count_instr_stats(bi_instr *I, struct va_stats *stats);
void va_count_instr_stats(bi_instr *I, struct va_stats *stats);
#ifdef __cplusplus
} /* extern C */

View file

@ -21,9 +21,9 @@
* SOFTWARE.
*/
#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall_enums.h"
#include "bi_builder.h"
/*
* Insert flow control into a scheduled and register allocated shader. This
@ -176,7 +176,8 @@ bi_depend_on_writers(struct bi_scoreboard_state *st, uint64_t regmask)
/* Sets the dependencies for a given clause, updating the model */
static void
bi_set_dependencies(bi_block *block, bi_instr *I, struct bi_scoreboard_state *st)
bi_set_dependencies(bi_block *block, bi_instr *I,
struct bi_scoreboard_state *st)
{
/* Depend on writers to handle read-after-write and write-after-write
* dependencies. Write-after-read dependencies are handled in the hardware
@ -482,7 +483,8 @@ va_insert_flow_control_nops(bi_context *ctx)
*/
if (va_should_end(block) || block->needs_nop) {
/* Don't bother adding a NOP into an unreachable block */
if (block == bi_start_block(&ctx->blocks) || bi_num_predecessors(block))
if (block == bi_start_block(&ctx->blocks) ||
bi_num_predecessors(block))
bi_flow(ctx, bi_after_block(block), VA_FLOW_END);
} else if (bi_reconverge_branches(block)) {
/* TODO: Do we ever need to reconverge from an empty block? */

View file

@ -21,9 +21,9 @@
* SOFTWARE.
*/
#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall.h"
#include "bi_builder.h"
/* Only some special immediates are available, as specified in the Table of
* Immediates in the specification. Other immediates must be lowered, either to
@ -51,7 +51,7 @@ va_lut_index_32(uint32_t imm)
static bi_index
va_lut_index_16(uint16_t imm)
{
uint16_t *arr16 = (uint16_t *) valhall_immediates;
uint16_t *arr16 = (uint16_t *)valhall_immediates;
for (unsigned i = 0; i < (2 * ARRAY_SIZE(valhall_immediates)); ++i) {
if (arr16[i] == imm)
@ -64,7 +64,7 @@ va_lut_index_16(uint16_t imm)
UNUSED static bi_index
va_lut_index_8(uint8_t imm)
{
uint8_t *arr8 = (uint8_t *) valhall_immediates;
uint8_t *arr8 = (uint8_t *)valhall_immediates;
for (unsigned i = 0; i < (4 * ARRAY_SIZE(valhall_immediates)); ++i) {
if (arr8[i] == imm)
@ -109,36 +109,43 @@ is_extension_of_16(uint32_t x, bool is_signed)
}
static bi_index
va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, bool is_signed, bool staging)
va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info,
bool is_signed, bool staging)
{
/* Try the constant as-is */
if (!staging) {
bi_index lut = va_lut_index_32(value);
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
/* ...or negated as a FP32 constant */
if (info.absneg && info.size == VA_SIZE_32) {
lut = bi_neg(va_lut_index_32(fui(-uif(value))));
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
}
/* ...or negated as a FP16 constant */
if (info.absneg && info.size == VA_SIZE_16) {
lut = bi_neg(va_lut_index_32(value ^ 0x80008000));
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
}
}
/* Try using a single half of a FP16 constant */
bool replicated_halves = (value & 0xFFFF) == (value >> 16);
if (!staging && info.swizzle && info.size == VA_SIZE_16 && replicated_halves) {
if (!staging && info.swizzle && info.size == VA_SIZE_16 &&
replicated_halves) {
bi_index lut = va_lut_index_16(value & 0xFFFF);
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
/* ...possibly negated */
if (info.absneg) {
lut = bi_neg(va_lut_index_16((value & 0xFFFF) ^ 0x8000));
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
}
}
@ -147,25 +154,28 @@ va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, bool
is_extension_of_8(value, is_signed)) {
bi_index lut = va_lut_index_8(value & 0xFF);
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
}
/* Try extending a halfword */
if (!staging && info.widen &&
is_extension_of_16(value, is_signed)) {
if (!staging && info.widen && is_extension_of_16(value, is_signed)) {
bi_index lut = va_lut_index_16(value & 0xFFFF);
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
}
/* Try demoting the constant to FP16 */
if (!staging && info.swizzle && info.size == VA_SIZE_32) {
bi_index lut = va_demote_constant_fp16(value);
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
if (info.absneg) {
bi_index lut = bi_neg(va_demote_constant_fp16(fui(-uif(value))));
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
}
}
@ -218,7 +228,8 @@ va_lower_constants(bi_context *ctx, bi_instr *I)
value = bi_apply_swizzle(value, swz);
}
bi_index cons = va_resolve_constant(&b, value, info, is_signed, staging);
bi_index cons =
va_resolve_constant(&b, value, info, is_signed, staging);
cons.neg ^= I->src[s].neg;
I->src[s] = cons;
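As a rough illustration of the constant-lowering idea in this file (try to match an immediate against a small lookup table before falling back to other encodings), here is a hedged sketch with a hypothetical four-entry table; the real valhall_immediates[] contents and the FAU index encoding are not reproduced:
#include <stdint.h>
#include <stdio.h>
/* Hypothetical immediate table; the real pass scans valhall_immediates[]. */
static const uint32_t lut[4] = {0x00000000, 0x3f800000, 0xffffffff, 0x80000000};
/* Returns the table index for imm, or -1 if the constant must be lowered. */
static int
lut_index_32(uint32_t imm)
{
   for (unsigned i = 0; i < 4; ++i) {
      if (lut[i] == imm)
         return (int)i;
   }
   return -1;
}
int
main(void)
{
   /* 1.0f is in the hypothetical table, 0x12345678 is not. */
   printf("%d %d\n", lut_index_32(0x3f800000), lut_index_32(0x12345678));
   return 0;
}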

View file

@ -21,9 +21,9 @@
* SOFTWARE.
*/
#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall.h"
#include "bi_builder.h"
static bi_instr *
lower(bi_builder *b, bi_instr *I)
@ -38,45 +38,56 @@ lower(bi_builder *b, bi_instr *I)
return bi_iadd_v4u8_to(b, I->dest[0], I->src[0], bi_zero(), false);
case BI_OPCODE_ICMP_I32:
return bi_icmp_or_u32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_u32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
case BI_OPCODE_ICMP_V2I16:
return bi_icmp_or_v2u16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_v2u16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
case BI_OPCODE_ICMP_V4I8:
return bi_icmp_or_v4u8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_v4u8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
case BI_OPCODE_ICMP_U32:
return bi_icmp_or_u32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_u32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
case BI_OPCODE_ICMP_V2U16:
return bi_icmp_or_v2u16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_v2u16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
case BI_OPCODE_ICMP_V4U8:
return bi_icmp_or_v4u8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_v4u8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
case BI_OPCODE_ICMP_S32:
return bi_icmp_or_s32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_s32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
case BI_OPCODE_ICMP_V2S16:
return bi_icmp_or_v2s16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_v2s16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
case BI_OPCODE_ICMP_V4S8:
return bi_icmp_or_v4s8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_v4s8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
case BI_OPCODE_FCMP_F32:
return bi_fcmp_or_f32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_fcmp_or_f32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
case BI_OPCODE_FCMP_V2F16:
return bi_fcmp_or_v2f16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_fcmp_or_v2f16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);
/* Integer CSEL must have a signedness */
case BI_OPCODE_CSEL_I32:
case BI_OPCODE_CSEL_V2I16:
assert(I->cmpf == BI_CMPF_EQ || I->cmpf == BI_CMPF_NE);
I->op = (I->op == BI_OPCODE_CSEL_I32) ? BI_OPCODE_CSEL_U32 :
BI_OPCODE_CSEL_V2U16;
I->op = (I->op == BI_OPCODE_CSEL_I32) ? BI_OPCODE_CSEL_U32
: BI_OPCODE_CSEL_V2U16;
return NULL;
/* Jump -> conditional branch with condition tied to true. */
@ -117,7 +128,7 @@ lower(bi_builder *b, bi_instr *I)
case BI_OPCODE_FADD_RSCALE_F32:
return bi_fma_rscale_f32_to(b, I->dest[0], I->src[0], bi_imm_f32(1.0),
I->src[1], I->src[2], I->special);
I->src[1], I->src[2], I->special);
default:
return NULL;

View file

@ -21,8 +21,8 @@
* SOFTWARE.
*/
#include "va_compiler.h"
#include "bi_builder.h"
#include "va_compiler.h"
/*
* Bifrost uses split 64-bit addresses, specified as two consecutive sources.
@ -38,8 +38,7 @@ lower_split_src(bi_context *ctx, bi_instr *I, unsigned s)
bi_index offset_fau = I->src[s];
offset_fau.offset++;
if (I->src[s].type == BI_INDEX_FAU &&
I->src[s].offset == 0 &&
if (I->src[s].type == BI_INDEX_FAU && I->src[s].offset == 0 &&
bi_is_value_equiv(offset_fau, I->src[s + 1])) {
return;
}
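The hunk above touches the split 64-bit address handling described in this file's comment (a 64-bit address carried as two consecutive 32-bit sources). A minimal sketch of that convention, assuming the low word comes first; nothing here reflects the real bi_index or FAU layout:
#include <stdint.h>
#include <stdio.h>
/* Split a 64-bit address into two consecutive 32-bit words, low word first. */
static void
split_addr(uint64_t addr, uint32_t *lo, uint32_t *hi)
{
   *lo = (uint32_t)(addr & 0xffffffffu);
   *hi = (uint32_t)(addr >> 32);
}
int
main(void)
{
   uint32_t lo, hi;
   split_addr(0x0000000123456789ULL, &lo, &hi);
   printf("lo=%08x hi=%08x\n", lo, hi);
   return 0;
}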

View file

@ -97,7 +97,7 @@ scoreboard_update(struct bi_scoreboard_state *st, const bi_instr *I)
/* Unmark registers after they are waited on */
for (unsigned i = 0; i < VA_NUM_GENERAL_SLOTS; ++i) {
if (waits_on_slot(I->flow, i))
st->read[i] = 0;
st->read[i] = 0;
}
}
@ -111,8 +111,8 @@ va_analyze_scoreboard_reads(bi_context *ctx)
bi_worklist_push_tail(&worklist, block);
/* Reset analysis from previous pass */
block->scoreboard_in = (struct bi_scoreboard_state){ 0 };
block->scoreboard_out = (struct bi_scoreboard_state){ 0 };
block->scoreboard_in = (struct bi_scoreboard_state){0};
block->scoreboard_out = (struct bi_scoreboard_state){0};
}
/* Perform forward data flow analysis to calculate dependencies */

View file

@ -21,9 +21,9 @@
* SOFTWARE.
*/
#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall_enums.h"
#include "bi_builder.h"
/*
* Merge NOPs with flow control with nearby instructions to eliminate the NOPs,
@ -80,8 +80,10 @@ merge_end_reconverge(bi_block *block)
bi_instr *last = list_last_entry(&block->instructions, bi_instr, link);
bi_instr *penult = bi_prev_op(last);
if (last->op != BI_OPCODE_NOP) return;
if (last->flow != VA_FLOW_RECONVERGE && last->flow != VA_FLOW_END) return;
if (last->op != BI_OPCODE_NOP)
return;
if (last->flow != VA_FLOW_RECONVERGE && last->flow != VA_FLOW_END)
return;
/* End implies all other flow control except for waiting on barriers (slot
* #7, with VA_FLOW_WAIT), so remove blocking flow control.
@ -99,7 +101,8 @@ merge_end_reconverge(bi_block *block)
}
/* If there is blocking flow control, we can't merge */
if (penult->flow != VA_FLOW_NONE) return;
if (penult->flow != VA_FLOW_NONE)
return;
/* Else, merge */
penult->flow = last->flow;
@ -133,8 +136,8 @@ merge_waits(bi_block *block)
bi_instr *last_free = NULL;
bi_foreach_instr_in_block_safe(block, I) {
if (last_free != NULL &&
I->op == BI_OPCODE_NOP && va_flow_is_wait_or_none(I->flow)) {
if (last_free != NULL && I->op == BI_OPCODE_NOP &&
va_flow_is_wait_or_none(I->flow)) {
/* Merge waits with compatible instructions */
last_free->flow = union_waits(last_free->flow, I->flow);
@ -212,8 +215,10 @@ va_merge_flow(bi_context *ctx)
{
bi_foreach_block(ctx, block) {
/* If there are less than 2 instructions, there's nothing to merge */
if (list_is_empty(&block->instructions)) continue;
if (list_is_singular(&block->instructions)) continue;
if (list_is_empty(&block->instructions))
continue;
if (list_is_singular(&block->instructions))
continue;
merge_end_reconverge(block);
merge_waits(block);

View file

@ -29,15 +29,21 @@ static enum bi_opcode
va_op_add_imm(enum bi_opcode op)
{
switch (op) {
case BI_OPCODE_FADD_F32: return BI_OPCODE_FADD_IMM_F32;
case BI_OPCODE_FADD_V2F16: return BI_OPCODE_FADD_IMM_V2F16;
case BI_OPCODE_FADD_F32:
return BI_OPCODE_FADD_IMM_F32;
case BI_OPCODE_FADD_V2F16:
return BI_OPCODE_FADD_IMM_V2F16;
case BI_OPCODE_IADD_S32:
case BI_OPCODE_IADD_U32: return BI_OPCODE_IADD_IMM_I32;
case BI_OPCODE_IADD_U32:
return BI_OPCODE_IADD_IMM_I32;
case BI_OPCODE_IADD_V2S16:
case BI_OPCODE_IADD_V2U16: return BI_OPCODE_IADD_IMM_V2I16;
case BI_OPCODE_IADD_V2U16:
return BI_OPCODE_IADD_IMM_V2I16;
case BI_OPCODE_IADD_V4S8:
case BI_OPCODE_IADD_V4U8: return BI_OPCODE_IADD_IMM_V4I8;
default: return 0;
case BI_OPCODE_IADD_V4U8:
return BI_OPCODE_IADD_IMM_V4I8;
default:
return 0;
}
}
@ -46,8 +52,8 @@ va_is_add_imm(bi_instr *I, unsigned s)
{
assert(s < I->nr_srcs);
return I->src[s].swizzle == BI_SWIZZLE_H01 &&
!I->src[s].abs && !I->src[s].neg && !I->clamp && !I->round;
return I->src[s].swizzle == BI_SWIZZLE_H01 && !I->src[s].abs &&
!I->src[s].neg && !I->clamp && !I->round;
}
static unsigned
@ -83,11 +89,14 @@ va_fuse_add_imm(bi_instr *I)
}
enum bi_opcode op = va_op_add_imm(I->op);
if (!op) return;
if (!op)
return;
unsigned s = va_choose_imm(I);
if (s > 1) return;
if (!va_is_add_imm(I, 1 - s)) return;
if (s > 1)
return;
if (!va_is_add_imm(I, 1 - s))
return;
I->op = op;
I->index = bi_apply_swizzle(I->src[s].value, I->src[s].swizzle);

View file

@ -21,10 +21,10 @@
* SOFTWARE.
*/
#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall.h"
#include "valhall_enums.h"
#include "bi_builder.h"
/* This file contains the final passes of the compiler. Running after
* scheduling and RA, the IR is now finalized, so we need to emit it to actual
@ -36,7 +36,7 @@
* Prints the (first) failing instruction to aid debugging.
*/
NORETURN static void PRINTFLIKE(2, 3)
invalid_instruction(const bi_instr *I, const char *cause, ...)
invalid_instruction(const bi_instr *I, const char *cause, ...)
{
fputs("\nInvalid ", stderr);
@ -56,8 +56,9 @@ invalid_instruction(const bi_instr *I, const char *cause, ...)
* Like assert, but prints the instruction if the assertion fails to aid
* debugging invalid inputs to the packing module.
*/
#define pack_assert(I, cond) \
if (!(cond)) invalid_instruction(I, "invariant " #cond);
#define pack_assert(I, cond) \
if (!(cond)) \
invalid_instruction(I, "invariant " #cond);
/*
* Validate that two adjacent 32-bit sources form an aligned 64-bit register
@ -95,14 +96,20 @@ static unsigned
va_pack_fau_special(const bi_instr *I, enum bir_fau fau)
{
switch (fau) {
case BIR_FAU_ATEST_PARAM: return VA_FAU_SPECIAL_PAGE_0_ATEST_DATUM;
case BIR_FAU_TLS_PTR: return VA_FAU_SPECIAL_PAGE_1_THREAD_LOCAL_POINTER;
case BIR_FAU_WLS_PTR: return VA_FAU_SPECIAL_PAGE_1_WORKGROUP_LOCAL_POINTER;
case BIR_FAU_LANE_ID: return VA_FAU_SPECIAL_PAGE_3_LANE_ID;
case BIR_FAU_PROGRAM_COUNTER: return VA_FAU_SPECIAL_PAGE_3_PROGRAM_COUNTER;
case BIR_FAU_SAMPLE_POS_ARRAY:return VA_FAU_SPECIAL_PAGE_0_SAMPLE;
case BIR_FAU_ATEST_PARAM:
return VA_FAU_SPECIAL_PAGE_0_ATEST_DATUM;
case BIR_FAU_TLS_PTR:
return VA_FAU_SPECIAL_PAGE_1_THREAD_LOCAL_POINTER;
case BIR_FAU_WLS_PTR:
return VA_FAU_SPECIAL_PAGE_1_WORKGROUP_LOCAL_POINTER;
case BIR_FAU_LANE_ID:
return VA_FAU_SPECIAL_PAGE_3_LANE_ID;
case BIR_FAU_PROGRAM_COUNTER:
return VA_FAU_SPECIAL_PAGE_3_PROGRAM_COUNTER;
case BIR_FAU_SAMPLE_POS_ARRAY:
return VA_FAU_SPECIAL_PAGE_0_SAMPLE;
case BIR_FAU_BLEND_0...(BIR_FAU_BLEND_0 + 7):
case BIR_FAU_BLEND_0 ...(BIR_FAU_BLEND_0 + 7):
return VA_FAU_SPECIAL_PAGE_0_BLEND_DESCRIPTOR_0 + (fau - BIR_FAU_BLEND_0);
default:
@ -136,7 +143,8 @@ va_pack_src(const bi_instr *I, unsigned s)
if (idx.type == BI_INDEX_REGISTER) {
unsigned value = va_pack_reg(I, idx);
if (idx.discard) value |= (1 << 6);
if (idx.discard)
value |= (1 << 6);
return value;
} else if (idx.type == BI_INDEX_FAU) {
pack_assert(I, idx.offset <= 1);
@ -150,10 +158,14 @@ static unsigned
va_pack_wrmask(const bi_instr *I)
{
switch (I->dest[0].swizzle) {
case BI_SWIZZLE_H00: return 0x1;
case BI_SWIZZLE_H11: return 0x2;
case BI_SWIZZLE_H01: return 0x3;
default: invalid_instruction(I, "write mask");
case BI_SWIZZLE_H00:
return 0x1;
case BI_SWIZZLE_H11:
return 0x2;
case BI_SWIZZLE_H01:
return 0x3;
default:
invalid_instruction(I, "write mask");
}
}
@ -161,17 +173,27 @@ static enum va_atomic_operation
va_pack_atom_opc(const bi_instr *I)
{
switch (I->atom_opc) {
case BI_ATOM_OPC_AADD: return VA_ATOMIC_OPERATION_AADD;
case BI_ATOM_OPC_ASMIN: return VA_ATOMIC_OPERATION_ASMIN;
case BI_ATOM_OPC_ASMAX: return VA_ATOMIC_OPERATION_ASMAX;
case BI_ATOM_OPC_AUMIN: return VA_ATOMIC_OPERATION_AUMIN;
case BI_ATOM_OPC_AUMAX: return VA_ATOMIC_OPERATION_AUMAX;
case BI_ATOM_OPC_AAND: return VA_ATOMIC_OPERATION_AAND;
case BI_ATOM_OPC_AOR: return VA_ATOMIC_OPERATION_AOR;
case BI_ATOM_OPC_AXOR: return VA_ATOMIC_OPERATION_AXOR;
case BI_ATOM_OPC_AADD:
return VA_ATOMIC_OPERATION_AADD;
case BI_ATOM_OPC_ASMIN:
return VA_ATOMIC_OPERATION_ASMIN;
case BI_ATOM_OPC_ASMAX:
return VA_ATOMIC_OPERATION_ASMAX;
case BI_ATOM_OPC_AUMIN:
return VA_ATOMIC_OPERATION_AUMIN;
case BI_ATOM_OPC_AUMAX:
return VA_ATOMIC_OPERATION_AUMAX;
case BI_ATOM_OPC_AAND:
return VA_ATOMIC_OPERATION_AAND;
case BI_ATOM_OPC_AOR:
return VA_ATOMIC_OPERATION_AOR;
case BI_ATOM_OPC_AXOR:
return VA_ATOMIC_OPERATION_AXOR;
case BI_ATOM_OPC_ACMPXCHG:
case BI_ATOM_OPC_AXCHG: return VA_ATOMIC_OPERATION_AXCHG;
default: invalid_instruction(I, "atomic opcode");
case BI_ATOM_OPC_AXCHG:
return VA_ATOMIC_OPERATION_AXCHG;
default:
invalid_instruction(I, "atomic opcode");
}
}
@ -179,12 +201,18 @@ static enum va_atomic_operation_with_1
va_pack_atom_opc_1(const bi_instr *I)
{
switch (I->atom_opc) {
case BI_ATOM_OPC_AINC: return VA_ATOMIC_OPERATION_WITH_1_AINC;
case BI_ATOM_OPC_ADEC: return VA_ATOMIC_OPERATION_WITH_1_ADEC;
case BI_ATOM_OPC_AUMAX1: return VA_ATOMIC_OPERATION_WITH_1_AUMAX1;
case BI_ATOM_OPC_ASMAX1: return VA_ATOMIC_OPERATION_WITH_1_ASMAX1;
case BI_ATOM_OPC_AOR1: return VA_ATOMIC_OPERATION_WITH_1_AOR1;
default: invalid_instruction(I, "atomic opcode with implicit 1");
case BI_ATOM_OPC_AINC:
return VA_ATOMIC_OPERATION_WITH_1_AINC;
case BI_ATOM_OPC_ADEC:
return VA_ATOMIC_OPERATION_WITH_1_ADEC;
case BI_ATOM_OPC_AUMAX1:
return VA_ATOMIC_OPERATION_WITH_1_AUMAX1;
case BI_ATOM_OPC_ASMAX1:
return VA_ATOMIC_OPERATION_WITH_1_ASMAX1;
case BI_ATOM_OPC_AOR1:
return VA_ATOMIC_OPERATION_WITH_1_AOR1;
default:
invalid_instruction(I, "atomic opcode with implicit 1");
}
}
@ -199,10 +227,14 @@ static enum va_widen
va_pack_widen_f32(const bi_instr *I, enum bi_swizzle swz)
{
switch (swz) {
case BI_SWIZZLE_H01: return VA_WIDEN_NONE;
case BI_SWIZZLE_H00: return VA_WIDEN_H0;
case BI_SWIZZLE_H11: return VA_WIDEN_H1;
default: invalid_instruction(I, "widen");
case BI_SWIZZLE_H01:
return VA_WIDEN_NONE;
case BI_SWIZZLE_H00:
return VA_WIDEN_H0;
case BI_SWIZZLE_H11:
return VA_WIDEN_H1;
default:
invalid_instruction(I, "widen");
}
}
@ -210,11 +242,16 @@ static enum va_swizzles_16_bit
va_pack_swizzle_f16(const bi_instr *I, enum bi_swizzle swz)
{
switch (swz) {
case BI_SWIZZLE_H00: return VA_SWIZZLES_16_BIT_H00;
case BI_SWIZZLE_H10: return VA_SWIZZLES_16_BIT_H10;
case BI_SWIZZLE_H01: return VA_SWIZZLES_16_BIT_H01;
case BI_SWIZZLE_H11: return VA_SWIZZLES_16_BIT_H11;
default: invalid_instruction(I, "16-bit swizzle");
case BI_SWIZZLE_H00:
return VA_SWIZZLES_16_BIT_H00;
case BI_SWIZZLE_H10:
return VA_SWIZZLES_16_BIT_H10;
case BI_SWIZZLE_H01:
return VA_SWIZZLES_16_BIT_H01;
case BI_SWIZZLE_H11:
return VA_SWIZZLES_16_BIT_H11;
default:
invalid_instruction(I, "16-bit swizzle");
}
}
@ -223,37 +260,62 @@ va_pack_widen(const bi_instr *I, enum bi_swizzle swz, enum va_size size)
{
if (size == VA_SIZE_8) {
switch (swz) {
case BI_SWIZZLE_H01: return VA_SWIZZLES_8_BIT_B0123;
case BI_SWIZZLE_H00: return VA_SWIZZLES_8_BIT_B0101;
case BI_SWIZZLE_H11: return VA_SWIZZLES_8_BIT_B2323;
case BI_SWIZZLE_B0000: return VA_SWIZZLES_8_BIT_B0000;
case BI_SWIZZLE_B1111: return VA_SWIZZLES_8_BIT_B1111;
case BI_SWIZZLE_B2222: return VA_SWIZZLES_8_BIT_B2222;
case BI_SWIZZLE_B3333: return VA_SWIZZLES_8_BIT_B3333;
default: invalid_instruction(I, "8-bit widen");
case BI_SWIZZLE_H01:
return VA_SWIZZLES_8_BIT_B0123;
case BI_SWIZZLE_H00:
return VA_SWIZZLES_8_BIT_B0101;
case BI_SWIZZLE_H11:
return VA_SWIZZLES_8_BIT_B2323;
case BI_SWIZZLE_B0000:
return VA_SWIZZLES_8_BIT_B0000;
case BI_SWIZZLE_B1111:
return VA_SWIZZLES_8_BIT_B1111;
case BI_SWIZZLE_B2222:
return VA_SWIZZLES_8_BIT_B2222;
case BI_SWIZZLE_B3333:
return VA_SWIZZLES_8_BIT_B3333;
default:
invalid_instruction(I, "8-bit widen");
}
} else if (size == VA_SIZE_16) {
switch (swz) {
case BI_SWIZZLE_H00: return VA_SWIZZLES_16_BIT_H00;
case BI_SWIZZLE_H10: return VA_SWIZZLES_16_BIT_H10;
case BI_SWIZZLE_H01: return VA_SWIZZLES_16_BIT_H01;
case BI_SWIZZLE_H11: return VA_SWIZZLES_16_BIT_H11;
case BI_SWIZZLE_B0000: return VA_SWIZZLES_16_BIT_B00;
case BI_SWIZZLE_B1111: return VA_SWIZZLES_16_BIT_B11;
case BI_SWIZZLE_B2222: return VA_SWIZZLES_16_BIT_B22;
case BI_SWIZZLE_B3333: return VA_SWIZZLES_16_BIT_B33;
default: invalid_instruction(I, "16-bit widen");
case BI_SWIZZLE_H00:
return VA_SWIZZLES_16_BIT_H00;
case BI_SWIZZLE_H10:
return VA_SWIZZLES_16_BIT_H10;
case BI_SWIZZLE_H01:
return VA_SWIZZLES_16_BIT_H01;
case BI_SWIZZLE_H11:
return VA_SWIZZLES_16_BIT_H11;
case BI_SWIZZLE_B0000:
return VA_SWIZZLES_16_BIT_B00;
case BI_SWIZZLE_B1111:
return VA_SWIZZLES_16_BIT_B11;
case BI_SWIZZLE_B2222:
return VA_SWIZZLES_16_BIT_B22;
case BI_SWIZZLE_B3333:
return VA_SWIZZLES_16_BIT_B33;
default:
invalid_instruction(I, "16-bit widen");
}
} else if (size == VA_SIZE_32) {
switch (swz) {
case BI_SWIZZLE_H01: return VA_SWIZZLES_32_BIT_NONE;
case BI_SWIZZLE_H00: return VA_SWIZZLES_32_BIT_H0;
case BI_SWIZZLE_H11: return VA_SWIZZLES_32_BIT_H1;
case BI_SWIZZLE_B0000: return VA_SWIZZLES_32_BIT_B0;
case BI_SWIZZLE_B1111: return VA_SWIZZLES_32_BIT_B1;
case BI_SWIZZLE_B2222: return VA_SWIZZLES_32_BIT_B2;
case BI_SWIZZLE_B3333: return VA_SWIZZLES_32_BIT_B3;
default: invalid_instruction(I, "32-bit widen");
case BI_SWIZZLE_H01:
return VA_SWIZZLES_32_BIT_NONE;
case BI_SWIZZLE_H00:
return VA_SWIZZLES_32_BIT_H0;
case BI_SWIZZLE_H11:
return VA_SWIZZLES_32_BIT_H1;
case BI_SWIZZLE_B0000:
return VA_SWIZZLES_32_BIT_B0;
case BI_SWIZZLE_B1111:
return VA_SWIZZLES_32_BIT_B1;
case BI_SWIZZLE_B2222:
return VA_SWIZZLES_32_BIT_B2;
case BI_SWIZZLE_B3333:
return VA_SWIZZLES_32_BIT_B3;
default:
invalid_instruction(I, "32-bit widen");
}
} else {
invalid_instruction(I, "type size for widen");
@ -264,14 +326,22 @@ static enum va_half_swizzles_8_bit
va_pack_halfswizzle(const bi_instr *I, enum bi_swizzle swz)
{
switch (swz) {
case BI_SWIZZLE_B0000: return VA_HALF_SWIZZLES_8_BIT_B00;
case BI_SWIZZLE_B1111: return VA_HALF_SWIZZLES_8_BIT_B11;
case BI_SWIZZLE_B2222: return VA_HALF_SWIZZLES_8_BIT_B22;
case BI_SWIZZLE_B3333: return VA_HALF_SWIZZLES_8_BIT_B33;
case BI_SWIZZLE_B0011: return VA_HALF_SWIZZLES_8_BIT_B01;
case BI_SWIZZLE_B2233: return VA_HALF_SWIZZLES_8_BIT_B23;
case BI_SWIZZLE_B0022: return VA_HALF_SWIZZLES_8_BIT_B02;
default: invalid_instruction(I, "v2u8 swizzle");
case BI_SWIZZLE_B0000:
return VA_HALF_SWIZZLES_8_BIT_B00;
case BI_SWIZZLE_B1111:
return VA_HALF_SWIZZLES_8_BIT_B11;
case BI_SWIZZLE_B2222:
return VA_HALF_SWIZZLES_8_BIT_B22;
case BI_SWIZZLE_B3333:
return VA_HALF_SWIZZLES_8_BIT_B33;
case BI_SWIZZLE_B0011:
return VA_HALF_SWIZZLES_8_BIT_B01;
case BI_SWIZZLE_B2233:
return VA_HALF_SWIZZLES_8_BIT_B23;
case BI_SWIZZLE_B0022:
return VA_HALF_SWIZZLES_8_BIT_B02;
default:
invalid_instruction(I, "v2u8 swizzle");
}
}
@ -279,12 +349,18 @@ static enum va_lanes_8_bit
va_pack_shift_lanes(const bi_instr *I, enum bi_swizzle swz)
{
switch (swz) {
case BI_SWIZZLE_H01: return VA_LANES_8_BIT_B02;
case BI_SWIZZLE_B0000: return VA_LANES_8_BIT_B00;
case BI_SWIZZLE_B1111: return VA_LANES_8_BIT_B11;
case BI_SWIZZLE_B2222: return VA_LANES_8_BIT_B22;
case BI_SWIZZLE_B3333: return VA_LANES_8_BIT_B33;
default: invalid_instruction(I, "lane shift");
case BI_SWIZZLE_H01:
return VA_LANES_8_BIT_B02;
case BI_SWIZZLE_B0000:
return VA_LANES_8_BIT_B00;
case BI_SWIZZLE_B1111:
return VA_LANES_8_BIT_B11;
case BI_SWIZZLE_B2222:
return VA_LANES_8_BIT_B22;
case BI_SWIZZLE_B3333:
return VA_LANES_8_BIT_B33;
default:
invalid_instruction(I, "lane shift");
}
}
@ -292,10 +368,14 @@ static enum va_combine
va_pack_combine(const bi_instr *I, enum bi_swizzle swz)
{
switch (swz) {
case BI_SWIZZLE_H01: return VA_COMBINE_NONE;
case BI_SWIZZLE_H00: return VA_COMBINE_H0;
case BI_SWIZZLE_H11: return VA_COMBINE_H1;
default: invalid_instruction(I, "branch lane");
case BI_SWIZZLE_H01:
return VA_COMBINE_NONE;
case BI_SWIZZLE_H00:
return VA_COMBINE_H0;
case BI_SWIZZLE_H11:
return VA_COMBINE_H1;
default:
invalid_instruction(I, "branch lane");
}
}
@ -303,10 +383,14 @@ static enum va_source_format
va_pack_source_format(const bi_instr *I)
{
switch (I->source_format) {
case BI_SOURCE_FORMAT_FLAT32: return VA_SOURCE_FORMAT_SRC_FLAT32;
case BI_SOURCE_FORMAT_FLAT16: return VA_SOURCE_FORMAT_SRC_FLAT16;
case BI_SOURCE_FORMAT_F32: return VA_SOURCE_FORMAT_SRC_F32;
case BI_SOURCE_FORMAT_F16: return VA_SOURCE_FORMAT_SRC_F16;
case BI_SOURCE_FORMAT_FLAT32:
return VA_SOURCE_FORMAT_SRC_FLAT32;
case BI_SOURCE_FORMAT_FLAT16:
return VA_SOURCE_FORMAT_SRC_FLAT16;
case BI_SOURCE_FORMAT_F32:
return VA_SOURCE_FORMAT_SRC_F32;
case BI_SOURCE_FORMAT_F16:
return VA_SOURCE_FORMAT_SRC_F16;
}
invalid_instruction(I, "source format");
@ -316,9 +400,12 @@ static uint64_t
va_pack_rhadd(const bi_instr *I)
{
switch (I->round) {
case BI_ROUND_RTN: return 0; /* hadd */
case BI_ROUND_RTP: return BITFIELD_BIT(30); /* rhadd */
default: unreachable("Invalid round for HADD");
case BI_ROUND_RTN:
return 0; /* hadd */
case BI_ROUND_RTP:
return BITFIELD_BIT(30); /* rhadd */
default:
unreachable("Invalid round for HADD");
}
}
@ -334,15 +421,17 @@ va_pack_alu(const bi_instr *I)
case BI_OPCODE_FREXPE_V2F16:
case BI_OPCODE_FREXPM_F32:
case BI_OPCODE_FREXPM_V2F16:
if (I->sqrt) hex |= 1ull << 24;
if (I->log) hex |= 1ull << 25;
if (I->sqrt)
hex |= 1ull << 24;
if (I->log)
hex |= 1ull << 25;
break;
/* Add mux type */
case BI_OPCODE_MUX_I32:
case BI_OPCODE_MUX_V2I16:
case BI_OPCODE_MUX_V4I8:
hex |= (uint64_t) I->mux << 32;
hex |= (uint64_t)I->mux << 32;
break;
/* Add .eq flag */
@ -350,12 +439,13 @@ va_pack_alu(const bi_instr *I)
case BI_OPCODE_BRANCHZI:
pack_assert(I, I->cmpf == BI_CMPF_EQ || I->cmpf == BI_CMPF_NE);
if (I->cmpf == BI_CMPF_EQ) hex |= (1ull << 36);
if (I->cmpf == BI_CMPF_EQ)
hex |= (1ull << 36);
if (I->op == BI_OPCODE_BRANCHZI)
hex |= (0x1ull << 40); /* Absolute */
else
hex |= ((uint64_t) I->branch_offset & BITFIELD_MASK(27)) << 8;
hex |= ((uint64_t)I->branch_offset & BITFIELD_MASK(27)) << 8;
break;
@ -369,7 +459,7 @@ va_pack_alu(const bi_instr *I)
case BI_OPCODE_RSHIFT_XOR_I32:
case BI_OPCODE_RSHIFT_XOR_V2I16:
case BI_OPCODE_RSHIFT_XOR_V4I8:
hex |= (uint64_t) I->arithmetic << 34;
hex |= (uint64_t)I->arithmetic << 34;
break;
case BI_OPCODE_LEA_BUF_IMM:
@ -378,8 +468,8 @@ va_pack_alu(const bi_instr *I)
break;
case BI_OPCODE_LEA_ATTR_IMM:
hex |= ((uint64_t) I->table) << 16;
hex |= ((uint64_t) I->attribute_index) << 20;
hex |= ((uint64_t)I->table) << 16;
hex |= ((uint64_t)I->attribute_index) << 20;
break;
case BI_OPCODE_IADD_IMM_I32:
@ -387,13 +477,13 @@ va_pack_alu(const bi_instr *I)
case BI_OPCODE_IADD_IMM_V4I8:
case BI_OPCODE_FADD_IMM_F32:
case BI_OPCODE_FADD_IMM_V2F16:
hex |= ((uint64_t) I->index) << 8;
hex |= ((uint64_t)I->index) << 8;
break;
case BI_OPCODE_CLPER_I32:
hex |= ((uint64_t) I->inactive_result) << 22;
hex |= ((uint64_t) I->lane_op) << 32;
hex |= ((uint64_t) I->subgroup) << 36;
hex |= ((uint64_t)I->inactive_result) << 22;
hex |= ((uint64_t)I->lane_op) << 32;
hex |= ((uint64_t)I->subgroup) << 36;
break;
case BI_OPCODE_LD_VAR:
@ -406,35 +496,37 @@ va_pack_alu(const bi_instr *I)
case BI_OPCODE_LD_VAR_BUF_IMM_F32:
case BI_OPCODE_LD_VAR_SPECIAL:
if (I->op == BI_OPCODE_LD_VAR_SPECIAL)
hex |= ((uint64_t) I->varying_name) << 12; /* instead of index */
hex |= ((uint64_t)I->varying_name) << 12; /* instead of index */
else if (I->op == BI_OPCODE_LD_VAR_BUF_IMM_F16 ||
I->op == BI_OPCODE_LD_VAR_BUF_IMM_F32) {
hex |= ((uint64_t) I->index) << 16;
hex |= ((uint64_t)I->index) << 16;
} else if (I->op == BI_OPCODE_LD_VAR_IMM ||
I->op == BI_OPCODE_LD_VAR_FLAT_IMM) {
hex |= ((uint64_t) I->table) << 8;
hex |= ((uint64_t) I->index) << 12;
hex |= ((uint64_t)I->table) << 8;
hex |= ((uint64_t)I->index) << 12;
}
hex |= ((uint64_t) va_pack_source_format(I)) << 24;
hex |= ((uint64_t) I->update) << 36;
hex |= ((uint64_t) I->sample) << 38;
hex |= ((uint64_t)va_pack_source_format(I)) << 24;
hex |= ((uint64_t)I->update) << 36;
hex |= ((uint64_t)I->sample) << 38;
break;
case BI_OPCODE_LD_ATTR_IMM:
hex |= ((uint64_t) I->table) << 16;
hex |= ((uint64_t) I->attribute_index) << 20;
hex |= ((uint64_t)I->table) << 16;
hex |= ((uint64_t)I->attribute_index) << 20;
break;
case BI_OPCODE_LD_TEX_IMM:
case BI_OPCODE_LEA_TEX_IMM:
hex |= ((uint64_t) I->table) << 16;
hex |= ((uint64_t) I->texture_index) << 20;
hex |= ((uint64_t)I->table) << 16;
hex |= ((uint64_t)I->texture_index) << 20;
break;
case BI_OPCODE_ZS_EMIT:
if (I->stencil) hex |= (1 << 24);
if (I->z) hex |= (1 << 25);
if (I->stencil)
hex |= (1 << 24);
if (I->z)
hex |= (1 << 25);
break;
default:
@ -444,14 +536,14 @@ va_pack_alu(const bi_instr *I)
/* FMA_RSCALE.f32 special modes treated as extra opcodes */
if (I->op == BI_OPCODE_FMA_RSCALE_F32) {
pack_assert(I, I->special < 4);
hex |= ((uint64_t) I->special) << 48;
hex |= ((uint64_t)I->special) << 48;
}
/* Add the normal destination or a placeholder. Staging destinations are
* added elsewhere, as they require special handling for control fields.
*/
if (info.has_dest && info.nr_staging_dests == 0) {
hex |= (uint64_t) va_pack_dest(I) << 40;
hex |= (uint64_t)va_pack_dest(I) << 40;
} else if (info.nr_staging_dests == 0 && info.nr_staging_srcs == 0) {
pack_assert(I, I->nr_dests == 0);
hex |= 0xC0ull << 40; /* Placeholder */
@ -469,19 +561,24 @@ va_pack_alu(const bi_instr *I)
enum va_size size = src_info.size;
bi_index src = I->src[logical_i + src_offset];
hex |= (uint64_t) va_pack_src(I, logical_i + src_offset) << (8 * i);
hex |= (uint64_t)va_pack_src(I, logical_i + src_offset) << (8 * i);
if (src_info.notted) {
if (src.neg) hex |= (1ull << 35);
if (src.neg)
hex |= (1ull << 35);
} else if (src_info.absneg) {
unsigned neg_offs = 32 + 2 + ((2 - i) * 2);
unsigned abs_offs = 33 + 2 + ((2 - i) * 2);
if (src.neg) hex |= 1ull << neg_offs;
if (src.abs) hex |= 1ull << abs_offs;
if (src.neg)
hex |= 1ull << neg_offs;
if (src.abs)
hex |= 1ull << abs_offs;
} else {
if (src.neg) invalid_instruction(I, "negate");
if (src.abs) invalid_instruction(I, "absolute value");
if (src.neg)
invalid_instruction(I, "negate");
if (src.abs)
invalid_instruction(I, "absolute value");
}
if (src_info.swizzle) {
@ -489,50 +586,56 @@ va_pack_alu(const bi_instr *I)
unsigned S = src.swizzle;
pack_assert(I, size == VA_SIZE_16 || size == VA_SIZE_32);
uint64_t v = (size == VA_SIZE_32 ? va_pack_widen_f32(I, S) : va_pack_swizzle_f16(I, S));
uint64_t v = (size == VA_SIZE_32 ? va_pack_widen_f32(I, S)
: va_pack_swizzle_f16(I, S));
hex |= v << offs;
} else if (src_info.widen) {
unsigned offs = (i == 1) ? 26 : 36;
hex |= (uint64_t) va_pack_widen(I, src.swizzle, src_info.size) << offs;
hex |= (uint64_t)va_pack_widen(I, src.swizzle, src_info.size) << offs;
} else if (src_info.lane) {
unsigned offs = (I->op == BI_OPCODE_MKVEC_V2I8) ?
((i == 0) ? 38 : 36) :
28;
unsigned offs =
(I->op == BI_OPCODE_MKVEC_V2I8) ? ((i == 0) ? 38 : 36) : 28;
if (src_info.size == VA_SIZE_16) {
hex |= (src.swizzle == BI_SWIZZLE_H11 ? 1 : 0) << offs;
} else if (I->op == BI_OPCODE_BRANCHZ_I16) {
hex |= ((uint64_t) va_pack_combine(I, src.swizzle) << 37);
hex |= ((uint64_t)va_pack_combine(I, src.swizzle) << 37);
} else {
pack_assert(I, src_info.size == VA_SIZE_8);
unsigned comp = src.swizzle - BI_SWIZZLE_B0000;
pack_assert(I, comp < 4);
hex |= (uint64_t) comp << offs;
hex |= (uint64_t)comp << offs;
}
} else if (src_info.lanes) {
pack_assert(I, src_info.size == VA_SIZE_8);
pack_assert(I, i == 1);
hex |= (uint64_t) va_pack_shift_lanes(I, src.swizzle) << 26;
hex |= (uint64_t)va_pack_shift_lanes(I, src.swizzle) << 26;
} else if (src_info.combine) {
/* Treat as swizzle, subgroup ops not yet supported */
pack_assert(I, src_info.size == VA_SIZE_32);
pack_assert(I, i == 0);
hex |= (uint64_t) va_pack_widen_f32(I, src.swizzle) << 37;
hex |= (uint64_t)va_pack_widen_f32(I, src.swizzle) << 37;
} else if (src_info.halfswizzle) {
pack_assert(I, src_info.size == VA_SIZE_8);
pack_assert(I, i == 0);
hex |= (uint64_t) va_pack_halfswizzle(I, src.swizzle) << 36;
hex |= (uint64_t)va_pack_halfswizzle(I, src.swizzle) << 36;
} else if (src.swizzle != BI_SWIZZLE_H01) {
invalid_instruction(I, "swizzle");
}
}
if (info.saturate) hex |= (uint64_t) I->saturate << 30;
if (info.rhadd) hex |= va_pack_rhadd(I);
if (info.clamp) hex |= (uint64_t) I->clamp << 32;
if (info.round_mode) hex |= (uint64_t) I->round << 30;
if (info.condition) hex |= (uint64_t) I->cmpf << 32;
if (info.result_type) hex |= (uint64_t) I->result_type << 30;
if (info.saturate)
hex |= (uint64_t)I->saturate << 30;
if (info.rhadd)
hex |= va_pack_rhadd(I);
if (info.clamp)
hex |= (uint64_t)I->clamp << 32;
if (info.round_mode)
hex |= (uint64_t)I->round << 30;
if (info.condition)
hex |= (uint64_t)I->cmpf << 32;
if (info.result_type)
hex |= (uint64_t)I->result_type << 30;
return hex;
}
@ -541,37 +644,35 @@ static uint64_t
va_pack_byte_offset(const bi_instr *I)
{
int16_t offset = I->byte_offset;
if (offset != I->byte_offset) invalid_instruction(I, "byte offset");
if (offset != I->byte_offset)
invalid_instruction(I, "byte offset");
uint16_t offset_as_u16 = offset;
return ((uint64_t) offset_as_u16) << 8;
return ((uint64_t)offset_as_u16) << 8;
}
static uint64_t
va_pack_byte_offset_8(const bi_instr *I)
{
uint8_t offset = I->byte_offset;
if (offset != I->byte_offset) invalid_instruction(I, "byte offset");
if (offset != I->byte_offset)
invalid_instruction(I, "byte offset");
return ((uint64_t) offset) << 8;
return ((uint64_t)offset) << 8;
}
static uint64_t
va_pack_load(const bi_instr *I, bool buffer_descriptor)
{
const uint8_t load_lane_identity[8] = {
VA_LOAD_LANE_8_BIT_B0,
VA_LOAD_LANE_16_BIT_H0,
VA_LOAD_LANE_24_BIT_IDENTITY,
VA_LOAD_LANE_32_BIT_W0,
VA_LOAD_LANE_48_BIT_IDENTITY,
VA_LOAD_LANE_64_BIT_IDENTITY,
VA_LOAD_LANE_96_BIT_IDENTITY,
VA_LOAD_LANE_128_BIT_IDENTITY,
VA_LOAD_LANE_8_BIT_B0, VA_LOAD_LANE_16_BIT_H0,
VA_LOAD_LANE_24_BIT_IDENTITY, VA_LOAD_LANE_32_BIT_W0,
VA_LOAD_LANE_48_BIT_IDENTITY, VA_LOAD_LANE_64_BIT_IDENTITY,
VA_LOAD_LANE_96_BIT_IDENTITY, VA_LOAD_LANE_128_BIT_IDENTITY,
};
unsigned memory_size = (valhall_opcodes[I->op].exact >> 27) & 0x7;
uint64_t hex = (uint64_t) load_lane_identity[memory_size] << 36;
uint64_t hex = (uint64_t)load_lane_identity[memory_size] << 36;
// unsigned
hex |= (1ull << 39);
@ -579,10 +680,10 @@ va_pack_load(const bi_instr *I, bool buffer_descriptor)
if (!buffer_descriptor)
hex |= va_pack_byte_offset(I);
hex |= (uint64_t) va_pack_src(I, 0) << 0;
hex |= (uint64_t)va_pack_src(I, 0) << 0;
if (buffer_descriptor)
hex |= (uint64_t) va_pack_src(I, 1) << 8;
hex |= (uint64_t)va_pack_src(I, 1) << 8;
return hex;
}
@ -591,10 +692,14 @@ static uint64_t
va_pack_memory_access(const bi_instr *I)
{
switch (I->seg) {
case BI_SEG_TL: return VA_MEMORY_ACCESS_FORCE;
case BI_SEG_POS: return VA_MEMORY_ACCESS_ISTREAM;
case BI_SEG_VARY: return VA_MEMORY_ACCESS_ESTREAM;
default: return VA_MEMORY_ACCESS_NONE;
case BI_SEG_TL:
return VA_MEMORY_ACCESS_FORCE;
case BI_SEG_POS:
return VA_MEMORY_ACCESS_ISTREAM;
case BI_SEG_VARY:
return VA_MEMORY_ACCESS_ESTREAM;
default:
return VA_MEMORY_ACCESS_NONE;
}
}
@ -604,7 +709,7 @@ va_pack_store(const bi_instr *I)
uint64_t hex = va_pack_memory_access(I) << 24;
va_validate_register_pair(I, 1);
hex |= (uint64_t) va_pack_src(I, 1) << 0;
hex |= (uint64_t)va_pack_src(I, 1) << 0;
hex |= va_pack_byte_offset(I);
@ -615,11 +720,16 @@ static enum va_lod_mode
va_pack_lod_mode(const bi_instr *I)
{
switch (I->va_lod_mode) {
case BI_VA_LOD_MODE_ZERO_LOD: return VA_LOD_MODE_ZERO;
case BI_VA_LOD_MODE_COMPUTED_LOD: return VA_LOD_MODE_COMPUTED;
case BI_VA_LOD_MODE_EXPLICIT: return VA_LOD_MODE_EXPLICIT;
case BI_VA_LOD_MODE_COMPUTED_BIAS: return VA_LOD_MODE_COMPUTED_BIAS;
case BI_VA_LOD_MODE_GRDESC: return VA_LOD_MODE_GRDESC;
case BI_VA_LOD_MODE_ZERO_LOD:
return VA_LOD_MODE_ZERO;
case BI_VA_LOD_MODE_COMPUTED_LOD:
return VA_LOD_MODE_COMPUTED;
case BI_VA_LOD_MODE_EXPLICIT:
return VA_LOD_MODE_EXPLICIT;
case BI_VA_LOD_MODE_COMPUTED_BIAS:
return VA_LOD_MODE_COMPUTED_BIAS;
case BI_VA_LOD_MODE_GRDESC:
return VA_LOD_MODE_GRDESC;
}
invalid_instruction(I, "LOD mode");
@ -650,14 +760,22 @@ static enum va_register_format
va_pack_register_format(const bi_instr *I)
{
switch (I->register_format) {
case BI_REGISTER_FORMAT_AUTO: return VA_REGISTER_FORMAT_AUTO;
case BI_REGISTER_FORMAT_F32: return VA_REGISTER_FORMAT_F32;
case BI_REGISTER_FORMAT_F16: return VA_REGISTER_FORMAT_F16;
case BI_REGISTER_FORMAT_S32: return VA_REGISTER_FORMAT_S32;
case BI_REGISTER_FORMAT_S16: return VA_REGISTER_FORMAT_S16;
case BI_REGISTER_FORMAT_U32: return VA_REGISTER_FORMAT_U32;
case BI_REGISTER_FORMAT_U16: return VA_REGISTER_FORMAT_U16;
default: invalid_instruction(I, "register format");
case BI_REGISTER_FORMAT_AUTO:
return VA_REGISTER_FORMAT_AUTO;
case BI_REGISTER_FORMAT_F32:
return VA_REGISTER_FORMAT_F32;
case BI_REGISTER_FORMAT_F16:
return VA_REGISTER_FORMAT_F16;
case BI_REGISTER_FORMAT_S32:
return VA_REGISTER_FORMAT_S32;
case BI_REGISTER_FORMAT_S16:
return VA_REGISTER_FORMAT_S16;
case BI_REGISTER_FORMAT_U32:
return VA_REGISTER_FORMAT_U32;
case BI_REGISTER_FORMAT_U16:
return VA_REGISTER_FORMAT_U16;
default:
invalid_instruction(I, "register format");
}
}
@ -666,35 +784,34 @@ va_pack_instr(const bi_instr *I)
{
struct va_opcode_info info = valhall_opcodes[I->op];
uint64_t hex = info.exact | (((uint64_t) I->flow) << 59);
hex |= ((uint64_t) va_select_fau_page(I)) << 57;
uint64_t hex = info.exact | (((uint64_t)I->flow) << 59);
hex |= ((uint64_t)va_select_fau_page(I)) << 57;
if (info.slot)
hex |= ((uint64_t) I->slot << 30);
hex |= ((uint64_t)I->slot << 30);
if (info.sr_count) {
bool read = bi_opcode_props[I->op].sr_read;
bi_index sr = read ? I->src[0] : I->dest[0];
unsigned count = read ?
bi_count_read_registers(I, 0) :
bi_count_write_registers(I, 0);
unsigned count =
read ? bi_count_read_registers(I, 0) : bi_count_write_registers(I, 0);
hex |= ((uint64_t) count << 33);
hex |= (uint64_t) va_pack_reg(I, sr) << 40;
hex |= ((uint64_t) info.sr_control << 46);
hex |= ((uint64_t)count << 33);
hex |= (uint64_t)va_pack_reg(I, sr) << 40;
hex |= ((uint64_t)info.sr_control << 46);
}
if (info.sr_write_count) {
hex |= ((uint64_t) bi_count_write_registers(I, 0) - 1) << 36;
hex |= ((uint64_t) va_pack_reg(I, I->dest[0])) << 16;
hex |= ((uint64_t)bi_count_write_registers(I, 0) - 1) << 36;
hex |= ((uint64_t)va_pack_reg(I, I->dest[0])) << 16;
}
if (info.vecsize)
hex |= ((uint64_t) I->vecsize << 28);
hex |= ((uint64_t)I->vecsize << 28);
if (info.register_format)
hex |= ((uint64_t) va_pack_register_format(I)) << 24;
hex |= ((uint64_t)va_pack_register_format(I)) << 24;
switch (I->op) {
case BI_OPCODE_LOAD_I8:
@@ -738,18 +855,18 @@ va_pack_instr(const bi_instr *I)
/* 64-bit source */
va_validate_register_pair(I, 0);
hex |= (uint64_t) va_pack_src(I, 0) << 0;
hex |= (uint64_t)va_pack_src(I, 0) << 0;
hex |= va_pack_byte_offset_8(I);
hex |= ((uint64_t) va_pack_atom_opc_1(I)) << 22;
hex |= ((uint64_t)va_pack_atom_opc_1(I)) << 22;
break;
case BI_OPCODE_ATOM_I32:
case BI_OPCODE_ATOM_RETURN_I32:
/* 64-bit source */
va_validate_register_pair(I, 1);
hex |= (uint64_t) va_pack_src(I, 1) << 0;
hex |= (uint64_t)va_pack_src(I, 1) << 0;
hex |= va_pack_byte_offset_8(I);
hex |= ((uint64_t) va_pack_atom_opc(I)) << 22;
hex |= ((uint64_t)va_pack_atom_opc(I)) << 22;
if (I->op == BI_OPCODE_ATOM_RETURN_I32)
hex |= (0xc0ull << 40); // flags
@@ -764,56 +881,61 @@ va_pack_instr(const bi_instr *I)
hex |= va_pack_store(I);
/* Conversion descriptor */
hex |= (uint64_t) va_pack_src(I, 3) << 16;
hex |= (uint64_t)va_pack_src(I, 3) << 16;
break;
case BI_OPCODE_BLEND:
{
case BI_OPCODE_BLEND: {
/* Source 0 - Blend descriptor (64-bit) */
hex |= ((uint64_t) va_pack_src(I, 2)) << 0;
hex |= ((uint64_t)va_pack_src(I, 2)) << 0;
va_validate_register_pair(I, 2);
/* Target */
if (I->branch_offset & 0x7) invalid_instruction(I, "unaligned branch");
if (I->branch_offset & 0x7)
invalid_instruction(I, "unaligned branch");
hex |= ((I->branch_offset >> 3) << 8);
/* Source 2 - coverage mask */
hex |= ((uint64_t) va_pack_reg(I, I->src[1])) << 16;
hex |= ((uint64_t)va_pack_reg(I, I->src[1])) << 16;
/* Vector size */
unsigned vecsize = 4;
hex |= ((uint64_t) (vecsize - 1) << 28);
hex |= ((uint64_t)(vecsize - 1) << 28);
break;
}
case BI_OPCODE_TEX_SINGLE:
case BI_OPCODE_TEX_FETCH:
case BI_OPCODE_TEX_GATHER:
{
case BI_OPCODE_TEX_GATHER: {
/* Image to read from */
hex |= ((uint64_t) va_pack_src(I, 1)) << 0;
hex |= ((uint64_t)va_pack_src(I, 1)) << 0;
if (I->op == BI_OPCODE_TEX_FETCH && I->shadow)
invalid_instruction(I, "TEX_FETCH does not support .shadow");
if (I->array_enable) hex |= (1ull << 10);
if (I->texel_offset) hex |= (1ull << 11);
if (I->shadow) hex |= (1ull << 12);
if (I->skip) hex |= (1ull << 39);
if (!bi_is_regfmt_16(I->register_format)) hex |= (1ull << 46);
if (I->array_enable)
hex |= (1ull << 10);
if (I->texel_offset)
hex |= (1ull << 11);
if (I->shadow)
hex |= (1ull << 12);
if (I->skip)
hex |= (1ull << 39);
if (!bi_is_regfmt_16(I->register_format))
hex |= (1ull << 46);
if (I->op == BI_OPCODE_TEX_SINGLE)
hex |= ((uint64_t) va_pack_lod_mode(I)) << 13;
hex |= ((uint64_t)va_pack_lod_mode(I)) << 13;
if (I->op == BI_OPCODE_TEX_GATHER) {
if (I->integer_coordinates) hex |= (1 << 13);
hex |= ((uint64_t) I->fetch_component) << 14;
if (I->integer_coordinates)
hex |= (1 << 13);
hex |= ((uint64_t)I->fetch_component) << 14;
}
hex |= (I->write_mask << 22);
hex |= ((uint64_t) va_pack_register_type(I)) << 26;
hex |= ((uint64_t) I->dimension) << 28;
hex |= ((uint64_t)va_pack_register_type(I)) << 26;
hex |= ((uint64_t)I->dimension) << 28;
break;
}


@@ -22,9 +22,9 @@
* SOFTWARE.
*/
#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall.h"
#include "bi_builder.h"
void
va_count_instr_stats(bi_instr *I, struct va_stats *stats)
@@ -48,8 +48,8 @@ va_count_instr_stats(bi_instr *I, struct va_stats *stats)
/* Varying is scaled by 16-bit components interpolated */
case VA_UNIT_V:
stats->v += (I->vecsize + 1) *
(bi_is_regfmt_16(I->register_format) ? 1 : 2);
stats->v +=
(I->vecsize + 1) * (bi_is_regfmt_16(I->register_format) ? 1 : 2);
return;
/* We just count load/store and texturing for now */


@@ -21,15 +21,16 @@
* SOFTWARE.
*/
#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall.h"
#include "bi_builder.h"
/* Valhall has limits on access to fast-access uniforms:
*
* An instruction may access no more than a single 64-bit uniform slot.
* An instruction may access no more than 64-bits of combined uniforms and constants.
* An instruction may access no more than a single special immediate (e.g. lane_id).
* An instruction may access no more than 64-bits of combined uniforms and
* constants. An instruction may access no more than a single special immediate
* (e.g. lane_id).
*
* We validate these constraints.
*
@@ -114,7 +115,7 @@ bool
va_validate_fau(bi_instr *I)
{
bool valid = true;
struct fau_state fau = { .uniform_slot = -1 };
struct fau_state fau = {.uniform_slot = -1};
unsigned fau_page = va_select_fau_page(I);
bi_foreach_src(I, s) {
@@ -127,7 +128,7 @@ va_validate_fau(bi_instr *I)
void
va_repair_fau(bi_builder *b, bi_instr *I)
{
struct fau_state fau = { .uniform_slot = -1 };
struct fau_state fau = {.uniform_slot = -1};
unsigned fau_page = va_select_fau_page(I);
bi_foreach_src(I, s) {


@@ -73,43 +73,42 @@ enum va_unit {
};
struct va_src_info {
bool absneg : 1;
bool swizzle : 1;
bool notted : 1;
bool lane : 1;
bool lanes : 1;
bool halfswizzle : 1;
bool widen : 1;
bool combine : 1;
bool absneg : 1;
bool swizzle : 1;
bool notted : 1;
bool lane : 1;
bool lanes : 1;
bool halfswizzle : 1;
bool widen : 1;
bool combine : 1;
enum va_size size : 2;
} __attribute__((packed));
struct va_opcode_info {
uint64_t exact;
struct va_src_info srcs[4];
uint8_t type_size : 8;
enum va_unit unit : 3;
unsigned nr_srcs : 3;
unsigned nr_staging_srcs : 2;
uint8_t type_size : 8;
enum va_unit unit : 3;
unsigned nr_srcs : 3;
unsigned nr_staging_srcs : 2;
unsigned nr_staging_dests : 2;
bool has_dest : 1;
bool is_signed : 1;
bool clamp : 1;
bool saturate : 1;
bool rhadd : 1;
bool round_mode : 1;
bool condition : 1;
bool result_type : 1;
bool vecsize : 1;
bool register_format : 1;
bool slot : 1;
bool sr_count : 1;
bool sr_write_count : 1;
unsigned sr_control : 2;
bool has_dest : 1;
bool is_signed : 1;
bool clamp : 1;
bool saturate : 1;
bool rhadd : 1;
bool round_mode : 1;
bool condition : 1;
bool result_type : 1;
bool vecsize : 1;
bool register_format : 1;
bool slot : 1;
bool sr_count : 1;
bool sr_write_count : 1;
unsigned sr_control : 2;
};
extern const struct va_opcode_info
valhall_opcodes[BI_NUM_OPCODES];
extern const struct va_opcode_info valhall_opcodes[BI_NUM_OPCODES];
/* Bifrost specifies the source of bitwise operations as (A, B, shift), but
* Valhall specifies (A, shift, B). We follow Bifrost conventions in the


@@ -47,8 +47,7 @@ pan_ioctl_get_param(int fd, unsigned long request, void *arg)
struct drm_panfrost_get_param *gp = arg;
switch (gp->param) {
case DRM_PANFROST_PARAM_GPU_PROD_ID:
{
case DRM_PANFROST_PARAM_GPU_PROD_ID: {
char *override_version = getenv("PAN_GPU_ID");
if (override_version)


@@ -13,22 +13,21 @@
#include "pan_pps_perf.h"
namespace pps
{
namespace pps {
/// @brief Panfrost implementation of PPS driver.
/// This driver queries the GPU through `drm/panfrost_drm.h`, using performance counters ioctls,
/// which can be enabled by setting a kernel parameter: `modprobe panfrost unstable_ioctls=1`.
/// The ioctl needs a buffer to copy data from kernel to user space.
class PanfrostDriver : public Driver
{
public:
/// This driver queries the GPU through `drm/panfrost_drm.h`, using performance
/// counters ioctls, which can be enabled by setting a kernel parameter:
/// `modprobe panfrost unstable_ioctls=1`. The ioctl needs a buffer to copy data
/// from kernel to user space.
class PanfrostDriver : public Driver {
public:
static inline PanfrostDriver &into(Driver &dri);
static inline const PanfrostDriver &into(const Driver &dri);
/// @param A list of mali counter names
/// @return A pair with two lists: counter groups and available counters
static std::pair<std::vector<CounterGroup>, std::vector<Counter>> create_available_counters(
const PanfrostPerf& perf);
static std::pair<std::vector<CounterGroup>, std::vector<Counter>>
create_available_counters(const PanfrostPerf &perf);
PanfrostDriver();
~PanfrostDriver();
@@ -50,12 +49,14 @@ class PanfrostDriver : public Driver
std::unique_ptr<PanfrostPerf> perf = nullptr;
};
PanfrostDriver &PanfrostDriver::into(Driver &dri)
PanfrostDriver &
PanfrostDriver::into(Driver &dri)
{
return reinterpret_cast<PanfrostDriver &>(dri);
}
const PanfrostDriver &PanfrostDriver::into(const Driver &dri)
const PanfrostDriver &
PanfrostDriver::into(const Driver &dri)
{
return reinterpret_cast<const PanfrostDriver &>(dri);
}


@@ -10,35 +10,32 @@
struct panfrost_device;
struct panfrost_perf;
namespace pps
{
class PanfrostDevice
{
public:
namespace pps {
class PanfrostDevice {
public:
PanfrostDevice(int fd);
~PanfrostDevice();
PanfrostDevice(const PanfrostDevice &) = delete;
PanfrostDevice &operator=(const PanfrostDevice &) = delete;
PanfrostDevice(PanfrostDevice&&);
PanfrostDevice& operator=(PanfrostDevice&&);
PanfrostDevice(PanfrostDevice &&);
PanfrostDevice &operator=(PanfrostDevice &&);
void *ctx = nullptr;
struct panfrost_device* dev = nullptr;
struct panfrost_device *dev = nullptr;
};
class PanfrostPerf
{
public:
PanfrostPerf(const PanfrostDevice& dev);
class PanfrostPerf {
public:
PanfrostPerf(const PanfrostDevice &dev);
~PanfrostPerf();
PanfrostPerf(const PanfrostPerf &) = delete;
PanfrostPerf &operator=(const PanfrostPerf &) = delete;
PanfrostPerf(PanfrostPerf&&);
PanfrostPerf& operator=(PanfrostPerf&&);
PanfrostPerf(PanfrostPerf &&);
PanfrostPerf &operator=(PanfrostPerf &&);
int enable() const;
void disable() const;


@@ -28,11 +28,11 @@
#ifndef __PANFROST_JOB_H__
#define __PANFROST_JOB_H__
#include <stdint.h>
#include <stdbool.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
typedef uint8_t u8;
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
@@ -68,13 +68,13 @@ typedef uint64_t mali_ptr;
/* These formats seem to largely duplicate the others. They're used at least
* for Bifrost framebuffer output.
*/
#define MALI_FORMAT_SPECIAL2 (7 << 5)
#define MALI_EXTRACT_TYPE(fmt) ((fmt) & 0xe0)
#define MALI_FORMAT_SPECIAL2 (7 << 5)
#define MALI_EXTRACT_TYPE(fmt) ((fmt)&0xe0)
/* If the high 3 bits are 3 to 6 these two bits say how many components
* there are.
*/
#define MALI_NR_CHANNELS(n) ((n - 1) << 3)
#define MALI_NR_CHANNELS(n) ((n - 1) << 3)
#define MALI_EXTRACT_CHANNELS(fmt) ((((fmt) >> 3) & 3) + 1)
/* If the high 3 bits are 3 to 6, then the low 3 bits say how big each
@@ -93,7 +93,7 @@ typedef uint64_t mali_ptr;
/* For MALI_FORMAT_SINT it means a half-float (e.g. RG16F). For
* MALI_FORMAT_UNORM, it means a 32-bit float.
*/
#define MALI_CHANNEL_FLOAT 7
#define MALI_CHANNEL_FLOAT 7
#define MALI_EXTRACT_BITS(fmt) (fmt & 0x7)
#define MALI_EXTRACT_INDEX(pixfmt) (((pixfmt) >> 12) & 0xFF)
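
As a sanity check on the field layout these macros describe, here is a tiny standalone round trip (not part of the commit; the macro copies and the example format word are purely illustrative):

  #include <assert.h>
  #include <stdio.h>

  /* Copied from the header above: channel-count and per-channel-size fields
   * of the legacy mali format word. */
  #define MALI_NR_CHANNELS(n)        ((n - 1) << 3)
  #define MALI_EXTRACT_CHANNELS(fmt) ((((fmt) >> 3) & 3) + 1)
  #define MALI_EXTRACT_BITS(fmt)     (fmt & 0x7)

  int
  main(void)
  {
     /* A made-up 4-channel format with per-channel size code 3: both fields
      * survive the pack/extract round trip. */
     unsigned fmt = MALI_NR_CHANNELS(4) | 3;

     assert(MALI_EXTRACT_CHANNELS(fmt) == 4);
     assert(MALI_EXTRACT_BITS(fmt) == 3);
     printf("fmt = 0x%x\n", fmt);
     return 0;
  }
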
@@ -241,18 +241,18 @@ typedef uint64_t mali_ptr;
/* Used for lod encoding. Thanks @urjaman for pointing out these routines can
* be cleaned up a lot. */
#define DECODE_FIXED_16(x) ((float) (x / 256.0))
#define DECODE_FIXED_16(x) ((float)(x / 256.0))
static inline int16_t
FIXED_16(float x, bool allow_negative)
{
/* Clamp inputs, accounting for float error */
float max_lod = (32.0 - (1.0 / 512.0));
float min_lod = allow_negative ? -max_lod : 0.0;
/* Clamp inputs, accounting for float error */
float max_lod = (32.0 - (1.0 / 512.0));
float min_lod = allow_negative ? -max_lod : 0.0;
x = ((x > max_lod) ? max_lod : ((x < min_lod) ? min_lod : x));
x = ((x > max_lod) ? max_lod : ((x < min_lod) ? min_lod : x));
return (int) (x * 256.0);
return (int)(x * 256.0);
}
#endif /* __PANFROST_JOB_H__ */
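
The 8.8 fixed-point LOD helpers above are easiest to check with a concrete value. A standalone sketch (not part of this change; the helpers are copied verbatim, the main() is only for illustration):

  #include <assert.h>
  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  /* Copied from above: 8.8 fixed-point LOD helpers. */
  #define DECODE_FIXED_16(x) ((float)(x / 256.0))

  static inline int16_t
  FIXED_16(float x, bool allow_negative)
  {
     float max_lod = (32.0 - (1.0 / 512.0));
     float min_lod = allow_negative ? -max_lod : 0.0;

     x = ((x > max_lod) ? max_lod : ((x < min_lod) ? min_lod : x));

     return (int)(x * 256.0);
  }

  int
  main(void)
  {
     /* LOD 2.5 encodes as 2.5 * 256 = 640 and decodes back exactly. */
     assert(FIXED_16(2.5f, false) == 640);
     assert(DECODE_FIXED_16(640) == 2.5f);

     /* Negative LODs are clamped to 0 unless explicitly allowed. */
     assert(FIXED_16(-1.0f, false) == 0);
     assert(FIXED_16(-1.0f, true) == -256);

     /* Out-of-range LODs clamp just below 32. */
     printf("max encodable LOD: %f\n", DECODE_FIXED_16(FIXED_16(100.0f, false)));
     return 0;
  }
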

File diff suppressed because it is too large


@@ -36,54 +36,54 @@ extern FILE *pandecode_dump_stream;
void pandecode_dump_file_open(void);
struct pandecode_mapped_memory {
struct rb_node node;
size_t length;
void *addr;
uint64_t gpu_va;
bool ro;
char name[32];
struct rb_node node;
size_t length;
void *addr;
uint64_t gpu_va;
bool ro;
char name[32];
};
char *pointer_as_memory_reference(uint64_t ptr);
struct pandecode_mapped_memory *pandecode_find_mapped_gpu_mem_containing(uint64_t addr);
struct pandecode_mapped_memory *
pandecode_find_mapped_gpu_mem_containing(uint64_t addr);
void pandecode_map_read_write(void);
void pandecode_dump_mappings(void);
static inline void *
__pandecode_fetch_gpu_mem(uint64_t gpu_va, size_t size,
int line, const char *filename)
__pandecode_fetch_gpu_mem(uint64_t gpu_va, size_t size, int line,
const char *filename)
{
const struct pandecode_mapped_memory *mem =
pandecode_find_mapped_gpu_mem_containing(gpu_va);
const struct pandecode_mapped_memory *mem =
pandecode_find_mapped_gpu_mem_containing(gpu_va);
if (!mem) {
fprintf(stderr, "Access to unknown memory %" PRIx64 " in %s:%d\n",
gpu_va, filename, line);
assert(0);
}
if (!mem) {
fprintf(stderr, "Access to unknown memory %" PRIx64 " in %s:%d\n", gpu_va,
filename, line);
assert(0);
}
assert(size + (gpu_va - mem->gpu_va) <= mem->length);
assert(size + (gpu_va - mem->gpu_va) <= mem->length);
return mem->addr + gpu_va - mem->gpu_va;
return mem->addr + gpu_va - mem->gpu_va;
}
#define pandecode_fetch_gpu_mem(gpu_va, size) \
__pandecode_fetch_gpu_mem(gpu_va, size, __LINE__, __FILE__)
#define pandecode_fetch_gpu_mem(gpu_va, size) \
__pandecode_fetch_gpu_mem(gpu_va, size, __LINE__, __FILE__)
/* Returns a validated pointer to mapped GPU memory with the given pointer type,
* size automatically determined from the pointer type
*/
#define PANDECODE_PTR(gpu_va, type) \
((type*)(__pandecode_fetch_gpu_mem(gpu_va, sizeof(type), \
__LINE__, __FILE__)))
#define PANDECODE_PTR(gpu_va, type) \
((type *)(__pandecode_fetch_gpu_mem(gpu_va, sizeof(type), __LINE__, \
__FILE__)))
/* Usage: <variable type> PANDECODE_PTR_VAR(name, gpu_va) */
#define PANDECODE_PTR_VAR(name, gpu_va) \
name = __pandecode_fetch_gpu_mem(gpu_va, sizeof(*name), \
__LINE__, __FILE__)
#define PANDECODE_PTR_VAR(name, gpu_va) \
name = __pandecode_fetch_gpu_mem(gpu_va, sizeof(*name), __LINE__, __FILE__)
/* Forward declare for all supported gens to permit thunking */
void pandecode_jc_v4(mali_ptr jc_gpu_va, unsigned gpu_id);
@@ -101,44 +101,44 @@ void pandecode_abort_on_fault_v9(mali_ptr jc_gpu_va);
static inline void
pan_hexdump(FILE *fp, const uint8_t *hex, size_t cnt, bool with_strings)
{
for (unsigned i = 0; i < cnt; ++i) {
if ((i & 0xF) == 0)
fprintf(fp, "%06X ", i);
for (unsigned i = 0; i < cnt; ++i) {
if ((i & 0xF) == 0)
fprintf(fp, "%06X ", i);
uint8_t v = hex[i];
uint8_t v = hex[i];
if (v == 0 && (i & 0xF) == 0) {
/* Check if we're starting an aligned run of zeroes */
unsigned zero_count = 0;
if (v == 0 && (i & 0xF) == 0) {
/* Check if we're starting an aligned run of zeroes */
unsigned zero_count = 0;
for (unsigned j = i; j < cnt; ++j) {
if (hex[j] == 0)
zero_count++;
else
break;
}
for (unsigned j = i; j < cnt; ++j) {
if (hex[j] == 0)
zero_count++;
else
break;
}
if (zero_count >= 32) {
fprintf(fp, "*\n");
i += (zero_count & ~0xF) - 1;
continue;
}
}
if (zero_count >= 32) {
fprintf(fp, "*\n");
i += (zero_count & ~0xF) - 1;
continue;
}
}
fprintf(fp, "%02X ", hex[i]);
if ((i & 0xF) == 0xF && with_strings) {
fprintf(fp, " | ");
for (unsigned j = i & ~0xF; j <= i; ++j) {
uint8_t c = hex[j];
fputc((c < 32 || c > 128) ? '.' : c, fp);
}
}
fprintf(fp, "%02X ", hex[i]);
if ((i & 0xF) == 0xF && with_strings) {
fprintf(fp, " | ");
for (unsigned j = i & ~0xF; j <= i; ++j) {
uint8_t c = hex[j];
fputc((c < 32 || c > 128) ? '.' : c, fp);
}
}
if ((i & 0xF) == 0xF)
fprintf(fp, "\n");
}
if ((i & 0xF) == 0xF)
fprintf(fp, "\n");
}
fprintf(fp, "\n");
fprintf(fp, "\n");
}
#endif /* __MMAP_TRACE_H__ */


@@ -23,18 +23,18 @@
* SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include "decode.h"
#include "util/macros.h"
#include "util/simple_mtx.h"
#include "util/u_debug.h"
#include "util/u_dynarray.h"
#include "util/simple_mtx.h"
#include "decode.h"
FILE *pandecode_dump_stream;
@@ -46,8 +46,8 @@ static struct util_dynarray ro_mappings;
static simple_mtx_t pandecode_lock = SIMPLE_MTX_INITIALIZER;
#define to_mapped_memory(x) \
rb_node_data(struct pandecode_mapped_memory, x, node)
#define to_mapped_memory(x) \
rb_node_data(struct pandecode_mapped_memory, x, node)
/*
* Compare a GPU VA to a node, considering a GPU VA to be equal to a node if it
@@ -57,147 +57,147 @@ static simple_mtx_t pandecode_lock = SIMPLE_MTX_INITIALIZER;
static int
pandecode_cmp_key(const struct rb_node *lhs, const void *key)
{
struct pandecode_mapped_memory *mem = to_mapped_memory(lhs);
uint64_t *gpu_va = (uint64_t *) key;
struct pandecode_mapped_memory *mem = to_mapped_memory(lhs);
uint64_t *gpu_va = (uint64_t *)key;
if (mem->gpu_va <= *gpu_va && *gpu_va < (mem->gpu_va + mem->length))
return 0;
else
return mem->gpu_va - *gpu_va;
if (mem->gpu_va <= *gpu_va && *gpu_va < (mem->gpu_va + mem->length))
return 0;
else
return mem->gpu_va - *gpu_va;
}
static int
pandecode_cmp(const struct rb_node *lhs, const struct rb_node *rhs)
{
return to_mapped_memory(lhs)->gpu_va - to_mapped_memory(rhs)->gpu_va;
return to_mapped_memory(lhs)->gpu_va - to_mapped_memory(rhs)->gpu_va;
}
static struct pandecode_mapped_memory *
pandecode_find_mapped_gpu_mem_containing_rw(uint64_t addr)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);
struct rb_node *node = rb_tree_search(&mmap_tree, &addr, pandecode_cmp_key);
struct rb_node *node = rb_tree_search(&mmap_tree, &addr, pandecode_cmp_key);
return to_mapped_memory(node);
return to_mapped_memory(node);
}
struct pandecode_mapped_memory *
pandecode_find_mapped_gpu_mem_containing(uint64_t addr)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);
struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing_rw(addr);
struct pandecode_mapped_memory *mem =
pandecode_find_mapped_gpu_mem_containing_rw(addr);
if (mem && mem->addr && !mem->ro) {
mprotect(mem->addr, mem->length, PROT_READ);
mem->ro = true;
util_dynarray_append(&ro_mappings, struct pandecode_mapped_memory *, mem);
}
if (mem && mem->addr && !mem->ro) {
mprotect(mem->addr, mem->length, PROT_READ);
mem->ro = true;
util_dynarray_append(&ro_mappings, struct pandecode_mapped_memory *, mem);
}
return mem;
return mem;
}
void
pandecode_map_read_write(void)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);
util_dynarray_foreach(&ro_mappings, struct pandecode_mapped_memory *, mem) {
(*mem)->ro = false;
mprotect((*mem)->addr, (*mem)->length, PROT_READ | PROT_WRITE);
}
util_dynarray_clear(&ro_mappings);
util_dynarray_foreach(&ro_mappings, struct pandecode_mapped_memory *, mem) {
(*mem)->ro = false;
mprotect((*mem)->addr, (*mem)->length, PROT_READ | PROT_WRITE);
}
util_dynarray_clear(&ro_mappings);
}
static void
pandecode_add_name(struct pandecode_mapped_memory *mem, uint64_t gpu_va, const char *name)
pandecode_add_name(struct pandecode_mapped_memory *mem, uint64_t gpu_va,
const char *name)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);
if (!name) {
/* If we don't have a name, assign one */
if (!name) {
/* If we don't have a name, assign one */
snprintf(mem->name, sizeof(mem->name) - 1,
"memory_%" PRIx64, gpu_va);
} else {
assert((strlen(name) + 1) < sizeof(mem->name));
memcpy(mem->name, name, strlen(name) + 1);
}
snprintf(mem->name, sizeof(mem->name) - 1, "memory_%" PRIx64, gpu_va);
} else {
assert((strlen(name) + 1) < sizeof(mem->name));
memcpy(mem->name, name, strlen(name) + 1);
}
}
void
pandecode_inject_mmap(uint64_t gpu_va, void *cpu, unsigned sz, const char *name)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);
/* First, search if we already mapped this and are just updating an address */
/* First, search if we already mapped this and are just updating an address */
struct pandecode_mapped_memory *existing =
pandecode_find_mapped_gpu_mem_containing_rw(gpu_va);
struct pandecode_mapped_memory *existing =
pandecode_find_mapped_gpu_mem_containing_rw(gpu_va);
if (existing && existing->gpu_va == gpu_va) {
existing->length = sz;
existing->addr = cpu;
pandecode_add_name(existing, gpu_va, name);
} else {
/* Otherwise, add a fresh mapping */
struct pandecode_mapped_memory *mapped_mem = NULL;
if (existing && existing->gpu_va == gpu_va) {
existing->length = sz;
existing->addr = cpu;
pandecode_add_name(existing, gpu_va, name);
} else {
/* Otherwise, add a fresh mapping */
struct pandecode_mapped_memory *mapped_mem = NULL;
mapped_mem = calloc(1, sizeof(*mapped_mem));
mapped_mem->gpu_va = gpu_va;
mapped_mem->length = sz;
mapped_mem->addr = cpu;
pandecode_add_name(mapped_mem, gpu_va, name);
mapped_mem = calloc(1, sizeof(*mapped_mem));
mapped_mem->gpu_va = gpu_va;
mapped_mem->length = sz;
mapped_mem->addr = cpu;
pandecode_add_name(mapped_mem, gpu_va, name);
/* Add it to the tree */
rb_tree_insert(&mmap_tree, &mapped_mem->node, pandecode_cmp);
}
/* Add it to the tree */
rb_tree_insert(&mmap_tree, &mapped_mem->node, pandecode_cmp);
}
simple_mtx_unlock(&pandecode_lock);
simple_mtx_unlock(&pandecode_lock);
}
void
pandecode_inject_free(uint64_t gpu_va, unsigned sz)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);
struct pandecode_mapped_memory *mem =
pandecode_find_mapped_gpu_mem_containing_rw(gpu_va);
struct pandecode_mapped_memory *mem =
pandecode_find_mapped_gpu_mem_containing_rw(gpu_va);
if (mem) {
assert(mem->gpu_va == gpu_va);
assert(mem->length == sz);
if (mem) {
assert(mem->gpu_va == gpu_va);
assert(mem->length == sz);
rb_tree_remove(&mmap_tree, &mem->node);
free(mem);
}
rb_tree_remove(&mmap_tree, &mem->node);
free(mem);
}
simple_mtx_unlock(&pandecode_lock);
simple_mtx_unlock(&pandecode_lock);
}
char *
pointer_as_memory_reference(uint64_t ptr)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);
struct pandecode_mapped_memory *mapped;
char *out = malloc(128);
struct pandecode_mapped_memory *mapped;
char *out = malloc(128);
/* Try to find the corresponding mapped zone */
/* Try to find the corresponding mapped zone */
mapped = pandecode_find_mapped_gpu_mem_containing_rw(ptr);
mapped = pandecode_find_mapped_gpu_mem_containing_rw(ptr);
if (mapped) {
snprintf(out, 128, "%s + %d", mapped->name, (int) (ptr - mapped->gpu_va));
return out;
}
if (mapped) {
snprintf(out, 128, "%s + %d", mapped->name, (int)(ptr - mapped->gpu_va));
return out;
}
/* Just use the raw address if other options are exhausted */
snprintf(out, 128, "0x%" PRIx64, ptr);
return out;
/* Just use the raw address if other options are exhausted */
snprintf(out, 128, "0x%" PRIx64, ptr);
return out;
}
static int pandecode_dump_frame_count = 0;
@@ -207,129 +207,153 @@ static bool force_stderr = false;
void
pandecode_dump_file_open(void)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);
if (pandecode_dump_stream)
return;
if (pandecode_dump_stream)
return;
/* This does a getenv every frame, so it is possible to use
* setenv to change the base at runtime.
*/
const char *dump_file_base = debug_get_option("PANDECODE_DUMP_FILE", "pandecode.dump");
if (force_stderr || !strcmp(dump_file_base, "stderr"))
pandecode_dump_stream = stderr;
else {
char buffer[1024];
snprintf(buffer, sizeof(buffer), "%s.%04d", dump_file_base, pandecode_dump_frame_count);
printf("pandecode: dump command stream to file %s\n", buffer);
pandecode_dump_stream = fopen(buffer, "w");
if (!pandecode_dump_stream)
fprintf(stderr,
"pandecode: failed to open command stream log file %s\n",
buffer);
}
/* This does a getenv every frame, so it is possible to use
* setenv to change the base at runtime.
*/
const char *dump_file_base =
debug_get_option("PANDECODE_DUMP_FILE", "pandecode.dump");
if (force_stderr || !strcmp(dump_file_base, "stderr"))
pandecode_dump_stream = stderr;
else {
char buffer[1024];
snprintf(buffer, sizeof(buffer), "%s.%04d", dump_file_base,
pandecode_dump_frame_count);
printf("pandecode: dump command stream to file %s\n", buffer);
pandecode_dump_stream = fopen(buffer, "w");
if (!pandecode_dump_stream)
fprintf(stderr,
"pandecode: failed to open command stream log file %s\n",
buffer);
}
}
static void
pandecode_dump_file_close(void)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);
if (pandecode_dump_stream && pandecode_dump_stream != stderr) {
if (fclose(pandecode_dump_stream))
perror("pandecode: dump file");
if (pandecode_dump_stream && pandecode_dump_stream != stderr) {
if (fclose(pandecode_dump_stream))
perror("pandecode: dump file");
pandecode_dump_stream = NULL;
}
pandecode_dump_stream = NULL;
}
}
void
pandecode_initialize(bool to_stderr)
{
force_stderr = to_stderr;
rb_tree_init(&mmap_tree);
util_dynarray_init(&ro_mappings, NULL);
force_stderr = to_stderr;
rb_tree_init(&mmap_tree);
util_dynarray_init(&ro_mappings, NULL);
}
void
pandecode_next_frame(void)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);
pandecode_dump_file_close();
pandecode_dump_frame_count++;
pandecode_dump_file_close();
pandecode_dump_frame_count++;
simple_mtx_unlock(&pandecode_lock);
simple_mtx_unlock(&pandecode_lock);
}
void
pandecode_close(void)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);
rb_tree_foreach_safe(struct pandecode_mapped_memory, it, &mmap_tree, node) {
rb_tree_remove(&mmap_tree, &it->node);
free(it);
}
rb_tree_foreach_safe(struct pandecode_mapped_memory, it, &mmap_tree, node) {
rb_tree_remove(&mmap_tree, &it->node);
free(it);
}
util_dynarray_fini(&ro_mappings);
pandecode_dump_file_close();
util_dynarray_fini(&ro_mappings);
pandecode_dump_file_close();
simple_mtx_unlock(&pandecode_lock);
simple_mtx_unlock(&pandecode_lock);
}
void
pandecode_dump_mappings(void)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);
pandecode_dump_file_open();
pandecode_dump_file_open();
rb_tree_foreach(struct pandecode_mapped_memory, it, &mmap_tree, node) {
if (!it->addr || !it->length)
continue;
rb_tree_foreach(struct pandecode_mapped_memory, it, &mmap_tree, node) {
if (!it->addr || !it->length)
continue;
fprintf(pandecode_dump_stream, "Buffer: %s gpu %" PRIx64 "\n\n",
it->name, it->gpu_va);
fprintf(pandecode_dump_stream, "Buffer: %s gpu %" PRIx64 "\n\n", it->name,
it->gpu_va);
pan_hexdump(pandecode_dump_stream, it->addr, it->length, false);
fprintf(pandecode_dump_stream, "\n");
}
pan_hexdump(pandecode_dump_stream, it->addr, it->length, false);
fprintf(pandecode_dump_stream, "\n");
}
fflush(pandecode_dump_stream);
simple_mtx_unlock(&pandecode_lock);
fflush(pandecode_dump_stream);
simple_mtx_unlock(&pandecode_lock);
}
void
pandecode_abort_on_fault(mali_ptr jc_gpu_va, unsigned gpu_id)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);
switch (pan_arch(gpu_id)) {
case 4: pandecode_abort_on_fault_v4(jc_gpu_va); break;
case 5: pandecode_abort_on_fault_v5(jc_gpu_va); break;
case 6: pandecode_abort_on_fault_v6(jc_gpu_va); break;
case 7: pandecode_abort_on_fault_v7(jc_gpu_va); break;
case 9: pandecode_abort_on_fault_v9(jc_gpu_va); break;
default: unreachable("Unsupported architecture");
}
switch (pan_arch(gpu_id)) {
case 4:
pandecode_abort_on_fault_v4(jc_gpu_va);
break;
case 5:
pandecode_abort_on_fault_v5(jc_gpu_va);
break;
case 6:
pandecode_abort_on_fault_v6(jc_gpu_va);
break;
case 7:
pandecode_abort_on_fault_v7(jc_gpu_va);
break;
case 9:
pandecode_abort_on_fault_v9(jc_gpu_va);
break;
default:
unreachable("Unsupported architecture");
}
simple_mtx_unlock(&pandecode_lock);
simple_mtx_unlock(&pandecode_lock);
}
void
pandecode_jc(mali_ptr jc_gpu_va, unsigned gpu_id)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);
switch (pan_arch(gpu_id)) {
case 4: pandecode_jc_v4(jc_gpu_va, gpu_id); break;
case 5: pandecode_jc_v5(jc_gpu_va, gpu_id); break;
case 6: pandecode_jc_v6(jc_gpu_va, gpu_id); break;
case 7: pandecode_jc_v7(jc_gpu_va, gpu_id); break;
case 9: pandecode_jc_v9(jc_gpu_va, gpu_id); break;
default: unreachable("Unsupported architecture");
}
switch (pan_arch(gpu_id)) {
case 4:
pandecode_jc_v4(jc_gpu_va, gpu_id);
break;
case 5:
pandecode_jc_v5(jc_gpu_va, gpu_id);
break;
case 6:
pandecode_jc_v6(jc_gpu_va, gpu_id);
break;
case 7:
pandecode_jc_v7(jc_gpu_va, gpu_id);
break;
case 9:
pandecode_jc_v9(jc_gpu_va, gpu_id);
break;
default:
unreachable("Unsupported architecture");
}
simple_mtx_unlock(&pandecode_lock);
simple_mtx_unlock(&pandecode_lock);
}


@@ -56,45 +56,45 @@
static inline unsigned
pan_arch(unsigned gpu_id)
{
switch (gpu_id) {
case 0x600:
case 0x620:
case 0x720:
return 4;
case 0x750:
case 0x820:
case 0x830:
case 0x860:
case 0x880:
return 5;
default:
return gpu_id >> 12;
}
switch (gpu_id) {
case 0x600:
case 0x620:
case 0x720:
return 4;
case 0x750:
case 0x820:
case 0x830:
case 0x860:
case 0x880:
return 5;
default:
return gpu_id >> 12;
}
}
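
For reference, the mapping above can be exercised with a tiny standalone harness (not part of this change; 0x7212 is only an illustrative Bifrost-era product ID whose top nibble is 7):

  #include <assert.h>
  #include <stdio.h>

  /* Copied from above: Midgard products use explicit IDs, everything newer
   * encodes the architecture major version in the top nibble. */
  static inline unsigned
  pan_arch(unsigned gpu_id)
  {
     switch (gpu_id) {
     case 0x600:
     case 0x620:
     case 0x720:
        return 4;
     case 0x750:
     case 0x820:
     case 0x830:
     case 0x860:
     case 0x880:
        return 5;
     default:
        return gpu_id >> 12;
     }
  }

  int
  main(void)
  {
     assert(pan_arch(0x720) == 4);  /* listed Midgard ID */
     assert(pan_arch(0x860) == 5);  /* listed Midgard ID */
     assert(pan_arch(0x7212) == 7); /* top nibble carries the arch */
     printf("ok\n");
     return 0;
  }
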
/* Base macro defined on the command line. */
#ifndef PAN_ARCH
# include "genxml/common_pack.h"
#include "genxml/common_pack.h"
#else
/* Suffixing macros */
#if (PAN_ARCH == 4)
# define GENX(X) X##_v4
# include "genxml/v4_pack.h"
#define GENX(X) X##_v4
#include "genxml/v4_pack.h"
#elif (PAN_ARCH == 5)
# define GENX(X) X##_v5
# include "genxml/v5_pack.h"
#define GENX(X) X##_v5
#include "genxml/v5_pack.h"
#elif (PAN_ARCH == 6)
# define GENX(X) X##_v6
# include "genxml/v6_pack.h"
#define GENX(X) X##_v6
#include "genxml/v6_pack.h"
#elif (PAN_ARCH == 7)
# define GENX(X) X##_v7
# include "genxml/v7_pack.h"
#define GENX(X) X##_v7
#include "genxml/v7_pack.h"
#elif (PAN_ARCH == 9)
# define GENX(X) X##_v9
# include "genxml/v9_pack.h"
#define GENX(X) X##_v9
#include "genxml/v9_pack.h"
#else
# error "Need to add suffixing macro for this architecture"
#error "Need to add suffixing macro for this architecture"
#endif
#endif /* PAN_ARCH */
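
The GENX() scheme is plain token pasting; the sketch below (outside the tree, with an invented symbol name) shows what a per-gen translation unit compiled with PAN_ARCH=7 effectively sees:

  #include <stdio.h>

  /* Stand-in for a v7 build: GENX(foo) expands to foo_v7, so each per-gen
   * object file gets a uniquely named copy of every GENX'd symbol. */
  #define PAN_ARCH 7
  #define GENX(X) X##_v7

  static void
  GENX(pan_example_init)(void) /* compiles as pan_example_init_v7() */
  {
     printf("initialized v%d path\n", PAN_ARCH);
  }

  int
  main(void)
  {
     pan_example_init_v7(); /* callers link against the suffixed name */
     return 0;
  }
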


@@ -50,8 +50,8 @@
* must also be cache-line aligned, so there can sometimes be a bit of padding
* between the header and body.
*
* As an example, a 64x64 RGBA framebuffer contains 64/16 = 4 tiles horizontally and
* 4 tiles vertically. There are 4*4=16 tiles in total, each containing 16
* As an example, a 64x64 RGBA framebuffer contains 64/16 = 4 tiles horizontally
* and 4 tiles vertically. There are 4*4=16 tiles in total, each containing 16
* bytes of metadata, so there is a 16*16=256 byte header. 64x64 is already
* tile aligned, so the body is 64*64 * 4 bytes per pixel = 16384 bytes of
* body.
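
The arithmetic in that comment generalizes directly; a hypothetical standalone helper (not from the tree, with a 64-byte body alignment standing in for the cache-line requirement) reproduces the 64x64 numbers:

  #include <stdio.h>

  #define AFBC_SB_DIM      16 /* 16x16 pixel superblocks, as described above */
  #define AFBC_HEADER_SIZE 16 /* bytes of metadata per superblock */

  /* Illustrative only: compute header size, body offset and body size. */
  static void
  afbc_layout(unsigned width, unsigned height, unsigned bytes_per_pixel)
  {
     unsigned sb_x = (width + AFBC_SB_DIM - 1) / AFBC_SB_DIM;
     unsigned sb_y = (height + AFBC_SB_DIM - 1) / AFBC_SB_DIM;
     unsigned header = sb_x * sb_y * AFBC_HEADER_SIZE;
     unsigned body_offset = (header + 63) & ~63u; /* assumed 64B alignment */
     unsigned body = sb_x * AFBC_SB_DIM * sb_y * AFBC_SB_DIM * bytes_per_pixel;

     printf("%ux%u: %u superblocks, %u byte header (body at %u), %u byte body\n",
            width, height, sb_x * sb_y, header, body_offset, body);
  }

  int
  main(void)
  {
     afbc_layout(64, 64, 4); /* matches the worked example: 16 SBs, 256, 16384 */
     return 0;
  }
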
@@ -69,45 +69,45 @@
static enum pipe_format
unswizzled_format(enum pipe_format format)
{
switch (format) {
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_I8_UNORM:
return PIPE_FORMAT_R8_UNORM;
switch (format) {
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_I8_UNORM:
return PIPE_FORMAT_R8_UNORM;
case PIPE_FORMAT_L8A8_UNORM:
return PIPE_FORMAT_R8G8_UNORM;
case PIPE_FORMAT_L8A8_UNORM:
return PIPE_FORMAT_R8G8_UNORM;
case PIPE_FORMAT_B8G8R8_UNORM:
return PIPE_FORMAT_R8G8B8_UNORM;
case PIPE_FORMAT_B8G8R8_UNORM:
return PIPE_FORMAT_R8G8B8_UNORM;
case PIPE_FORMAT_R8G8B8X8_UNORM:
case PIPE_FORMAT_B8G8R8A8_UNORM:
case PIPE_FORMAT_B8G8R8X8_UNORM:
case PIPE_FORMAT_A8R8G8B8_UNORM:
case PIPE_FORMAT_X8R8G8B8_UNORM:
case PIPE_FORMAT_X8B8G8R8_UNORM:
case PIPE_FORMAT_A8B8G8R8_UNORM:
return PIPE_FORMAT_R8G8B8A8_UNORM;
case PIPE_FORMAT_R8G8B8X8_UNORM:
case PIPE_FORMAT_B8G8R8A8_UNORM:
case PIPE_FORMAT_B8G8R8X8_UNORM:
case PIPE_FORMAT_A8R8G8B8_UNORM:
case PIPE_FORMAT_X8R8G8B8_UNORM:
case PIPE_FORMAT_X8B8G8R8_UNORM:
case PIPE_FORMAT_A8B8G8R8_UNORM:
return PIPE_FORMAT_R8G8B8A8_UNORM;
case PIPE_FORMAT_B5G6R5_UNORM:
return PIPE_FORMAT_R5G6B5_UNORM;
case PIPE_FORMAT_B5G6R5_UNORM:
return PIPE_FORMAT_R5G6B5_UNORM;
case PIPE_FORMAT_B5G5R5A1_UNORM:
return PIPE_FORMAT_R5G5B5A1_UNORM;
case PIPE_FORMAT_B5G5R5A1_UNORM:
return PIPE_FORMAT_R5G5B5A1_UNORM;
case PIPE_FORMAT_R10G10B10X2_UNORM:
case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_B10G10R10X2_UNORM:
return PIPE_FORMAT_R10G10B10A2_UNORM;
case PIPE_FORMAT_R10G10B10X2_UNORM:
case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_B10G10R10X2_UNORM:
return PIPE_FORMAT_R10G10B10A2_UNORM;
case PIPE_FORMAT_A4B4G4R4_UNORM:
case PIPE_FORMAT_B4G4R4A4_UNORM:
return PIPE_FORMAT_R4G4B4A4_UNORM;
case PIPE_FORMAT_A4B4G4R4_UNORM:
case PIPE_FORMAT_B4G4R4A4_UNORM:
return PIPE_FORMAT_R4G4B4A4_UNORM;
default:
return format;
}
default:
return format;
}
}
/* AFBC supports compressing a few canonical formats. Additional formats are
@@ -118,29 +118,29 @@ unswizzled_format(enum pipe_format format)
enum pan_afbc_mode
panfrost_afbc_format(unsigned arch, enum pipe_format format)
{
/* Luminance-alpha not supported for AFBC on v7+ */
switch (format) {
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8A8_UNORM:
if (arch >= 7)
return PAN_AFBC_MODE_INVALID;
else
break;
default:
break;
}
/* Luminance-alpha not supported for AFBC on v7+ */
switch (format) {
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8A8_UNORM:
if (arch >= 7)
return PAN_AFBC_MODE_INVALID;
else
break;
default:
break;
}
/* sRGB does not change the pixel format itself, only the
* interpretation. The interpretation is handled by conversion hardware
* independent to the compression hardware, so we can compress sRGB
* formats by using the corresponding linear format.
*/
format = util_format_linear(format);
/* sRGB does not change the pixel format itself, only the
* interpretation. The interpretation is handled by conversion hardware
* independent to the compression hardware, so we can compress sRGB
* formats by using the corresponding linear format.
*/
format = util_format_linear(format);
/* We handle swizzling orthogonally to AFBC */
format = unswizzled_format(format);
/* We handle swizzling orthogonally to AFBC */
format = unswizzled_format(format);
/* clang-format off */
switch (format) {
@@ -166,9 +166,10 @@ panfrost_afbc_format(unsigned arch, enum pipe_format format)
/* A format may be compressed as AFBC if it has an AFBC internal format */
bool
panfrost_format_supports_afbc(const struct panfrost_device *dev, enum pipe_format format)
panfrost_format_supports_afbc(const struct panfrost_device *dev,
enum pipe_format format)
{
return panfrost_afbc_format(dev->arch, format) != PAN_AFBC_MODE_INVALID;
return panfrost_afbc_format(dev->arch, format) != PAN_AFBC_MODE_INVALID;
}
/* The lossless colour transform (AFBC_FORMAT_MOD_YTR) requires RGB. */
@@ -176,15 +177,14 @@ panfrost_format_supports_afbc(const struct panfrost_device *dev, enum pipe_forma
bool
panfrost_afbc_can_ytr(enum pipe_format format)
{
const struct util_format_description *desc =
util_format_description(format);
const struct util_format_description *desc = util_format_description(format);
/* YTR is only defined for RGB(A) */
if (desc->nr_channels != 3 && desc->nr_channels != 4)
return false;
/* YTR is only defined for RGB(A) */
if (desc->nr_channels != 3 && desc->nr_channels != 4)
return false;
/* The fourth channel if it exists doesn't matter */
return desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB;
/* The fourth channel if it exists doesn't matter */
return desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB;
}
/*
@@ -194,5 +194,5 @@ panfrost_afbc_can_ytr(enum pipe_format format)
bool
panfrost_afbc_can_tile(const struct panfrost_device *dev)
{
return (dev->arch >= 7);
return (dev->arch >= 7);
}


@@ -39,91 +39,92 @@
static unsigned
panfrost_small_padded_vertex_count(unsigned idx)
{
if (idx < 10)
return idx;
else
return (idx + 1) & ~1;
if (idx < 10)
return idx;
else
return (idx + 1) & ~1;
}
static unsigned
panfrost_large_padded_vertex_count(uint32_t vertex_count)
{
/* First, we have to find the highest set one */
unsigned highest = 32 - __builtin_clz(vertex_count);
/* First, we have to find the highest set one */
unsigned highest = 32 - __builtin_clz(vertex_count);
/* Using that, we mask out the highest 4-bits */
unsigned n = highest - 4;
unsigned nibble = (vertex_count >> n) & 0xF;
/* Using that, we mask out the highest 4-bits */
unsigned n = highest - 4;
unsigned nibble = (vertex_count >> n) & 0xF;
/* Great, we have the nibble. Now we can just try possibilities. Note
* that we don't care about the bottom most bit in most cases, and we
* know the top bit must be 1 */
/* Great, we have the nibble. Now we can just try possibilities. Note
* that we don't care about the bottom most bit in most cases, and we
* know the top bit must be 1 */
unsigned middle_two = (nibble >> 1) & 0x3;
unsigned middle_two = (nibble >> 1) & 0x3;
switch (middle_two) {
case 0b00:
if (!(nibble & 1))
return (1 << n) * 9;
else
return (1 << (n + 1)) * 5;
case 0b01:
return (1 << (n + 2)) * 3;
case 0b10:
return (1 << (n + 1)) * 7;
case 0b11:
return (1 << (n + 4));
default:
return 0; /* unreachable */
}
switch (middle_two) {
case 0b00:
if (!(nibble & 1))
return (1 << n) * 9;
else
return (1 << (n + 1)) * 5;
case 0b01:
return (1 << (n + 2)) * 3;
case 0b10:
return (1 << (n + 1)) * 7;
case 0b11:
return (1 << (n + 4));
default:
return 0; /* unreachable */
}
}
unsigned
panfrost_padded_vertex_count(unsigned vertex_count)
{
if (vertex_count < 20)
return panfrost_small_padded_vertex_count(vertex_count);
else
return panfrost_large_padded_vertex_count(vertex_count);
if (vertex_count < 20)
return panfrost_small_padded_vertex_count(vertex_count);
else
return panfrost_large_padded_vertex_count(vertex_count);
}
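
A few worked values make the nibble analysis above more concrete. The sketch below collapses the two helpers into one standalone function (same logic as above, not part of the change) and checks that the padded counts are always at least the input and take the 2^n * {1, 3, 5, 7, 9} shapes the returns above produce:

  #include <assert.h>
  #include <stdio.h>

  /* Collapsed copy of the padding helpers above, for illustration only. */
  static unsigned
  padded_vertex_count(unsigned vertex_count)
  {
     if (vertex_count < 20)
        return vertex_count < 10 ? vertex_count : ((vertex_count + 1) & ~1);

     unsigned highest = 32 - __builtin_clz(vertex_count);
     unsigned n = highest - 4;
     unsigned nibble = (vertex_count >> n) & 0xF;
     unsigned middle_two = (nibble >> 1) & 0x3;

     switch (middle_two) {
     case 0: return (nibble & 1) ? (1 << (n + 1)) * 5 : (1 << n) * 9;
     case 1: return (1 << (n + 2)) * 3;
     case 2: return (1 << (n + 1)) * 7;
     default: return 1 << (n + 4);
     }
  }

  int
  main(void)
  {
     assert(padded_vertex_count(3) == 3);     /* small counts kept (or evened) */
     assert(padded_vertex_count(11) == 12);
     assert(padded_vertex_count(21) == 24);   /* 24 = 2^3 * 3 */
     assert(padded_vertex_count(100) == 112); /* 112 = 2^4 * 7 */
     printf("ok\n");
     return 0;
  }
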
/* The much, much more irritating case -- instancing is enabled. See
* panfrost_job.h for notes on how this works */
unsigned
panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, unsigned *extra_flags)
panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift,
unsigned *extra_flags)
{
/* We have a NPOT divisor. Here's the fun one (multipling by
* the inverse and shifting) */
/* We have a NPOT divisor. Here's the fun one (multipling by
* the inverse and shifting) */
/* floor(log2(d)) */
unsigned shift = util_logbase2(hw_divisor);
/* floor(log2(d)) */
unsigned shift = util_logbase2(hw_divisor);
/* m = ceil(2^(32 + shift) / d) */
uint64_t shift_hi = 32 + shift;
uint64_t t = 1ll << shift_hi;
double t_f = t;
double hw_divisor_d = hw_divisor;
double m_f = ceil(t_f / hw_divisor_d);
unsigned m = m_f;
/* m = ceil(2^(32 + shift) / d) */
uint64_t shift_hi = 32 + shift;
uint64_t t = 1ll << shift_hi;
double t_f = t;
double hw_divisor_d = hw_divisor;
double m_f = ceil(t_f / hw_divisor_d);
unsigned m = m_f;
/* Default case */
uint32_t magic_divisor = m;
/* Default case */
uint32_t magic_divisor = m;
/* e = 2^(shift + 32) % d */
uint64_t e = t % hw_divisor;
/* e = 2^(shift + 32) % d */
uint64_t e = t % hw_divisor;
/* Apply round-down algorithm? e <= 2^shift?. XXX: The blob
* seems to use a different condition */
if (e <= (1ll << shift)) {
magic_divisor = m - 1;
*extra_flags = 1;
}
/* Apply round-down algorithm? e <= 2^shift?. XXX: The blob
* seems to use a different condition */
if (e <= (1ll << shift)) {
magic_divisor = m - 1;
*extra_flags = 1;
}
/* Top flag implicitly set */
assert(magic_divisor & (1u << 31));
magic_divisor &= ~(1u << 31);
*o_shift = shift;
/* Top flag implicitly set */
assert(magic_divisor & (1u << 31));
magic_divisor &= ~(1u << 31);
*o_shift = shift;
return magic_divisor;
return magic_divisor;
}
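
A worked value helps here. The sketch below restates the arithmetic standalone (util_logbase2 replaced with a builtin; how the hardware then consumes magic/shift/extra_flags is described in panfrost_job.h and not reproduced), checking the round-down case for a divisor of 3:

  #include <assert.h>
  #include <math.h>
  #include <stdint.h>
  #include <stdio.h>

  /* Illustrative restatement of the magic-divisor computation above. */
  static uint32_t
  magic_divisor(uint32_t d, unsigned *o_shift, unsigned *extra_flags)
  {
     unsigned shift = 31 - __builtin_clz(d);              /* floor(log2(d)) */
     uint64_t t = 1ull << (32 + shift);
     uint32_t m = (uint32_t)ceil((double)t / (double)d);  /* ceil(2^(32+shift)/d) */
     uint64_t e = t % d;                                   /* 2^(32+shift) mod d */
     uint32_t magic = m;

     *extra_flags = 0;
     if (e <= (1ull << shift)) { /* round-down variant */
        magic = m - 1;
        *extra_flags = 1;
     }

     assert(magic & (1u << 31)); /* top bit is implicit */
     *o_shift = shift;
     return magic & ~(1u << 31);
  }

  int
  main(void)
  {
     unsigned shift, flags;
     uint32_t magic = magic_divisor(3, &shift, &flags);

     /* For d = 3: m = ceil(2^33 / 3) = 0xAAAAAAAB and e = 2 <= 2^1, so the
      * round-down form 0xAAAAAAAA is used, then the top bit is stripped. */
     assert(shift == 1 && flags == 1 && magic == 0x2AAAAAAA);
     printf("d=3 -> magic=0x%08x shift=%u flags=%u\n", magic, shift, flags);
     return 0;
  }
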

File diff suppressed because it is too large


@@ -27,10 +27,10 @@
#include "genxml/gen_macros.h"
#include "util/u_dynarray.h"
#include "util/format/u_format.h"
#include "compiler/shader_enums.h"
#include "compiler/nir/nir.h"
#include "compiler/shader_enums.h"
#include "util/format/u_format.h"
#include "util/u_dynarray.h"
#include "panfrost/util/pan_ir.h"
@@ -38,84 +38,78 @@ struct MALI_BLEND_EQUATION;
struct panfrost_device;
struct pan_blend_equation {
unsigned blend_enable : 1;
enum blend_func rgb_func : 3;
unsigned rgb_invert_src_factor : 1;
enum blend_factor rgb_src_factor : 4;
unsigned rgb_invert_dst_factor : 1;
enum blend_factor rgb_dst_factor : 4;
enum blend_func alpha_func : 3;
unsigned alpha_invert_src_factor : 1;
enum blend_factor alpha_src_factor : 4;
unsigned alpha_invert_dst_factor : 1;
enum blend_factor alpha_dst_factor : 4;
unsigned color_mask : 4;
unsigned blend_enable : 1;
enum blend_func rgb_func : 3;
unsigned rgb_invert_src_factor : 1;
enum blend_factor rgb_src_factor : 4;
unsigned rgb_invert_dst_factor : 1;
enum blend_factor rgb_dst_factor : 4;
enum blend_func alpha_func : 3;
unsigned alpha_invert_src_factor : 1;
enum blend_factor alpha_src_factor : 4;
unsigned alpha_invert_dst_factor : 1;
enum blend_factor alpha_dst_factor : 4;
unsigned color_mask : 4;
};
struct pan_blend_rt_state {
/* RT format */
enum pipe_format format;
/* RT format */
enum pipe_format format;
/* Number of samples */
unsigned nr_samples;
/* Number of samples */
unsigned nr_samples;
struct pan_blend_equation equation;
struct pan_blend_equation equation;
};
struct pan_blend_state {
bool logicop_enable;
enum pipe_logicop logicop_func;
float constants[4];
unsigned rt_count;
struct pan_blend_rt_state rts[8];
bool logicop_enable;
enum pipe_logicop logicop_func;
float constants[4];
unsigned rt_count;
struct pan_blend_rt_state rts[8];
};
struct pan_blend_shader_key {
enum pipe_format format;
nir_alu_type src0_type, src1_type;
uint32_t rt : 3;
uint32_t has_constants : 1;
uint32_t logicop_enable : 1;
uint32_t logicop_func:4;
uint32_t nr_samples : 5;
uint32_t padding : 18;
struct pan_blend_equation equation;
enum pipe_format format;
nir_alu_type src0_type, src1_type;
uint32_t rt : 3;
uint32_t has_constants : 1;
uint32_t logicop_enable : 1;
uint32_t logicop_func : 4;
uint32_t nr_samples : 5;
uint32_t padding : 18;
struct pan_blend_equation equation;
};
struct pan_blend_shader_variant {
struct list_head node;
float constants[4];
struct util_dynarray binary;
unsigned first_tag;
unsigned work_reg_count;
struct list_head node;
float constants[4];
struct util_dynarray binary;
unsigned first_tag;
unsigned work_reg_count;
};
#define PAN_BLEND_SHADER_MAX_VARIANTS 32
struct pan_blend_shader {
struct pan_blend_shader_key key;
unsigned nvariants;
struct list_head variants;
struct pan_blend_shader_key key;
unsigned nvariants;
struct list_head variants;
};
bool
pan_blend_reads_dest(const struct pan_blend_equation eq);
bool pan_blend_reads_dest(const struct pan_blend_equation eq);
bool
pan_blend_can_fixed_function(const struct pan_blend_equation equation,
bool supports_2src);
bool pan_blend_can_fixed_function(const struct pan_blend_equation equation,
bool supports_2src);
bool
pan_blend_is_opaque(const struct pan_blend_equation eq);
bool pan_blend_is_opaque(const struct pan_blend_equation eq);
bool
pan_blend_alpha_zero_nop(const struct pan_blend_equation eq);
bool pan_blend_alpha_zero_nop(const struct pan_blend_equation eq);
bool
pan_blend_alpha_one_store(const struct pan_blend_equation eq);
bool pan_blend_alpha_one_store(const struct pan_blend_equation eq);
unsigned
pan_blend_constant_mask(const struct pan_blend_equation eq);
unsigned pan_blend_constant_mask(const struct pan_blend_equation eq);
/* Fixed-function blending only supports a single constant, so if multiple bits
* are set in constant_mask, the constants must match. Therefore we may pick
@@ -124,7 +118,7 @@ pan_blend_constant_mask(const struct pan_blend_equation eq);
static inline float
pan_blend_get_constant(unsigned mask, const float *constants)
{
return mask ? constants[ffs(mask) - 1] : 0.0;
return mask ? constants[ffs(mask) - 1] : 0.0;
}
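
A small standalone illustration of that rule (not part of the change; the mask and constant values are made up): when the equation only reads the blend colour through, say, the R and B factors, constant_mask has those two bits set, the referenced values must match, and returning the value at the first set bit is enough.

  #include <assert.h>
  #include <stdio.h>
  #include <strings.h> /* ffs() */

  static inline float
  pan_blend_get_constant(unsigned mask, const float *constants)
  {
     return mask ? constants[ffs(mask) - 1] : 0.0;
  }

  int
  main(void)
  {
     /* Blend colour (R, G, B, A) = (0.5, 0.25, 0.5, 1.0); mask 0b0101 means
      * only R and B are referenced, and both carry the same value. */
     const float constants[4] = {0.5f, 0.25f, 0.5f, 1.0f};

     assert(pan_blend_get_constant(0x5, constants) == 0.5f);
     assert(pan_blend_get_constant(0x0, constants) == 0.0f); /* unused */
     printf("ok\n");
     return 0;
  }
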
/* v6 doesn't support blend constants in FF blend equations whatsoever, and v7
@@ -134,7 +128,7 @@ pan_blend_get_constant(unsigned mask, const float *constants)
static inline bool
pan_blend_supports_constant(unsigned arch, unsigned rt)
{
return !((arch == 6) || (arch == 7 && rt > 0));
return !((arch == 6) || (arch == 7 && rt > 0));
}
/* The SOURCE_2 value is new in Bifrost */
@@ -142,50 +136,39 @@ pan_blend_supports_constant(unsigned arch, unsigned rt)
static inline bool
pan_blend_supports_2src(unsigned arch)
{
return (arch >= 6);
return (arch >= 6);
}
bool
pan_blend_is_homogenous_constant(unsigned mask, const float *constants);
bool pan_blend_is_homogenous_constant(unsigned mask, const float *constants);
void
pan_blend_to_fixed_function_equation(const struct pan_blend_equation eq,
struct MALI_BLEND_EQUATION *equation);
void pan_blend_to_fixed_function_equation(const struct pan_blend_equation eq,
struct MALI_BLEND_EQUATION *equation);
uint32_t
pan_pack_blend(const struct pan_blend_equation equation);
uint32_t pan_pack_blend(const struct pan_blend_equation equation);
void
pan_blend_shaders_init(struct panfrost_device *dev);
void pan_blend_shaders_init(struct panfrost_device *dev);
void
pan_blend_shaders_cleanup(struct panfrost_device *dev);
void pan_blend_shaders_cleanup(struct panfrost_device *dev);
#ifdef PAN_ARCH
nir_shader *
GENX(pan_blend_create_shader)(const struct panfrost_device *dev,
const struct pan_blend_state *state,
nir_alu_type src0_type,
nir_alu_type src1_type,
unsigned rt);
nir_shader *GENX(pan_blend_create_shader)(const struct panfrost_device *dev,
const struct pan_blend_state *state,
nir_alu_type src0_type,
nir_alu_type src1_type, unsigned rt);
#if PAN_ARCH >= 6
uint64_t
GENX(pan_blend_get_internal_desc)(const struct panfrost_device *dev,
enum pipe_format fmt, unsigned rt,
unsigned force_size, bool dithered);
uint64_t GENX(pan_blend_get_internal_desc)(const struct panfrost_device *dev,
enum pipe_format fmt, unsigned rt,
unsigned force_size, bool dithered);
#endif
/* Take blend_shaders.lock before calling this function and release it when
* you're done with the shader variant object.
*/
struct pan_blend_shader_variant *
GENX(pan_blend_get_shader_locked)(const struct panfrost_device *dev,
const struct pan_blend_state *state,
nir_alu_type src0_type,
nir_alu_type src1_type,
unsigned rt);
struct pan_blend_shader_variant *GENX(pan_blend_get_shader_locked)(
const struct panfrost_device *dev, const struct pan_blend_state *state,
nir_alu_type src0_type, nir_alu_type src1_type, unsigned rt);
#endif
#endif

File diff suppressed because it is too large


@@ -27,12 +27,12 @@
#include "genxml/gen_macros.h"
#include "panfrost-job.h"
#include "util/format/u_format.h"
#include "pan_cs.h"
#include "pan_pool.h"
#include "pan_texture.h"
#include "pan_util.h"
#include "util/format/u_format.h"
#include "panfrost-job.h"
struct pan_fb_info;
struct pan_scoreboard;
@@ -40,90 +40,84 @@ struct pan_pool;
struct panfrost_device;
struct pan_blit_info {
struct {
struct {
const struct pan_image *image;
enum pipe_format format;
} planes[2];
unsigned level;
struct {
int32_t x, y, z;
unsigned layer;
} start, end;
} src, dst;
struct {
bool enable;
uint16_t minx, miny, maxx, maxy;
} scissor;
bool nearest;
struct {
struct {
const struct pan_image *image;
enum pipe_format format;
} planes[2];
unsigned level;
struct {
int32_t x, y, z;
unsigned layer;
} start, end;
} src, dst;
struct {
bool enable;
uint16_t minx, miny, maxx, maxy;
} scissor;
bool nearest;
};
struct pan_blit_context {
mali_ptr rsd, vpd;
mali_ptr textures;
mali_ptr samplers;
mali_ptr position;
struct {
enum mali_texture_dimension dim;
struct {
float x, y;
} start, end;
union {
unsigned layer_offset;
float z_offset;
};
} src;
struct {
int32_t layer_offset;
int32_t cur_layer;
int32_t last_layer;
} dst;
float z_scale;
mali_ptr rsd, vpd;
mali_ptr textures;
mali_ptr samplers;
mali_ptr position;
struct {
enum mali_texture_dimension dim;
struct {
float x, y;
} start, end;
union {
unsigned layer_offset;
float z_offset;
};
} src;
struct {
int32_t layer_offset;
int32_t cur_layer;
int32_t last_layer;
} dst;
float z_scale;
};
void
GENX(pan_blitter_init)(struct panfrost_device *dev,
struct pan_pool *bin_pool,
struct pan_pool *desc_pool);
void GENX(pan_blitter_init)(struct panfrost_device *dev,
struct pan_pool *bin_pool,
struct pan_pool *desc_pool);
void
GENX(pan_blitter_cleanup)(struct panfrost_device *dev);
void GENX(pan_blitter_cleanup)(struct panfrost_device *dev);
unsigned
GENX(pan_preload_fb)(struct pan_pool *desc_pool,
struct pan_scoreboard *scoreboard,
struct pan_fb_info *fb,
mali_ptr tsd, mali_ptr tiler,
struct panfrost_ptr *jobs);
unsigned GENX(pan_preload_fb)(struct pan_pool *desc_pool,
struct pan_scoreboard *scoreboard,
struct pan_fb_info *fb, mali_ptr tsd,
mali_ptr tiler, struct panfrost_ptr *jobs);
void
GENX(pan_blit_ctx_init)(struct panfrost_device *dev,
const struct pan_blit_info *info,
struct pan_pool *blit_pool,
struct pan_blit_context *ctx);
void GENX(pan_blit_ctx_init)(struct panfrost_device *dev,
const struct pan_blit_info *info,
struct pan_pool *blit_pool,
struct pan_blit_context *ctx);
static inline bool
pan_blit_next_surface(struct pan_blit_context *ctx)
{
if (ctx->dst.last_layer < ctx->dst.layer_offset) {
if (ctx->dst.cur_layer <= ctx->dst.last_layer)
return false;
if (ctx->dst.last_layer < ctx->dst.layer_offset) {
if (ctx->dst.cur_layer <= ctx->dst.last_layer)
return false;
ctx->dst.cur_layer--;
} else {
if (ctx->dst.cur_layer >= ctx->dst.last_layer)
return false;
ctx->dst.cur_layer--;
} else {
if (ctx->dst.cur_layer >= ctx->dst.last_layer)
return false;
ctx->dst.cur_layer++;
}
ctx->dst.cur_layer++;
}
return true;
return true;
}
struct panfrost_ptr
GENX(pan_blit)(struct pan_blit_context *ctx,
struct pan_pool *pool,
struct pan_scoreboard *scoreboard,
mali_ptr tsd, mali_ptr tiler);
struct panfrost_ptr GENX(pan_blit)(struct pan_blit_context *ctx,
struct pan_pool *pool,
struct pan_scoreboard *scoreboard,
mali_ptr tsd, mali_ptr tiler);
#endif


@@ -24,10 +24,10 @@
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
*/
#include <errno.h>
#include <stdio.h>
#include <fcntl.h>
#include <xf86drm.h>
#include <pthread.h>
#include <stdio.h>
#include <xf86drm.h>
#include "drm-uapi/panfrost_drm.h"
#include "pan_bo.h"
@@ -56,53 +56,53 @@
*/
static struct panfrost_bo *
panfrost_bo_alloc(struct panfrost_device *dev, size_t size,
uint32_t flags, const char *label)
panfrost_bo_alloc(struct panfrost_device *dev, size_t size, uint32_t flags,
const char *label)
{
struct drm_panfrost_create_bo create_bo = { .size = size };
struct panfrost_bo *bo;
int ret;
struct drm_panfrost_create_bo create_bo = {.size = size};
struct panfrost_bo *bo;
int ret;
if (dev->kernel_version->version_major > 1 ||
dev->kernel_version->version_minor >= 1) {
if (flags & PAN_BO_GROWABLE)
create_bo.flags |= PANFROST_BO_HEAP;
if (!(flags & PAN_BO_EXECUTE))
create_bo.flags |= PANFROST_BO_NOEXEC;
}
if (dev->kernel_version->version_major > 1 ||
dev->kernel_version->version_minor >= 1) {
if (flags & PAN_BO_GROWABLE)
create_bo.flags |= PANFROST_BO_HEAP;
if (!(flags & PAN_BO_EXECUTE))
create_bo.flags |= PANFROST_BO_NOEXEC;
}
ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo);
if (ret) {
fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n");
return NULL;
}
ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo);
if (ret) {
fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n");
return NULL;
}
bo = pan_lookup_bo(dev, create_bo.handle);
assert(!memcmp(bo, &((struct panfrost_bo){}), sizeof(*bo)));
bo = pan_lookup_bo(dev, create_bo.handle);
assert(!memcmp(bo, &((struct panfrost_bo){}), sizeof(*bo)));
bo->size = create_bo.size;
bo->ptr.gpu = create_bo.offset;
bo->gem_handle = create_bo.handle;
bo->flags = flags;
bo->dev = dev;
bo->label = label;
return bo;
bo->size = create_bo.size;
bo->ptr.gpu = create_bo.offset;
bo->gem_handle = create_bo.handle;
bo->flags = flags;
bo->dev = dev;
bo->label = label;
return bo;
}
static void
panfrost_bo_free(struct panfrost_bo *bo)
{
struct drm_gem_close gem_close = { .handle = bo->gem_handle };
int ret;
struct drm_gem_close gem_close = {.handle = bo->gem_handle};
int ret;
ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
if (ret) {
fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
assert(0);
}
ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
if (ret) {
fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
assert(0);
}
/* BO will be freed with the sparse array, but zero to indicate free */
memset(bo, 0, sizeof(*bo));
/* BO will be freed with the sparse array, but zero to indicate free */
memset(bo, 0, sizeof(*bo));
}
/* Returns true if the BO is ready, false otherwise.
@@ -113,44 +113,44 @@ panfrost_bo_free(struct panfrost_bo *bo)
bool
panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, bool wait_readers)
{
struct drm_panfrost_wait_bo req = {
.handle = bo->gem_handle,
.timeout_ns = timeout_ns,
};
int ret;
struct drm_panfrost_wait_bo req = {
.handle = bo->gem_handle,
.timeout_ns = timeout_ns,
};
int ret;
/* If the BO has been exported or imported we can't rely on the cached
* state, we need to call the WAIT_BO ioctl.
*/
if (!(bo->flags & PAN_BO_SHARED)) {
/* If ->gpu_access is 0, the BO is idle, no need to wait. */
if (!bo->gpu_access)
return true;
/* If the BO has been exported or imported we can't rely on the cached
* state, we need to call the WAIT_BO ioctl.
*/
if (!(bo->flags & PAN_BO_SHARED)) {
/* If ->gpu_access is 0, the BO is idle, no need to wait. */
if (!bo->gpu_access)
return true;
/* If the caller only wants to wait for writers and no
* writes are pending, we don't have to wait.
*/
if (!wait_readers && !(bo->gpu_access & PAN_BO_ACCESS_WRITE))
return true;
}
/* If the caller only wants to wait for writers and no
* writes are pending, we don't have to wait.
*/
if (!wait_readers && !(bo->gpu_access & PAN_BO_ACCESS_WRITE))
return true;
}
/* The ioctl returns >= 0 value when the BO we are waiting for is ready
* -1 otherwise.
*/
ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req);
if (ret != -1) {
/* Set gpu_access to 0 so that the next call to bo_wait()
* doesn't have to call the WAIT_BO ioctl.
*/
bo->gpu_access = 0;
return true;
}
/* The ioctl returns >= 0 value when the BO we are waiting for is ready
* -1 otherwise.
*/
ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req);
if (ret != -1) {
/* Set gpu_access to 0 so that the next call to bo_wait()
* doesn't have to call the WAIT_BO ioctl.
*/
bo->gpu_access = 0;
return true;
}
/* If errno is not ETIMEDOUT or EBUSY that means the handle we passed
* is invalid, which shouldn't happen here.
*/
assert(errno == ETIMEDOUT || errno == EBUSY);
return false;
/* If errno is not ETIMEDOUT or EBUSY that means the handle we passed
* is invalid, which shouldn't happen here.
*/
assert(errno == ETIMEDOUT || errno == EBUSY);
return false;
}
/* Helper to calculate the bucket index of a BO */
@@ -158,24 +158,23 @@ panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, bool wait_readers)
static unsigned
pan_bucket_index(unsigned size)
{
/* Round down to POT to compute a bucket index */
/* Round down to POT to compute a bucket index */
unsigned bucket_index = util_logbase2(size);
unsigned bucket_index = util_logbase2(size);
/* Clamp the bucket index; all huge allocations will be
* sorted into the largest bucket */
/* Clamp the bucket index; all huge allocations will be
* sorted into the largest bucket */
bucket_index = CLAMP(bucket_index, MIN_BO_CACHE_BUCKET,
MAX_BO_CACHE_BUCKET);
bucket_index = CLAMP(bucket_index, MIN_BO_CACHE_BUCKET, MAX_BO_CACHE_BUCKET);
/* Reindex from 0 */
return (bucket_index - MIN_BO_CACHE_BUCKET);
/* Reindex from 0 */
return (bucket_index - MIN_BO_CACHE_BUCKET);
}
static struct list_head *
pan_bucket(struct panfrost_device *dev, unsigned size)
{
return &dev->bo_cache.buckets[pan_bucket_index(size)];
return &dev->bo_cache.buckets[pan_bucket_index(size)];
}
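
A standalone sketch of the bucketing (not part of the change). The MIN/MAX_BO_CACHE_BUCKET values of 12 and 22 are an assumption standing in for the definitions in pan_device.h, and CLAMP and util_logbase2 are restated locally so the example runs on its own:

  #include <assert.h>
  #include <stdio.h>

  /* Assumed bucket range: 2^12 (4 KiB) through 2^22 (4 MiB). */
  #define MIN_BO_CACHE_BUCKET 12
  #define MAX_BO_CACHE_BUCKET 22

  #define CLAMP(x, lo, hi) ((x) < (lo) ? (lo) : (x) > (hi) ? (hi) : (x))

  static unsigned
  util_logbase2(unsigned n) /* floor(log2(n)), same contract as Mesa's helper */
  {
     return 31 - __builtin_clz(n);
  }

  static unsigned
  pan_bucket_index(unsigned size)
  {
     unsigned bucket_index = util_logbase2(size);
     bucket_index = CLAMP(bucket_index, MIN_BO_CACHE_BUCKET, MAX_BO_CACHE_BUCKET);
     return bucket_index - MIN_BO_CACHE_BUCKET;
  }

  int
  main(void)
  {
     assert(pan_bucket_index(4096) == 0);       /* 4 KiB -> smallest bucket */
     assert(pan_bucket_index(300000) == 6);     /* ~293 KiB rounds down to 256 KiB */
     assert(pan_bucket_index(128 << 20) == 10); /* huge BOs share the last bucket */
     printf("ok\n");
     return 0;
  }
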
/* Tries to fetch a BO of sufficient size with the appropriate flags from the
@@ -184,74 +183,71 @@ pan_bucket(struct panfrost_device *dev, unsigned size)
* BO. */
static struct panfrost_bo *
panfrost_bo_cache_fetch(struct panfrost_device *dev,
size_t size, uint32_t flags, const char *label,
bool dontwait)
panfrost_bo_cache_fetch(struct panfrost_device *dev, size_t size,
uint32_t flags, const char *label, bool dontwait)
{
pthread_mutex_lock(&dev->bo_cache.lock);
struct list_head *bucket = pan_bucket(dev, size);
struct panfrost_bo *bo = NULL;
pthread_mutex_lock(&dev->bo_cache.lock);
struct list_head *bucket = pan_bucket(dev, size);
struct panfrost_bo *bo = NULL;
/* Iterate the bucket looking for something suitable */
list_for_each_entry_safe(struct panfrost_bo, entry, bucket,
bucket_link) {
if (entry->size < size || entry->flags != flags)
continue;
/* Iterate the bucket looking for something suitable */
list_for_each_entry_safe(struct panfrost_bo, entry, bucket, bucket_link) {
if (entry->size < size || entry->flags != flags)
continue;
/* If the oldest BO in the cache is busy, likely so is
* everything newer, so bail. */
if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX,
PAN_BO_ACCESS_RW))
break;
/* If the oldest BO in the cache is busy, likely so is
* everything newer, so bail. */
if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX, PAN_BO_ACCESS_RW))
break;
struct drm_panfrost_madvise madv = {
.handle = entry->gem_handle,
.madv = PANFROST_MADV_WILLNEED,
};
int ret;
struct drm_panfrost_madvise madv = {
.handle = entry->gem_handle,
.madv = PANFROST_MADV_WILLNEED,
};
int ret;
/* This one works, splice it out of the cache */
list_del(&entry->bucket_link);
list_del(&entry->lru_link);
/* This one works, splice it out of the cache */
list_del(&entry->bucket_link);
list_del(&entry->lru_link);
ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
if (!ret && !madv.retained) {
panfrost_bo_free(entry);
continue;
}
/* Let's go! */
bo = entry;
bo->label = label;
break;
}
pthread_mutex_unlock(&dev->bo_cache.lock);
ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
if (!ret && !madv.retained) {
panfrost_bo_free(entry);
continue;
}
/* Let's go! */
bo = entry;
bo->label = label;
break;
}
pthread_mutex_unlock(&dev->bo_cache.lock);
return bo;
return bo;
}
static void
panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev)
{
struct timespec time;
struct timespec time;
clock_gettime(CLOCK_MONOTONIC, &time);
list_for_each_entry_safe(struct panfrost_bo, entry,
&dev->bo_cache.lru, lru_link) {
/* We want all entries that have been used more than 1 sec
* ago to be dropped; others can be kept.
* Note the <= 2 check and not <= 1. It's here to account for
* the fact that we're only testing ->tv_sec, not ->tv_nsec.
* That means we might keep entries that are between 1 and 2
* seconds old, but we don't really care, as long as unused BOs
* are dropped at some point.
*/
if (time.tv_sec - entry->last_used <= 2)
break;
clock_gettime(CLOCK_MONOTONIC, &time);
list_for_each_entry_safe(struct panfrost_bo, entry, &dev->bo_cache.lru,
lru_link) {
/* We want all entries that have been used more than 1 sec
* ago to be dropped; others can be kept.
* Note the <= 2 check and not <= 1. It's here to account for
* the fact that we're only testing ->tv_sec, not ->tv_nsec.
* That means we might keep entries that are between 1 and 2
* seconds old, but we don't really care, as long as unused BOs
* are dropped at some point.
*/
if (time.tv_sec - entry->last_used <= 2)
break;
list_del(&entry->bucket_link);
list_del(&entry->lru_link);
panfrost_bo_free(entry);
}
list_del(&entry->bucket_link);
list_del(&entry->lru_link);
panfrost_bo_free(entry);
}
}
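/*
 * Worked example for the staleness test above (illustrative, not part of
 * this diff). Only tv_sec is compared, so an entry last used at t = 10.9 s
 * (last_used = 10) and checked at t = 12.0 s sees 12 - 10 = 2 and is kept
 * even though it is only 1.1 s old; once a later check sees 13 - 10 = 3,
 * the entry, and everything older behind it in the LRU, is freed.
 */
#include <stdbool.h>
#include <time.h>
static bool
example_bo_cache_entry_is_stale(time_t last_used, const struct timespec *now)
{
   return (now->tv_sec - last_used) > 2;
}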
/* Tries to add a BO to the cache. Returns if it was
@@ -260,43 +256,43 @@ panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev)
static bool
panfrost_bo_cache_put(struct panfrost_bo *bo)
{
struct panfrost_device *dev = bo->dev;
struct panfrost_device *dev = bo->dev;
if (bo->flags & PAN_BO_SHARED || dev->debug & PAN_DBG_NO_CACHE)
return false;
if (bo->flags & PAN_BO_SHARED || dev->debug & PAN_DBG_NO_CACHE)
return false;
/* Must be first */
pthread_mutex_lock(&dev->bo_cache.lock);
/* Must be first */
pthread_mutex_lock(&dev->bo_cache.lock);
struct list_head *bucket = pan_bucket(dev, MAX2(bo->size, 4096));
struct drm_panfrost_madvise madv;
struct timespec time;
struct list_head *bucket = pan_bucket(dev, MAX2(bo->size, 4096));
struct drm_panfrost_madvise madv;
struct timespec time;
madv.handle = bo->gem_handle;
madv.madv = PANFROST_MADV_DONTNEED;
madv.retained = 0;
madv.handle = bo->gem_handle;
madv.madv = PANFROST_MADV_DONTNEED;
madv.retained = 0;
drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
/* Add us to the bucket */
list_addtail(&bo->bucket_link, bucket);
/* Add us to the bucket */
list_addtail(&bo->bucket_link, bucket);
/* Add us to the LRU list and update the last_used field. */
list_addtail(&bo->lru_link, &dev->bo_cache.lru);
clock_gettime(CLOCK_MONOTONIC, &time);
bo->last_used = time.tv_sec;
/* Add us to the LRU list and update the last_used field. */
list_addtail(&bo->lru_link, &dev->bo_cache.lru);
clock_gettime(CLOCK_MONOTONIC, &time);
bo->last_used = time.tv_sec;
/* Let's do some cleanup in the BO cache while we hold the
* lock.
*/
panfrost_bo_cache_evict_stale_bos(dev);
/* Let's do some cleanup in the BO cache while we hold the
* lock.
*/
panfrost_bo_cache_evict_stale_bos(dev);
/* Update the label to help debug BO cache memory usage issues */
bo->label = "Unused (BO cache)";
/* Update the label to help debug BO cache memory usage issues */
bo->label = "Unused (BO cache)";
/* Must be last */
pthread_mutex_unlock(&dev->bo_cache.lock);
return true;
/* Must be last */
pthread_mutex_unlock(&dev->bo_cache.lock);
return true;
}
/* Evicts all BOs from the cache. Called during context
@@ -306,228 +302,226 @@ panfrost_bo_cache_put(struct panfrost_bo *bo)
* OS) */
void
panfrost_bo_cache_evict_all(
struct panfrost_device *dev)
panfrost_bo_cache_evict_all(struct panfrost_device *dev)
{
pthread_mutex_lock(&dev->bo_cache.lock);
for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) {
struct list_head *bucket = &dev->bo_cache.buckets[i];
pthread_mutex_lock(&dev->bo_cache.lock);
for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) {
struct list_head *bucket = &dev->bo_cache.buckets[i];
list_for_each_entry_safe(struct panfrost_bo, entry, bucket,
bucket_link) {
list_del(&entry->bucket_link);
list_del(&entry->lru_link);
panfrost_bo_free(entry);
}
}
pthread_mutex_unlock(&dev->bo_cache.lock);
list_for_each_entry_safe(struct panfrost_bo, entry, bucket, bucket_link) {
list_del(&entry->bucket_link);
list_del(&entry->lru_link);
panfrost_bo_free(entry);
}
}
pthread_mutex_unlock(&dev->bo_cache.lock);
}
void
panfrost_bo_mmap(struct panfrost_bo *bo)
{
struct drm_panfrost_mmap_bo mmap_bo = { .handle = bo->gem_handle };
int ret;
struct drm_panfrost_mmap_bo mmap_bo = {.handle = bo->gem_handle};
int ret;
if (bo->ptr.cpu)
return;
if (bo->ptr.cpu)
return;
ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
if (ret) {
fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
assert(0);
}
ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
if (ret) {
fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
assert(0);
}
bo->ptr.cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
bo->dev->fd, mmap_bo.offset);
if (bo->ptr.cpu == MAP_FAILED) {
bo->ptr.cpu = NULL;
fprintf(stderr,
"mmap failed: result=%p size=0x%llx fd=%i offset=0x%llx %m\n",
bo->ptr.cpu, (long long)bo->size, bo->dev->fd,
(long long)mmap_bo.offset);
}
bo->ptr.cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
bo->dev->fd, mmap_bo.offset);
if (bo->ptr.cpu == MAP_FAILED) {
bo->ptr.cpu = NULL;
fprintf(stderr,
"mmap failed: result=%p size=0x%llx fd=%i offset=0x%llx %m\n",
bo->ptr.cpu, (long long)bo->size, bo->dev->fd,
(long long)mmap_bo.offset);
}
}
static void
panfrost_bo_munmap(struct panfrost_bo *bo)
{
if (!bo->ptr.cpu)
return;
if (!bo->ptr.cpu)
return;
if (os_munmap((void *) (uintptr_t)bo->ptr.cpu, bo->size)) {
perror("munmap");
abort();
}
if (os_munmap((void *)(uintptr_t)bo->ptr.cpu, bo->size)) {
perror("munmap");
abort();
}
bo->ptr.cpu = NULL;
bo->ptr.cpu = NULL;
}
struct panfrost_bo *
panfrost_bo_create(struct panfrost_device *dev, size_t size,
uint32_t flags, const char *label)
panfrost_bo_create(struct panfrost_device *dev, size_t size, uint32_t flags,
const char *label)
{
struct panfrost_bo *bo;
struct panfrost_bo *bo;
/* Kernel will fail (confusingly) with EPERM otherwise */
assert(size > 0);
/* Kernel will fail (confusingly) with EPERM otherwise */
assert(size > 0);
/* To maximize BO cache usage, don't allocate tiny BOs */
size = ALIGN_POT(size, 4096);
/* To maximize BO cache usage, don't allocate tiny BOs */
size = ALIGN_POT(size, 4096);
/* GROWABLE BOs cannot be mmapped */
if (flags & PAN_BO_GROWABLE)
assert(flags & PAN_BO_INVISIBLE);
/* GROWABLE BOs cannot be mmapped */
if (flags & PAN_BO_GROWABLE)
assert(flags & PAN_BO_INVISIBLE);
/* Ideally, we get a BO that's ready in the cache, or allocate a fresh
* BO. If allocation fails, we can try waiting for something in the
* cache. But if there's nothing suitable, we should flush the cache
* to make space for the new allocation.
*/
bo = panfrost_bo_cache_fetch(dev, size, flags, label, true);
if (!bo)
bo = panfrost_bo_alloc(dev, size, flags, label);
if (!bo)
bo = panfrost_bo_cache_fetch(dev, size, flags, label, false);
if (!bo) {
panfrost_bo_cache_evict_all(dev);
bo = panfrost_bo_alloc(dev, size, flags, label);
}
/* Ideally, we get a BO that's ready in the cache, or allocate a fresh
* BO. If allocation fails, we can try waiting for something in the
* cache. But if there's nothing suitable, we should flush the cache
* to make space for the new allocation.
*/
bo = panfrost_bo_cache_fetch(dev, size, flags, label, true);
if (!bo)
bo = panfrost_bo_alloc(dev, size, flags, label);
if (!bo)
bo = panfrost_bo_cache_fetch(dev, size, flags, label, false);
if (!bo) {
panfrost_bo_cache_evict_all(dev);
bo = panfrost_bo_alloc(dev, size, flags, label);
}
if (!bo) {
unreachable("BO creation failed. We don't handle that yet.");
return NULL;
}
if (!bo) {
unreachable("BO creation failed. We don't handle that yet.");
return NULL;
}
/* Only mmap now if we know we need to. For CPU-invisible buffers, we
* never map since we don't care about their contents; they're purely
* for GPU-internal use. But we do trace them anyway. */
/* Only mmap now if we know we need to. For CPU-invisible buffers, we
* never map since we don't care about their contents; they're purely
* for GPU-internal use. But we do trace them anyway. */
if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP)))
panfrost_bo_mmap(bo);
if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP)))
panfrost_bo_mmap(bo);
p_atomic_set(&bo->refcnt, 1);
p_atomic_set(&bo->refcnt, 1);
if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
if (flags & PAN_BO_INVISIBLE)
pandecode_inject_mmap(bo->ptr.gpu, NULL, bo->size, NULL);
else if (!(flags & PAN_BO_DELAY_MMAP))
pandecode_inject_mmap(bo->ptr.gpu, bo->ptr.cpu, bo->size, NULL);
}
if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
if (flags & PAN_BO_INVISIBLE)
pandecode_inject_mmap(bo->ptr.gpu, NULL, bo->size, NULL);
else if (!(flags & PAN_BO_DELAY_MMAP))
pandecode_inject_mmap(bo->ptr.gpu, bo->ptr.cpu, bo->size, NULL);
}
return bo;
return bo;
}
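/*
 * Illustrative usage sketch (not part of this diff): allocating a small
 * CPU-visible scratch BO and handing it back to the cache. The size and
 * label are arbitrary examples; the entry points and flags are the ones
 * declared in pan_bo.h.
 */
#include <string.h>
#include "pan_bo.h"
static void
example_scratch_bo(struct panfrost_device *dev)
{
   /* No PAN_BO_INVISIBLE/PAN_BO_DELAY_MMAP flag, so the BO is mmapped
    * before panfrost_bo_create() returns and ptr.cpu is valid. */
   struct panfrost_bo *bo =
      panfrost_bo_create(dev, 64 * 1024, 0, "Example scratch");
   memset(bo->ptr.cpu, 0, bo->size);
   /* Dropping the last reference returns the BO to the cache (unless it
    * is shared), ready to be fetched by a later panfrost_bo_create(). */
   panfrost_bo_unreference(bo);
}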
void
panfrost_bo_reference(struct panfrost_bo *bo)
{
if (bo) {
ASSERTED int count = p_atomic_inc_return(&bo->refcnt);
assert(count != 1);
}
if (bo) {
ASSERTED int count = p_atomic_inc_return(&bo->refcnt);
assert(count != 1);
}
}
void
panfrost_bo_unreference(struct panfrost_bo *bo)
{
if (!bo)
return;
if (!bo)
return;
/* Don't return to cache if there are still references */
if (p_atomic_dec_return(&bo->refcnt))
return;
/* Don't return to cache if there are still references */
if (p_atomic_dec_return(&bo->refcnt))
return;
struct panfrost_device *dev = bo->dev;
struct panfrost_device *dev = bo->dev;
pthread_mutex_lock(&dev->bo_map_lock);
pthread_mutex_lock(&dev->bo_map_lock);
/* Someone might have imported this BO while we were waiting for the
* lock, let's make sure it's still not referenced before freeing it.
*/
if (p_atomic_read(&bo->refcnt) == 0) {
/* When the reference count goes to zero, we need to cleanup */
panfrost_bo_munmap(bo);
/* Someone might have imported this BO while we were waiting for the
* lock, let's make sure it's still not referenced before freeing it.
*/
if (p_atomic_read(&bo->refcnt) == 0) {
/* When the reference count goes to zero, we need to cleanup */
panfrost_bo_munmap(bo);
if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
pandecode_inject_free(bo->ptr.gpu, bo->size);
if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
pandecode_inject_free(bo->ptr.gpu, bo->size);
/* Rather than freeing the BO now, we'll cache the BO for later
* allocations if we're allowed to.
*/
if (!panfrost_bo_cache_put(bo))
panfrost_bo_free(bo);
}
pthread_mutex_unlock(&dev->bo_map_lock);
/* Rather than freeing the BO now, we'll cache the BO for later
* allocations if we're allowed to.
*/
if (!panfrost_bo_cache_put(bo))
panfrost_bo_free(bo);
}
pthread_mutex_unlock(&dev->bo_map_lock);
}
struct panfrost_bo *
panfrost_bo_import(struct panfrost_device *dev, int fd)
{
struct panfrost_bo *bo;
struct drm_panfrost_get_bo_offset get_bo_offset = {0,};
ASSERTED int ret;
unsigned gem_handle;
struct panfrost_bo *bo;
struct drm_panfrost_get_bo_offset get_bo_offset = {
0,
};
ASSERTED int ret;
unsigned gem_handle;
ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle);
assert(!ret);
ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle);
assert(!ret);
pthread_mutex_lock(&dev->bo_map_lock);
bo = pan_lookup_bo(dev, gem_handle);
pthread_mutex_lock(&dev->bo_map_lock);
bo = pan_lookup_bo(dev, gem_handle);
if (!bo->dev) {
get_bo_offset.handle = gem_handle;
ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset);
assert(!ret);
if (!bo->dev) {
get_bo_offset.handle = gem_handle;
ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset);
assert(!ret);
bo->dev = dev;
bo->ptr.gpu = (mali_ptr) get_bo_offset.offset;
bo->size = lseek(fd, 0, SEEK_END);
/* Sometimes this can fail and return -1. A size of -1 is not
* something mmap can sensibly map, so be robust against that and
* against zero-sized imports, and fail gracefully.
*/
if ((bo->size == 0) || (bo->size == (size_t)-1)) {
pthread_mutex_unlock(&dev->bo_map_lock);
return NULL;
}
bo->flags = PAN_BO_SHARED;
bo->gem_handle = gem_handle;
p_atomic_set(&bo->refcnt, 1);
} else {
/* bo->refcnt == 0 can happen if the BO
* was being released but panfrost_bo_import() acquired the
* lock before panfrost_bo_unreference(). In that case, refcnt
* is 0 and we can't use panfrost_bo_reference() directly; we
* have to re-initialize the refcnt.
* Note that panfrost_bo_unreference() checks
* refcnt value just after acquiring the lock to
* make sure the object is not freed if panfrost_bo_import()
* acquired it in the meantime.
*/
if (p_atomic_read(&bo->refcnt) == 0)
p_atomic_set(&bo->refcnt, 1);
else
panfrost_bo_reference(bo);
}
pthread_mutex_unlock(&dev->bo_map_lock);
bo->dev = dev;
bo->ptr.gpu = (mali_ptr)get_bo_offset.offset;
bo->size = lseek(fd, 0, SEEK_END);
/* Sometimes this can fail and return -1. A size of -1 is not
* something mmap can sensibly map, so be robust against that and
* against zero-sized imports, and fail gracefully.
*/
if ((bo->size == 0) || (bo->size == (size_t)-1)) {
pthread_mutex_unlock(&dev->bo_map_lock);
return NULL;
}
bo->flags = PAN_BO_SHARED;
bo->gem_handle = gem_handle;
p_atomic_set(&bo->refcnt, 1);
} else {
/* bo->refcnt == 0 can happen if the BO
* was being released but panfrost_bo_import() acquired the
* lock before panfrost_bo_unreference(). In that case, refcnt
* is 0 and we can't use panfrost_bo_reference() directly; we
* have to re-initialize the refcnt.
* Note that panfrost_bo_unreference() checks
* refcnt value just after acquiring the lock to
* make sure the object is not freed if panfrost_bo_import()
* acquired it in the meantime.
*/
if (p_atomic_read(&bo->refcnt) == 0)
p_atomic_set(&bo->refcnt, 1);
else
panfrost_bo_reference(bo);
}
pthread_mutex_unlock(&dev->bo_map_lock);
return bo;
return bo;
}
int
panfrost_bo_export(struct panfrost_bo *bo)
{
struct drm_prime_handle args = {
.handle = bo->gem_handle,
.flags = DRM_CLOEXEC,
};
struct drm_prime_handle args = {
.handle = bo->gem_handle,
.flags = DRM_CLOEXEC,
};
int ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
if (ret == -1)
return -1;
int ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
if (ret == -1)
return -1;
bo->flags |= PAN_BO_SHARED;
return args.fd;
bo->flags |= PAN_BO_SHARED;
return args.fd;
}
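/*
 * Illustrative sketch (not part of this diff): sharing a BO with another
 * panfrost device through the dma-buf fd returned by panfrost_bo_export().
 * Error handling is minimal, and the device pointers are assumed to come
 * from elsewhere in the driver.
 */
#include <unistd.h>
#include "pan_bo.h"
static struct panfrost_bo *
example_share_bo(struct panfrost_bo *bo, struct panfrost_device *other_dev)
{
   /* Export marks the BO PAN_BO_SHARED, so it will bypass the BO cache
    * and the cached-idle fast path from now on. */
   int fd = panfrost_bo_export(bo);
   if (fd == -1)
      return NULL;
   struct panfrost_bo *imported = panfrost_bo_import(other_dev, fd);
   /* The fd is just a transport handle; close it once imported. */
   close(fd);
   return imported;
}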


@@ -26,113 +26,106 @@
#ifndef __PAN_BO_H__
#define __PAN_BO_H__
#include <time.h>
#include "util/list.h"
#include "panfrost-job.h"
#include <time.h>
/* Flags for allocated memory */
/* This memory region is executable */
#define PAN_BO_EXECUTE (1 << 0)
#define PAN_BO_EXECUTE (1 << 0)
/* This memory region should be lazily allocated and grow-on-page-fault. Must
* be used in conjunction with INVISIBLE */
#define PAN_BO_GROWABLE (1 << 1)
#define PAN_BO_GROWABLE (1 << 1)
/* This memory region should not be mapped to the CPU */
#define PAN_BO_INVISIBLE (1 << 2)
#define PAN_BO_INVISIBLE (1 << 2)
/* This region may not be used immediately and will not be mmapped at
* allocation time (semantically distinct from INVISIBLE, which can never
* be mmapped) */
#define PAN_BO_DELAY_MMAP (1 << 3)
#define PAN_BO_DELAY_MMAP (1 << 3)
/* BO is shared across processes (imported or exported) and therefore cannot be
* cached locally */
#define PAN_BO_SHARED (1 << 4)
#define PAN_BO_SHARED (1 << 4)
/* GPU access flags */
/* BO is either shared (can be accessed by more than one GPU batch) or private
* (reserved by a specific GPU job). */
#define PAN_BO_ACCESS_PRIVATE (0 << 0)
#define PAN_BO_ACCESS_SHARED (1 << 0)
#define PAN_BO_ACCESS_PRIVATE (0 << 0)
#define PAN_BO_ACCESS_SHARED (1 << 0)
/* BO is being read/written by the GPU */
#define PAN_BO_ACCESS_READ (1 << 1)
#define PAN_BO_ACCESS_WRITE (1 << 2)
#define PAN_BO_ACCESS_RW (PAN_BO_ACCESS_READ | PAN_BO_ACCESS_WRITE)
#define PAN_BO_ACCESS_READ (1 << 1)
#define PAN_BO_ACCESS_WRITE (1 << 2)
#define PAN_BO_ACCESS_RW (PAN_BO_ACCESS_READ | PAN_BO_ACCESS_WRITE)
/* BO is accessed by the vertex/tiler job. */
#define PAN_BO_ACCESS_VERTEX_TILER (1 << 3)
#define PAN_BO_ACCESS_VERTEX_TILER (1 << 3)
/* BO is accessed by the fragment job. */
#define PAN_BO_ACCESS_FRAGMENT (1 << 4)
#define PAN_BO_ACCESS_FRAGMENT (1 << 4)
typedef uint8_t pan_bo_access;
struct panfrost_device;
struct panfrost_ptr {
/* CPU address */
void *cpu;
/* CPU address */
void *cpu;
/* GPU address */
mali_ptr gpu;
/* GPU address */
mali_ptr gpu;
};
struct panfrost_bo {
/* Must be first for casting */
struct list_head bucket_link;
/* Must be first for casting */
struct list_head bucket_link;
/* Used to link the BO to the BO cache LRU list. */
struct list_head lru_link;
/* Used to link the BO to the BO cache LRU list. */
struct list_head lru_link;
/* Store the time this BO was last used, so the BO cache logic can evict
* stale BOs.
*/
time_t last_used;
/* Store the time this BO was last used, so the BO cache logic can evict
* stale BOs.
*/
time_t last_used;
/* Atomic reference count */
int32_t refcnt;
/* Atomic reference count */
int32_t refcnt;
struct panfrost_device *dev;
struct panfrost_device *dev;
/* Mapping for the entire object (all levels) */
struct panfrost_ptr ptr;
/* Mapping for the entire object (all levels) */
struct panfrost_ptr ptr;
/* Size of the entire object */
size_t size;
/* Size of the entire object */
size_t size;
int gem_handle;
int gem_handle;
uint32_t flags;
uint32_t flags;
/* Combination of PAN_BO_ACCESS_{READ,WRITE} flags encoding pending
* GPU accesses to this BO. Useful to avoid calling the WAIT_BO ioctl
* when the BO is idle.
*/
uint32_t gpu_access;
/* Combination of PAN_BO_ACCESS_{READ,WRITE} flags encoding pending
* GPU accesses to this BO. Useful to avoid calling the WAIT_BO ioctl
* when the BO is idle.
*/
uint32_t gpu_access;
/* Human readable description of the BO for debugging. */
const char *label;
/* Human readable description of the BO for debugging. */
const char *label;
};
bool
panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, bool wait_readers);
void
panfrost_bo_reference(struct panfrost_bo *bo);
void
panfrost_bo_unreference(struct panfrost_bo *bo);
struct panfrost_bo *
panfrost_bo_create(struct panfrost_device *dev, size_t size,
uint32_t flags, const char *label);
void
panfrost_bo_mmap(struct panfrost_bo *bo);
struct panfrost_bo *
panfrost_bo_import(struct panfrost_device *dev, int fd);
int
panfrost_bo_export(struct panfrost_bo *bo);
void
panfrost_bo_cache_evict_all(struct panfrost_device *dev);
bool panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns,
bool wait_readers);
void panfrost_bo_reference(struct panfrost_bo *bo);
void panfrost_bo_unreference(struct panfrost_bo *bo);
struct panfrost_bo *panfrost_bo_create(struct panfrost_device *dev, size_t size,
uint32_t flags, const char *label);
void panfrost_bo_mmap(struct panfrost_bo *bo);
struct panfrost_bo *panfrost_bo_import(struct panfrost_device *dev, int fd);
int panfrost_bo_export(struct panfrost_bo *bo);
void panfrost_bo_cache_evict_all(struct panfrost_device *dev);
#endif /* __PAN_BO_H__ */


@@ -26,11 +26,11 @@
#include "genxml/gen_macros.h"
#include <string.h>
#include "pan_util.h"
#include "pan_format.h"
#include "gallium/auxiliary/util/u_pack_color.h"
#include "util/rounding.h"
#include "util/format_srgb.h"
#include "util/rounding.h"
#include "pan_format.h"
#include "pan_util.h"
/* Clear colours are packed as the internal format of the tilebuffer, looked up
* in the blendable formats table given the render target format.
@@ -49,8 +49,8 @@
static void
pan_pack_color_32(uint32_t *packed, uint32_t v)
{
for (unsigned i = 0; i < 4; ++i)
packed[i] = v;
for (unsigned i = 0; i < 4; ++i)
packed[i] = v;
}
/* For m integer bits and n fractional bits, calculate the conversion factor,
@@ -61,22 +61,22 @@ pan_pack_color_32(uint32_t *packed, uint32_t v)
static inline uint32_t
float_to_fixed(float f, unsigned bits_int, unsigned bits_frac, bool dither)
{
uint32_t m = (1 << bits_int) - 1;
uint32_t m = (1 << bits_int) - 1;
if (dither) {
float factor = m << bits_frac;
return _mesa_roundevenf(f * factor);
} else {
uint32_t v = _mesa_roundevenf(f * (float) m);
return v << bits_frac;
}
if (dither) {
float factor = m << bits_frac;
return _mesa_roundevenf(f * factor);
} else {
uint32_t v = _mesa_roundevenf(f * (float)m);
return v << bits_frac;
}
}
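/*
 * Worked example for float_to_fixed() above (illustrative, not part of
 * this diff), assuming a 4.4 fixed-point channel like the ones described
 * by struct mali_tib_layout. With f = 0.5, bits_int = 4, bits_frac = 4
 * and m = (1 << 4) - 1 = 15:
 *
 *   dithered:     roundeven(0.5 * (15 << 4)) = roundeven(120.0) = 120
 *   non-dithered: roundeven(0.5 * 15) << 4   = 8 << 4           = 128
 *
 * so dithering preserves fractional precision instead of always leaving
 * the low bits zero.
 */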
struct mali_tib_layout {
unsigned int_r, frac_r;
unsigned int_g, frac_g;
unsigned int_b, frac_b;
unsigned int_a, frac_a;
unsigned int_r, frac_r;
unsigned int_g, frac_g;
unsigned int_b, frac_b;
unsigned int_a, frac_a;
};
/* clang-format off */
@@ -93,76 +93,77 @@ static const struct mali_tib_layout tib_layouts[] = {
/* Raw values are stored as-is but replicated for multisampling */
static void
pan_pack_raw(uint32_t *packed, const union pipe_color_union *color, enum pipe_format format)
pan_pack_raw(uint32_t *packed, const union pipe_color_union *color,
enum pipe_format format)
{
union util_color out = { 0 };
unsigned size = util_format_get_blocksize(format);
assert(size <= 16);
union util_color out = {0};
unsigned size = util_format_get_blocksize(format);
assert(size <= 16);
util_pack_color(color->f, format, &out);
util_pack_color(color->f, format, &out);
if (size == 1) {
unsigned s = out.ui[0] | (out.ui[0] << 8);
pan_pack_color_32(packed, s | (s << 16));
} else if (size == 2)
pan_pack_color_32(packed, out.ui[0] | (out.ui[0] << 16));
else if (size <= 4)
pan_pack_color_32(packed, out.ui[0]);
else if (size <= 8) {
memcpy(packed + 0, out.ui, 8);
memcpy(packed + 2, out.ui, 8);
} else {
memcpy(packed, out.ui, 16);
}
if (size == 1) {
unsigned s = out.ui[0] | (out.ui[0] << 8);
pan_pack_color_32(packed, s | (s << 16));
} else if (size == 2)
pan_pack_color_32(packed, out.ui[0] | (out.ui[0] << 16));
else if (size <= 4)
pan_pack_color_32(packed, out.ui[0]);
else if (size <= 8) {
memcpy(packed + 0, out.ui, 8);
memcpy(packed + 2, out.ui, 8);
} else {
memcpy(packed, out.ui, 16);
}
}
void
pan_pack_color(uint32_t *packed, const union pipe_color_union *color,
enum pipe_format format, bool dithered)
{
/* Set of blendable formats is common across versions. TODO: v9 */
enum mali_color_buffer_internal_format internal =
panfrost_blendable_formats_v7[format].internal;
/* Set of blendable formats is common across versions. TODO: v9 */
enum mali_color_buffer_internal_format internal =
panfrost_blendable_formats_v7[format].internal;
if (internal == MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW_VALUE) {
pan_pack_raw(packed, color, format);
return;
}
if (internal == MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW_VALUE) {
pan_pack_raw(packed, color, format);
return;
}
/* Saturate to [0, 1] by definition of UNORM. Prevents overflow. */
float r = SATURATE(color->f[0]);
float g = SATURATE(color->f[1]);
float b = SATURATE(color->f[2]);
float a = SATURATE(color->f[3]);
/* Saturate to [0, 1] by definition of UNORM. Prevents overflow. */
float r = SATURATE(color->f[0]);
float g = SATURATE(color->f[1]);
float b = SATURATE(color->f[2]);
float a = SATURATE(color->f[3]);
/* Fill in alpha = 1.0 by default */
if (!util_format_has_alpha(format))
a = 1.0;
/* Fill in alpha = 1.0 by default */
if (!util_format_has_alpha(format))
a = 1.0;
/* Convert colourspace while we still have floats */
if (util_format_is_srgb(format)) {
r = util_format_linear_to_srgb_float(r);
g = util_format_linear_to_srgb_float(g);
b = util_format_linear_to_srgb_float(b);
}
/* Convert colourspace while we still have floats */
if (util_format_is_srgb(format)) {
r = util_format_linear_to_srgb_float(r);
g = util_format_linear_to_srgb_float(g);
b = util_format_linear_to_srgb_float(b);
}
/* Look up the layout of the tilebuffer */
assert(internal < ARRAY_SIZE(tib_layouts));
struct mali_tib_layout l = tib_layouts[internal];
/* Look up the layout of the tilebuffer */
assert(internal < ARRAY_SIZE(tib_layouts));
struct mali_tib_layout l = tib_layouts[internal];
unsigned count_r = l.int_r + l.frac_r;
unsigned count_g = l.int_g + l.frac_g + count_r;
unsigned count_b = l.int_b + l.frac_b + count_g;
ASSERTED unsigned count_a = l.int_a + l.frac_a + count_b;
unsigned count_r = l.int_r + l.frac_r;
unsigned count_g = l.int_g + l.frac_g + count_r;
unsigned count_b = l.int_b + l.frac_b + count_g;
ASSERTED unsigned count_a = l.int_a + l.frac_a + count_b;
/* Must fill the word */
assert(count_a == 32);
/* Must fill the word */
assert(count_a == 32);
/* Convert the transformed float colour to the given layout */
uint32_t ur = float_to_fixed(r, l.int_r, l.frac_r, dithered) << 0;
uint32_t ug = float_to_fixed(g, l.int_g, l.frac_g, dithered) << count_r;
uint32_t ub = float_to_fixed(b, l.int_b, l.frac_b, dithered) << count_g;
uint32_t ua = float_to_fixed(a, l.int_a, l.frac_a, dithered) << count_b;
/* Convert the transformed float colour to the given layout */
uint32_t ur = float_to_fixed(r, l.int_r, l.frac_r, dithered) << 0;
uint32_t ug = float_to_fixed(g, l.int_g, l.frac_g, dithered) << count_r;
uint32_t ub = float_to_fixed(b, l.int_b, l.frac_b, dithered) << count_g;
uint32_t ua = float_to_fixed(a, l.int_a, l.frac_a, dithered) << count_b;
pan_pack_color_32(packed, ur | ug | ub | ua);
pan_pack_color_32(packed, ur | ug | ub | ua);
}
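/*
 * Illustrative usage sketch (not part of this diff): packing a clear
 * colour for an assumed RGBA8 UNORM render target. Only pan_pack_color()
 * itself comes from this file; the format choice is an example.
 */
static void
example_pack_clear_color(uint32_t packed[4])
{
   const union pipe_color_union red = {
      .f = {1.0f, 0.0f, 0.0f, 1.0f},
   };
   /* dithered = false keeps the example deterministic; dithering only
    * changes the result for formats with fractional tilebuffer bits. */
   pan_pack_color(packed, &red, PIPE_FORMAT_R8G8B8A8_UNORM, false);
}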

File diff suppressed because it is too large

Some files were not shown because too many files have changed in this diff