Mirror of https://gitlab.freedesktop.org/mesa/mesa.git (synced 2025-12-24 21:50:12 +01:00)
panfrost: clang-format the tree
This switches us over to Mesa's code style [1], normalizing us within the tree. The results aren't perfect, but they bring us a hell of a lot closer to the rest of the tree. Panfrost doesn't feel so foreign relative to Mesa with this, which I think (in retrospect after a bunch of years of being "different") is the right call. I skipped PanVK because that's paused right now.

   find panfrost/ -type f -name '*.h' | grep -v vulkan | xargs clang-format -i
   find panfrost/ -type f -name '*.c' | grep -v vulkan | xargs clang-format -i
   clang-format -i gallium/drivers/panfrost/*.c gallium/drivers/panfrost/*.h
   find panfrost/ -type f -name '*.cpp' | grep -v vulkan | xargs clang-format -i

[1] https://docs.mesa3d.org/codingstyle.html

Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20425>
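For context, the conventions being normalized to are the ones Mesa's coding style documents and the tree's .clang-format enforces: three-space indentation, roughly 80-column lines, and the return type on its own line in function definitions. A minimal sketch of the resulting shape (hypothetical function, not from this commit):

   /* Mesa style as clang-format emits it with the tree's config:
    * 3-space indent, return type on its own line for definitions,
    * wrapped arguments aligned under the opening parenthesis. */
   static void
   pan_example_set_bit(uint32_t *mask, unsigned bit, bool enable)
   {
      if (enable)
         *mask |= (1u << bit);
      else
         *mask &= ~(1u << bit);
   }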
This commit is contained in:
parent a4705afe63
commit 0afd691f29

182 changed files with 36570 additions and 36355 deletions (the hunks below show the reformatted side of the diff).
@@ -36,26 +36,26 @@
struct panfrost_bo;

struct pan_blend_info {
   unsigned constant_mask : 4;
   bool fixed_function : 1;
   bool enabled : 1;
   bool load_dest : 1;
   bool opaque : 1;
   bool alpha_zero_nop : 1;
   bool alpha_one_store : 1;
};

struct panfrost_blend_state {
   struct pipe_blend_state base;
   struct pan_blend_state pan;
   struct pan_blend_info info[PIPE_MAX_COLOR_BUFS];
   uint32_t equation[PIPE_MAX_COLOR_BUFS];

   /* info.load presented as a bitfield for draw call hot paths */
   unsigned load_dest_mask : PIPE_MAX_COLOR_BUFS;
};

mali_ptr panfrost_get_blend(struct panfrost_batch *batch, unsigned rt,
                            struct panfrost_bo **bo, unsigned *shader_offset);

#endif
@@ -27,59 +27,58 @@
 *
 */

#include "pan_context.h"
#include "pan_util.h"
#include "util/format/u_format.h"

void
panfrost_blitter_save(struct panfrost_context *ctx, bool render_cond)
{
   struct blitter_context *blitter = ctx->blitter;

   util_blitter_save_vertex_buffer_slot(blitter, ctx->vertex_buffers);
   util_blitter_save_vertex_elements(blitter, ctx->vertex);
   util_blitter_save_vertex_shader(blitter,
                                   ctx->uncompiled[PIPE_SHADER_VERTEX]);
   util_blitter_save_rasterizer(blitter, ctx->rasterizer);
   util_blitter_save_viewport(blitter, &ctx->pipe_viewport);
   util_blitter_save_scissor(blitter, &ctx->scissor);
   util_blitter_save_fragment_shader(blitter,
                                     ctx->uncompiled[PIPE_SHADER_FRAGMENT]);
   util_blitter_save_blend(blitter, ctx->blend);
   util_blitter_save_depth_stencil_alpha(blitter, ctx->depth_stencil);
   util_blitter_save_stencil_ref(blitter, &ctx->stencil_ref);
   util_blitter_save_so_targets(blitter, 0, NULL);
   util_blitter_save_sample_mask(blitter, ctx->sample_mask, ctx->min_samples);

   util_blitter_save_framebuffer(blitter, &ctx->pipe_framebuffer);
   util_blitter_save_fragment_sampler_states(
      blitter, ctx->sampler_count[PIPE_SHADER_FRAGMENT],
      (void **)(&ctx->samplers[PIPE_SHADER_FRAGMENT]));
   util_blitter_save_fragment_sampler_views(
      blitter, ctx->sampler_view_count[PIPE_SHADER_FRAGMENT],
      (struct pipe_sampler_view **)&ctx->sampler_views[PIPE_SHADER_FRAGMENT]);
   util_blitter_save_fragment_constant_buffer_slot(
      blitter, ctx->constant_buffer[PIPE_SHADER_FRAGMENT].cb);

   if (!render_cond) {
      util_blitter_save_render_condition(blitter,
                                         (struct pipe_query *)ctx->cond_query,
                                         ctx->cond_cond, ctx->cond_mode);
   }
}

void
panfrost_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
{
   struct panfrost_context *ctx = pan_context(pipe);

   if (info->render_condition_enable && !panfrost_render_condition_check(ctx))
      return;

   if (!util_blitter_is_blit_supported(ctx->blitter, info))
      unreachable("Unsupported blit\n");

   panfrost_blitter_save(ctx, info->render_condition_enable);
   util_blitter_blit(ctx->blitter, info);
}
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -26,206 +26,207 @@
#define __BUILDER_H__

#define _LARGEFILE64_SOURCE 1
#include <assert.h>
#include <sys/mman.h>
#include "pan_blend_cso.h"
#include "pan_earlyzs.h"
#include "pan_encoder.h"
#include "pan_job.h"
#include "pan_resource.h"
#include "pan_texture.h"

#include "pipe/p_compiler.h"
#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "util/detect.h"
#include "util/format/u_formats.h"
#include "util/hash_table.h"
#include "util/simple_mtx.h"
#include "util/u_blitter.h"

#include "compiler/shader_enums.h"
#include "midgard/midgard_compile.h"

#define SET_BIT(lval, bit, cond)                                               \
   if (cond)                                                                   \
      lval |= (bit);                                                           \
   else                                                                        \
      lval &= ~(bit);

/* Dirty tracking flags. 3D is for general 3D state. Shader flags are
 * per-stage. Renderer refers to Renderer State Descriptors. Vertex refers to
 * vertex attributes/elements. */

enum pan_dirty_3d {
   PAN_DIRTY_VIEWPORT = BITFIELD_BIT(0),
   PAN_DIRTY_SCISSOR = BITFIELD_BIT(1),
   PAN_DIRTY_VERTEX = BITFIELD_BIT(2),
   PAN_DIRTY_PARAMS = BITFIELD_BIT(3),
   PAN_DIRTY_DRAWID = BITFIELD_BIT(4),
   PAN_DIRTY_TLS_SIZE = BITFIELD_BIT(5),
   PAN_DIRTY_ZS = BITFIELD_BIT(6),
   PAN_DIRTY_BLEND = BITFIELD_BIT(7),
   PAN_DIRTY_MSAA = BITFIELD_BIT(8),
   PAN_DIRTY_OQ = BITFIELD_BIT(9),
   PAN_DIRTY_RASTERIZER = BITFIELD_BIT(10),
   PAN_DIRTY_POINTS = BITFIELD_BIT(11),
   PAN_DIRTY_SO = BITFIELD_BIT(12),
};

enum pan_dirty_shader {
   PAN_DIRTY_STAGE_SHADER = BITFIELD_BIT(0),
   PAN_DIRTY_STAGE_TEXTURE = BITFIELD_BIT(1),
   PAN_DIRTY_STAGE_SAMPLER = BITFIELD_BIT(2),
   PAN_DIRTY_STAGE_IMAGE = BITFIELD_BIT(3),
   PAN_DIRTY_STAGE_CONST = BITFIELD_BIT(4),
   PAN_DIRTY_STAGE_SSBO = BITFIELD_BIT(5),
};

struct panfrost_constant_buffer {
   struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
   uint32_t enabled_mask;
};

struct panfrost_query {
   /* Passthrough from Gallium */
   unsigned type;
   unsigned index;

   /* For computed queries. 64-bit to prevent overflow */
   struct {
      uint64_t start;
      uint64_t end;
   };

   /* Memory for the GPU to writeback the value of the query */
   struct pipe_resource *rsrc;

   /* Whether an occlusion query is for a MSAA framebuffer */
   bool msaa;
};

struct panfrost_streamout_target {
   struct pipe_stream_output_target base;
   uint32_t offset;
};

struct panfrost_streamout {
   struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
   unsigned num_targets;
};

struct panfrost_context {
   /* Gallium context */
   struct pipe_context base;

   /* Dirty global state */
   enum pan_dirty_3d dirty;

   /* Per shader stage dirty state */
   enum pan_dirty_shader dirty_shader[PIPE_SHADER_TYPES];

   /* Unowned pools, so manage yourself. */
   struct panfrost_pool descs, shaders;

   /* Sync obj used to keep track of in-flight jobs. */
   uint32_t syncobj;

   /* Set of 32 batches. When the set is full, the LRU entry (the batch
    * with the smallest seqnum) is flushed to free a slot.
    */
   struct {
      uint64_t seqnum;
      struct panfrost_batch slots[PAN_MAX_BATCHES];

      /** Set of active batches for faster traversal */
      BITSET_DECLARE(active, PAN_MAX_BATCHES);
   } batches;

   /* Map from resources to panfrost_batches */
   struct hash_table *writers;

   /* Bound job batch */
   struct panfrost_batch *batch;

   /* Within a launch_grid call.. */
   const struct pipe_grid_info *compute_grid;

   struct pipe_framebuffer_state pipe_framebuffer;
   struct panfrost_streamout streamout;

   bool active_queries;
   uint64_t prims_generated;
   uint64_t tf_prims_generated;
   uint64_t draw_calls;
   struct panfrost_query *occlusion_query;

   unsigned drawid;
   unsigned vertex_count;
   unsigned instance_count;
   unsigned offset_start;
   unsigned base_vertex;
   unsigned base_instance;
   enum pipe_prim_type active_prim;

   /* If instancing is enabled, vertex count padded for instance; if
    * it is disabled, just equal to plain vertex count */
   unsigned padded_count;

   struct panfrost_constant_buffer constant_buffer[PIPE_SHADER_TYPES];
   struct panfrost_rasterizer *rasterizer;
   struct panfrost_vertex_state *vertex;

   struct panfrost_uncompiled_shader *uncompiled[PIPE_SHADER_TYPES];
   struct panfrost_compiled_shader *prog[PIPE_SHADER_TYPES];

   struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
   uint32_t vb_mask;

   struct pipe_shader_buffer ssbo[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_BUFFERS];
   uint32_t ssbo_mask[PIPE_SHADER_TYPES];

   struct pipe_image_view images[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_IMAGES];
   uint32_t image_mask[PIPE_SHADER_TYPES];

   struct panfrost_sampler_state *samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
   unsigned sampler_count[PIPE_SHADER_TYPES];
   uint32_t valid_samplers[PIPE_SHADER_TYPES];

   struct panfrost_sampler_view
      *sampler_views[PIPE_SHADER_TYPES][PIPE_MAX_SHADER_SAMPLER_VIEWS];
   unsigned sampler_view_count[PIPE_SHADER_TYPES];

   struct blitter_context *blitter;

   struct panfrost_blend_state *blend;

   /* On Valhall, does the current blend state use a blend shader for any
    * output? We need this information in a hot path to decide if
    * per-sample shading should be enabled.
    */
   bool valhall_has_blend_shader;

   struct pipe_viewport_state pipe_viewport;
   struct pipe_scissor_state scissor;
   struct pipe_blend_color blend_color;
   struct panfrost_zsa_state *depth_stencil;
   struct pipe_stencil_ref stencil_ref;
   uint16_t sample_mask;
   unsigned min_samples;

   struct panfrost_query *cond_query;
   bool cond_cond;
   enum pipe_render_cond_flag cond_mode;

   bool is_noop;

   /* Mask of active render targets */
   uint8_t fb_rt_mask;

   int in_sync_fd;
   uint32_t in_sync_obj;
};

/* Corresponds to the CSO */
@@ -234,19 +235,19 @@ struct panfrost_rasterizer;

/* Linked varyings */
struct pan_linkage {
   /* If the upload is owned by the CSO instead
    * of the pool, the referenced BO. Else,
    * NULL. */
   struct panfrost_bo *bo;

   /* Uploaded attribute descriptors */
   mali_ptr producer, consumer;

   /* Varyings buffers required */
   uint32_t present;

   /* Per-vertex stride for general varying buffer */
   uint32_t stride;
};

#define RSD_WORDS 16
@@ -255,89 +256,89 @@ struct pan_linkage {
 * shaders with varying emulated features baked in
 */
struct panfrost_fs_key {
   /* Number of colour buffers if gl_FragColor is written */
   unsigned nr_cbufs_for_fragcolor;

   /* On Valhall, fixed_varying_mask of the linked vertex shader */
   uint32_t fixed_varying_mask;

   /* Midgard shaders that read the tilebuffer must be keyed for
    * non-blendable formats
    */
   enum pipe_format rt_formats[8];

   /* From rasterize state, to lower point sprites */
   uint16_t sprite_coord_enable;

   /* User clip plane lowering */
   uint8_t clip_plane_enable;
};

struct panfrost_shader_key {
   union {
      /* Vertex shaders do not use shader keys. However, we have a
       * special "transform feedback" vertex program derived from a
       * vertex shader. If vs_is_xfb is set on a vertex shader, this
       * is a transform feedback shader, else it is a regular
       * (unkeyed) vertex shader.
       */
      bool vs_is_xfb;

      /* Fragment shaders use regular shader keys */
      struct panfrost_fs_key fs;
   };
};

struct panfrost_compiled_shader {
   /* Respectively, shader binary and Renderer State Descriptor */
   struct panfrost_pool_ref bin, state;

   /* For fragment shaders, a prepared (but not uploaded RSD) */
   uint32_t partial_rsd[RSD_WORDS];

   struct pan_shader_info info;

   struct pan_earlyzs_lut earlyzs;

   /* Linked varyings, for non-separable programs */
   struct pan_linkage linkage;

   struct pipe_stream_output_info stream_output;

   struct panfrost_shader_key key;

   /* Mask of state that dirties the sysvals */
   unsigned dirty_3d, dirty_shader;
};

/* Shader CSO */
struct panfrost_uncompiled_shader {
   /* NIR for the shader. For graphics, this will be non-NULL even for
    * TGSI. For compute, this will be NULL after the shader is compiled,
    * as we don't need any compute variants.
    */
   const nir_shader *nir;

   /* A SHA1 of the serialized NIR for the disk cache. */
   unsigned char nir_sha1[20];

   /* Stream output information */
   struct pipe_stream_output_info stream_output;

   /** Lock for the variants array */
   simple_mtx_t lock;

   /* Array of panfrost_compiled_shader */
   struct util_dynarray variants;

   /* Compiled transform feedback program, if one is required */
   struct panfrost_compiled_shader *xfb;

   /* On vertex shaders, bit mask of special desktop-only varyings to link
    * with the fragment shader. Used on Valhall to implement separable
    * shaders for desktop GL.
    */
   uint32_t fixed_varying_mask;
};

/* The binary artefacts of compiling a shader. This differs from
@@ -347,11 +348,11 @@ struct panfrost_uncompiled_shader {
 * This structure is serialized for the shader disk cache.
 */
struct panfrost_shader_binary {
   /* Collected information about the compiled shader */
   struct pan_shader_info info;

   /* The binary itself */
   struct util_dynarray binary;
};

void
@@ -360,28 +361,25 @@ panfrost_disk_cache_store(struct disk_cache *cache,
                          const struct panfrost_shader_key *key,
                          const struct panfrost_shader_binary *binary);

bool panfrost_disk_cache_retrieve(
   struct disk_cache *cache,
   const struct panfrost_uncompiled_shader *uncompiled,
   const struct panfrost_shader_key *key,
   struct panfrost_shader_binary *binary);

void panfrost_disk_cache_init(struct panfrost_screen *screen);

/** (Vertex buffer index, divisor) tuple that will become an Attribute Buffer
 * Descriptor at draw-time on Midgard
 */
struct pan_vertex_buffer {
   unsigned vbi;
   unsigned divisor;
};

unsigned pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers,
                                  unsigned *nr_bufs, unsigned vbi,
                                  unsigned divisor);

struct panfrost_zsa_state;
struct panfrost_sampler_state;
@@ -391,39 +389,32 @@ struct panfrost_vertex_state;
static inline struct panfrost_context *
pan_context(struct pipe_context *pcontext)
{
   return (struct panfrost_context *)pcontext;
}

static inline struct panfrost_streamout_target *
pan_so_target(struct pipe_stream_output_target *target)
{
   return (struct panfrost_streamout_target *)target;
}

struct pipe_context *panfrost_create_context(struct pipe_screen *screen,
                                             void *priv, unsigned flags);

bool panfrost_writes_point_size(struct panfrost_context *ctx);

struct panfrost_ptr panfrost_vertex_tiler_job(struct panfrost_context *ctx,
                                              bool is_tiler);

void panfrost_flush(struct pipe_context *pipe, struct pipe_fence_handle **fence,
                    unsigned flags);

bool panfrost_render_condition_check(struct panfrost_context *ctx);

void panfrost_update_shader_variant(struct panfrost_context *ctx,
                                    enum pipe_shader_type type);

void panfrost_analyze_sysvals(struct panfrost_compiled_shader *ss);

mali_ptr
panfrost_get_index_buffer(struct panfrost_batch *batch,
@@ -438,41 +429,37 @@ panfrost_get_index_buffer_bounded(struct panfrost_batch *batch,

/* Instancing */

mali_ptr panfrost_vertex_buffer_address(struct panfrost_context *ctx,
                                        unsigned i);

void panfrost_shader_context_init(struct pipe_context *pctx);

static inline void
panfrost_dirty_state_all(struct panfrost_context *ctx)
{
   ctx->dirty = ~0;

   for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i)
      ctx->dirty_shader[i] = ~0;
}

static inline void
panfrost_clean_state_3d(struct panfrost_context *ctx)
{
   ctx->dirty = 0;

   for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) {
      if (i != PIPE_SHADER_COMPUTE)
         ctx->dirty_shader[i] = 0;
   }
}

void panfrost_set_batch_masks_blend(struct panfrost_batch *batch);

void panfrost_set_batch_masks_zs(struct panfrost_batch *batch);

void panfrost_track_image_access(struct panfrost_batch *batch,
                                 enum pipe_shader_type stage,
                                 struct pipe_image_view *image);

#endif
@@ -21,9 +21,9 @@
 * DEALINGS IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include "compiler/nir/nir.h"
@@ -43,17 +43,17 @@ extern int bifrost_debug;
 * Compute a disk cache key for the given uncompiled shader and shader key.
 */
static void
panfrost_disk_cache_compute_key(
   struct disk_cache *cache,
   const struct panfrost_uncompiled_shader *uncompiled,
   const struct panfrost_shader_key *shader_key, cache_key cache_key)
{
   uint8_t data[sizeof(uncompiled->nir_sha1) + sizeof(*shader_key)];

   memcpy(data, uncompiled->nir_sha1, sizeof(uncompiled->nir_sha1));
   memcpy(data + sizeof(uncompiled->nir_sha1), shader_key, sizeof(*shader_key));

   disk_cache_compute_key(cache, data, sizeof(data), cache_key);
}

/**
@@ -69,33 +69,33 @@ panfrost_disk_cache_store(struct disk_cache *cache,
                          const struct panfrost_shader_binary *binary)
{
#ifdef ENABLE_SHADER_CACHE
   if (!cache)
      return;

   cache_key cache_key;
   panfrost_disk_cache_compute_key(cache, uncompiled, key, cache_key);

   if (debug) {
      char sha1[41];
      _mesa_sha1_format(sha1, cache_key);
      fprintf(stderr, "[mesa disk cache] storing %s\n", sha1);
   }

   struct blob blob;
   blob_init(&blob);

   /* We write the following data to the cache blob:
    *
    * 1. Size of program binary
    * 2. Program binary
    * 3. Shader info
    */
   blob_write_uint32(&blob, binary->binary.size);
   blob_write_bytes(&blob, binary->binary.data, binary->binary.size);
   blob_write_bytes(&blob, &binary->info, sizeof(binary->info));

   disk_cache_put(cache, cache_key, blob.data, blob.size, NULL);
   blob_finish(&blob);
#endif
}
@@ -109,43 +109,43 @@ panfrost_disk_cache_retrieve(struct disk_cache *cache,
                             struct panfrost_shader_binary *binary)
{
#ifdef ENABLE_SHADER_CACHE
   if (!cache)
      return false;

   cache_key cache_key;
   panfrost_disk_cache_compute_key(cache, uncompiled, key, cache_key);

   if (debug) {
      char sha1[41];
      _mesa_sha1_format(sha1, cache_key);
      fprintf(stderr, "[mesa disk cache] retrieving %s: ", sha1);
   }

   size_t size;
   void *buffer = disk_cache_get(cache, cache_key, &size);

   if (debug)
      fprintf(stderr, "%s\n", buffer ? "found" : "missing");

   if (!buffer)
      return false;

   struct blob_reader blob;
   blob_reader_init(&blob, buffer, size);

   util_dynarray_init(&binary->binary, NULL);

   uint32_t binary_size = blob_read_uint32(&blob);
   void *ptr = util_dynarray_resize_bytes(&binary->binary, binary_size, 1);

   blob_copy_bytes(&blob, ptr, binary_size);
   blob_copy_bytes(&blob, &binary->info, sizeof(binary->info));

   free(buffer);

   return true;
#else
   return false;
#endif
}
@@ -156,22 +156,22 @@ void
panfrost_disk_cache_init(struct panfrost_screen *screen)
{
#ifdef ENABLE_SHADER_CACHE
   const char *renderer = screen->base.get_name(&screen->base);

   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(panfrost_disk_cache_init);
   assert(note && build_id_length(note) == 20); /* sha1 */

   const uint8_t *id_sha1 = build_id_data(note);
   assert(id_sha1);

   char timestamp[41];
   _mesa_sha1_format(timestamp, id_sha1);

   /* Consider any flags affecting the compile when caching */
   uint64_t driver_flags = screen->dev.debug;
   driver_flags |= ((uint64_t)(midgard_debug | bifrost_debug) << 32);

   screen->disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
#endif
}
@@ -26,8 +26,8 @@
 * SOFTWARE.
 */

#include "pan_fence.h"
#include "pan_context.h"
#include "pan_screen.h"

#include "util/os_time.h"
@@ -38,117 +38,112 @@ panfrost_fence_reference(struct pipe_screen *pscreen,
                         struct pipe_fence_handle **ptr,
                         struct pipe_fence_handle *fence)
{
   struct panfrost_device *dev = pan_device(pscreen);
   struct pipe_fence_handle *old = *ptr;

   if (pipe_reference(&old->reference, &fence->reference)) {
      drmSyncobjDestroy(dev->fd, old->syncobj);
      free(old);
   }

   *ptr = fence;
}

bool
panfrost_fence_finish(struct pipe_screen *pscreen, struct pipe_context *ctx,
                      struct pipe_fence_handle *fence, uint64_t timeout)
{
   struct panfrost_device *dev = pan_device(pscreen);
   int ret;

   if (fence->signaled)
      return true;

   uint64_t abs_timeout = os_time_get_absolute_timeout(timeout);
   if (abs_timeout == OS_TIMEOUT_INFINITE)
      abs_timeout = INT64_MAX;

   ret = drmSyncobjWait(dev->fd, &fence->syncobj, 1, abs_timeout,
                        DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, NULL);

   fence->signaled = (ret >= 0);
   return fence->signaled;
}

int
panfrost_fence_get_fd(struct pipe_screen *screen, struct pipe_fence_handle *f)
{
   struct panfrost_device *dev = pan_device(screen);
   int fd = -1;

   drmSyncobjExportSyncFile(dev->fd, f->syncobj, &fd);
   return fd;
}

struct pipe_fence_handle *
panfrost_fence_from_fd(struct panfrost_context *ctx, int fd,
                       enum pipe_fd_type type)
{
   struct panfrost_device *dev = pan_device(ctx->base.screen);
   int ret;

   struct pipe_fence_handle *f = calloc(1, sizeof(*f));
   if (!f)
      return NULL;

   if (type == PIPE_FD_TYPE_NATIVE_SYNC) {
      ret = drmSyncobjCreate(dev->fd, 0, &f->syncobj);
      if (ret) {
         fprintf(stderr, "create syncobj failed\n");
         goto err_free_fence;
      }

      ret = drmSyncobjImportSyncFile(dev->fd, f->syncobj, fd);
      if (ret) {
         fprintf(stderr, "import syncfile failed\n");
         goto err_destroy_syncobj;
      }
   } else {
      assert(type == PIPE_FD_TYPE_SYNCOBJ);
      ret = drmSyncobjFDToHandle(dev->fd, fd, &f->syncobj);
      if (ret) {
         fprintf(stderr, "import syncobj FD failed\n");
         goto err_free_fence;
      }
   }

   pipe_reference_init(&f->reference, 1);

   return f;

err_destroy_syncobj:
   drmSyncobjDestroy(dev->fd, f->syncobj);
err_free_fence:
   free(f);
   return NULL;
}

struct pipe_fence_handle *
panfrost_fence_create(struct panfrost_context *ctx)
{
   struct panfrost_device *dev = pan_device(ctx->base.screen);
   int fd = -1, ret;

   /* Snapshot the last rendering out fence. We'd rather have another
    * syncobj instead of a sync file, but this is all we get.
    * (HandleToFD/FDToHandle just gives you another syncobj ID for the
    * same syncobj).
    */
   ret = drmSyncobjExportSyncFile(dev->fd, ctx->syncobj, &fd);
   if (ret || fd == -1) {
      fprintf(stderr, "export failed\n");
      return NULL;
   }

   struct pipe_fence_handle *f =
      panfrost_fence_from_fd(ctx, fd, PIPE_FD_TYPE_NATIVE_SYNC);

   close(fd);

   return f;
}
@@ -30,29 +30,24 @@
struct panfrost_context;

struct pipe_fence_handle {
   struct pipe_reference reference;
   uint32_t syncobj;
   bool signaled;
};

void panfrost_fence_reference(struct pipe_screen *pscreen,
                              struct pipe_fence_handle **ptr,
                              struct pipe_fence_handle *fence);

bool panfrost_fence_finish(struct pipe_screen *pscreen,
                           struct pipe_context *ctx,
                           struct pipe_fence_handle *fence, uint64_t timeout);

int panfrost_fence_get_fd(struct pipe_screen *screen,
                          struct pipe_fence_handle *f);

struct pipe_fence_handle *panfrost_fence_from_fd(struct panfrost_context *ctx,
                                                 int fd,
                                                 enum pipe_fd_type type);

struct pipe_fence_handle *panfrost_fence_create(struct panfrost_context *ctx);
@@ -21,66 +21,66 @@
 * SOFTWARE.
 */

#include "util/u_vbuf.h"
#include "pan_context.h"

void
panfrost_analyze_sysvals(struct panfrost_compiled_shader *ss)
{
   unsigned dirty = 0;
   unsigned dirty_shader = PAN_DIRTY_STAGE_SHADER | PAN_DIRTY_STAGE_CONST;

   for (unsigned i = 0; i < ss->info.sysvals.sysval_count; ++i) {
      switch (PAN_SYSVAL_TYPE(ss->info.sysvals.sysvals[i])) {
      case PAN_SYSVAL_VIEWPORT_SCALE:
      case PAN_SYSVAL_VIEWPORT_OFFSET:
         dirty |= PAN_DIRTY_VIEWPORT;
         break;

      case PAN_SYSVAL_TEXTURE_SIZE:
         dirty_shader |= PAN_DIRTY_STAGE_TEXTURE;
         break;

      case PAN_SYSVAL_SSBO:
         dirty_shader |= PAN_DIRTY_STAGE_SSBO;
         break;

      case PAN_SYSVAL_XFB:
         dirty |= PAN_DIRTY_SO;
         break;

      case PAN_SYSVAL_SAMPLER:
         dirty_shader |= PAN_DIRTY_STAGE_SAMPLER;
         break;

      case PAN_SYSVAL_IMAGE_SIZE:
         dirty_shader |= PAN_DIRTY_STAGE_IMAGE;
         break;

      case PAN_SYSVAL_NUM_WORK_GROUPS:
      case PAN_SYSVAL_LOCAL_GROUP_SIZE:
      case PAN_SYSVAL_WORK_DIM:
      case PAN_SYSVAL_VERTEX_INSTANCE_OFFSETS:
      case PAN_SYSVAL_NUM_VERTICES:
         dirty |= PAN_DIRTY_PARAMS;
         break;

      case PAN_SYSVAL_DRAWID:
         dirty |= PAN_DIRTY_DRAWID;
         break;

      case PAN_SYSVAL_SAMPLE_POSITIONS:
      case PAN_SYSVAL_MULTISAMPLED:
      case PAN_SYSVAL_RT_CONVERSION:
         /* Nothing beyond the batch itself */
         break;
      default:
         unreachable("Invalid sysval");
      }
   }

   ss->dirty_3d = dirty;
   ss->dirty_shader = dirty_shader;
}

/*
@@ -93,25 +93,22 @@ panfrost_get_index_buffer(struct panfrost_batch *batch,
                          const struct pipe_draw_info *info,
                          const struct pipe_draw_start_count_bias *draw)
{
   struct panfrost_resource *rsrc = pan_resource(info->index.resource);
   off_t offset = draw->start * info->index_size;

   if (!info->has_user_indices) {
      /* Only resources can be directly mapped */
      panfrost_batch_read_rsrc(batch, rsrc, PIPE_SHADER_VERTEX);
      return rsrc->image.data.bo->ptr.gpu + offset;
   } else {
      /* Otherwise, we need to upload to transient memory */
      const uint8_t *ibuf8 = (const uint8_t *)info->index.user;
      struct panfrost_ptr T = pan_pool_alloc_aligned(
         &batch->pool.base, draw->count * info->index_size, info->index_size);

      memcpy(T.cpu, ibuf8 + offset, draw->count * info->index_size);
      return T.gpu;
   }
}

/* Gets a GPU address for the associated index buffer. Only gauranteed to be
@@ -126,34 +123,30 @@ panfrost_get_index_buffer_bounded(struct panfrost_batch *batch,
                                  const struct pipe_draw_start_count_bias *draw,
                                  unsigned *min_index, unsigned *max_index)
{
   struct panfrost_resource *rsrc = pan_resource(info->index.resource);
   struct panfrost_context *ctx = batch->ctx;
   bool needs_indices = true;

   if (info->index_bounds_valid) {
      *min_index = info->min_index;
      *max_index = info->max_index;
      needs_indices = false;
   } else if (!info->has_user_indices) {
      /* Check the cache */
      needs_indices = !panfrost_minmax_cache_get(
         rsrc->index_cache, draw->start, draw->count, min_index, max_index);
   }

   if (needs_indices) {
      /* Fallback */
      u_vbuf_get_minmax_index(&ctx->base, info, draw, min_index, max_index);

      if (!info->has_user_indices)
         panfrost_minmax_cache_add(rsrc->index_cache, draw->start, draw->count,
                                   *min_index, *max_index);
   }

   return panfrost_get_index_buffer(batch, info, draw);
}

/**
@@ -163,26 +156,24 @@ panfrost_get_index_buffer_bounded(struct panfrost_batch *batch,
 * elements CSO create time, not at draw time.
 */
unsigned
pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers, unsigned *nr_bufs,
                         unsigned vbi, unsigned divisor)
{
   /* Look up the buffer */
   for (unsigned i = 0; i < (*nr_bufs); ++i) {
      if (buffers[i].vbi == vbi && buffers[i].divisor == divisor)
         return i;
   }

   /* Else, create a new buffer */
   unsigned idx = (*nr_bufs)++;

   buffers[idx] = (struct pan_vertex_buffer){
      .vbi = vbi,
      .divisor = divisor,
   };

   return idx;
}

/*
@@ -194,8 +185,8 @@ pan_assign_vertex_buffer(struct pan_vertex_buffer *buffers,
static void
panfrost_draw_target(struct panfrost_batch *batch, unsigned target)
{
   batch->draws |= target;
   batch->resolve |= target;
}

/*
@@ -206,34 +197,34 @@ panfrost_draw_target(struct panfrost_batch *batch, unsigned target)
void
panfrost_set_batch_masks_blend(struct panfrost_batch *batch)
{
   struct panfrost_context *ctx = batch->ctx;
   struct panfrost_blend_state *blend = ctx->blend;

   for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
      if (blend->info[i].enabled && batch->key.cbufs[i])
         panfrost_draw_target(batch, PIPE_CLEAR_COLOR0 << i);
   }
}

void
panfrost_set_batch_masks_zs(struct panfrost_batch *batch)
{
   struct panfrost_context *ctx = batch->ctx;
   struct pipe_depth_stencil_alpha_state *zsa = (void *)ctx->depth_stencil;

   /* Assume depth is read (TODO: perf) */
   if (zsa->depth_enabled)
      batch->read |= PIPE_CLEAR_DEPTH;

   if (zsa->depth_writemask)
      panfrost_draw_target(batch, PIPE_CLEAR_DEPTH);

   if (zsa->stencil[0].enabled) {
      panfrost_draw_target(batch, PIPE_CLEAR_STENCIL);

      /* Assume stencil is read (TODO: perf) */
      batch->read |= PIPE_CLEAR_STENCIL;
   }
}

void
|
@ -241,21 +232,20 @@ panfrost_track_image_access(struct panfrost_batch *batch,
|
|||
enum pipe_shader_type stage,
|
||||
struct pipe_image_view *image)
|
||||
{
|
||||
struct panfrost_resource *rsrc = pan_resource(image->resource);
|
||||
struct panfrost_resource *rsrc = pan_resource(image->resource);
|
||||
|
||||
if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE) {
|
||||
panfrost_batch_write_rsrc(batch, rsrc, stage);
|
||||
if (image->shader_access & PIPE_IMAGE_ACCESS_WRITE) {
|
||||
panfrost_batch_write_rsrc(batch, rsrc, stage);
|
||||
|
||||
bool is_buffer = rsrc->base.target == PIPE_BUFFER;
|
||||
unsigned level = is_buffer ? 0 : image->u.tex.level;
|
||||
BITSET_SET(rsrc->valid.data, level);
|
||||
bool is_buffer = rsrc->base.target == PIPE_BUFFER;
|
||||
unsigned level = is_buffer ? 0 : image->u.tex.level;
|
||||
BITSET_SET(rsrc->valid.data, level);
|
||||
|
||||
if (is_buffer) {
|
||||
util_range_add(&rsrc->base, &rsrc->valid_buffer_range,
|
||||
0, rsrc->base.width0);
|
||||
}
|
||||
} else {
|
||||
panfrost_batch_read_rsrc(batch, rsrc, stage);
|
||||
}
|
||||
if (is_buffer) {
|
||||
util_range_add(&rsrc->base, &rsrc->valid_buffer_range, 0,
|
||||
rsrc->base.width0);
|
||||
}
|
||||
} else {
|
||||
panfrost_batch_read_rsrc(batch, rsrc, stage);
|
||||
}
|
||||
}
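
The deduplication in pan_assign_vertex_buffer earlier in this file is easy to exercise in isolation. Below is a minimal, self-contained sketch (with a simplified stand-in for struct pan_vertex_buffer; the real struct carries more state) showing that repeated (vbi, divisor) pairs map back to the same slot:

#include <assert.h>
#include <stdio.h>

struct vb {
   unsigned vbi, divisor;
};

/* Same lookup-or-append logic as pan_assign_vertex_buffer */
static unsigned
assign(struct vb *bufs, unsigned *nr, unsigned vbi, unsigned divisor)
{
   for (unsigned i = 0; i < *nr; ++i) {
      if (bufs[i].vbi == vbi && bufs[i].divisor == divisor)
         return i;
   }

   unsigned idx = (*nr)++;
   bufs[idx] = (struct vb){.vbi = vbi, .divisor = divisor};
   return idx;
}

int
main(void)
{
   struct vb bufs[4];
   unsigned nr = 0;

   assert(assign(bufs, &nr, 0, 0) == 0);
   assert(assign(bufs, &nr, 1, 2) == 1);
   assert(assign(bufs, &nr, 0, 0) == 0); /* deduplicated */
   printf("%u buffers\n", nr);           /* prints "2 buffers" */
   return 0;
}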
File diff suppressed because it is too large
@@ -26,8 +26,8 @@
#ifndef __PAN_JOB_H__
#define __PAN_JOB_H__

#include "util/u_dynarray.h"
#include "pipe/p_state.h"
#include "util/u_dynarray.h"
#include "pan_cs.h"
#include "pan_mempool.h"
#include "pan_resource.h"

@@ -39,11 +39,11 @@
 * error. The getter needs to be used instead.
 */
struct pan_tristate {
   enum {
      PAN_TRISTATE_DONTCARE,
      PAN_TRISTATE_FALSE,
      PAN_TRISTATE_TRUE,
   } v;
};

/*

@@ -53,20 +53,20 @@ struct pan_tristate {
static bool
pan_tristate_set(struct pan_tristate *state, bool value)
{
   switch (state->v) {
   case PAN_TRISTATE_DONTCARE:
      state->v = value ? PAN_TRISTATE_TRUE : PAN_TRISTATE_FALSE;
      return true;

   case PAN_TRISTATE_FALSE:
      return (value == false);

   case PAN_TRISTATE_TRUE:
      return (value == true);

   default:
      unreachable("Invalid tristate value");
   }
}

/*

@@ -76,189 +76,179 @@ pan_tristate_set(struct pan_tristate *state, bool value)
static bool
pan_tristate_get(struct pan_tristate state)
{
   return (state.v == PAN_TRISTATE_TRUE);
}

/* A panfrost_batch corresponds to a bound FBO we're rendering to,
 * collecting over multiple draws. */

struct panfrost_batch {
   struct panfrost_context *ctx;
   struct pipe_framebuffer_state key;

   /* Sequence number used to implement LRU eviction when all batch slots are used */
   uint64_t seqnum;
   /* Sequence number used to implement LRU eviction when all batch slots are
    * used */
   uint64_t seqnum;

   /* Buffers cleared (PIPE_CLEAR_* bitmask) */
   unsigned clear;

   /* Buffers drawn */
   unsigned draws;

   /* Buffers read */
   unsigned read;

   /* Buffers needing resolve to memory */
   unsigned resolve;

   /* Packed clear values, indexed by both render target as well as word.
    * Essentially, a single pixel is packed, with some padding to bring it
    * up to a 32-bit interval; that pixel is then duplicated over to fill
    * all 16-bytes */

   uint32_t clear_color[PIPE_MAX_COLOR_BUFS][4];
   float clear_depth;
   unsigned clear_stencil;

   /* Amount of thread local storage required per thread */
   unsigned stack_size;

   /* Amount of shared memory needed per workgroup (for compute) */
   unsigned shared_size;

   /* The bounding box covered by this job, taking scissors into account.
    * Basically, the bounding box we have to run fragment shaders for */

   unsigned minx, miny;
   unsigned maxx, maxy;

   /* Acts as a rasterizer discard */
   bool scissor_culls_everything;

   /* BOs referenced not in the pool */
   unsigned num_bos;
   struct util_dynarray bos;

   /* Pool owned by this batch (released when the batch is released) used for temporary descriptors */
   struct panfrost_pool pool;
   /* Pool owned by this batch (released when the batch is released) used for
    * temporary descriptors */
   struct panfrost_pool pool;

   /* Pool also owned by this batch that is not CPU mapped (created as
    * INVISIBLE) used for private GPU-internal structures, particularly
    * varyings */
   struct panfrost_pool invisible_pool;

   /* Job scoreboarding state */
   struct pan_scoreboard scoreboard;

   /* Polygon list bound to the batch, or NULL if none bound yet */
   struct panfrost_bo *polygon_list;

   /* Scratchpad BO bound to the batch, or NULL if none bound yet */
   struct panfrost_bo *scratchpad;

   /* Shared memory BO bound to the batch, or NULL if none bound yet */
   struct panfrost_bo *shared_memory;

   /* Framebuffer descriptor. */
   struct panfrost_ptr framebuffer;

   /* Thread local storage descriptor. */
   struct panfrost_ptr tls;

   /* Tiler context */
   struct pan_tiler_context tiler_ctx;

   /* Keep the num_work_groups sysval around for indirect dispatch */
   mali_ptr num_wg_sysval[3];

   /* Cached descriptors */
   mali_ptr viewport;
   mali_ptr rsd[PIPE_SHADER_TYPES];
   mali_ptr textures[PIPE_SHADER_TYPES];
   mali_ptr samplers[PIPE_SHADER_TYPES];
   mali_ptr attribs[PIPE_SHADER_TYPES];
   mali_ptr attrib_bufs[PIPE_SHADER_TYPES];
   mali_ptr uniform_buffers[PIPE_SHADER_TYPES];
   mali_ptr push_uniforms[PIPE_SHADER_TYPES];
   mali_ptr depth_stencil;
   mali_ptr blend;

   /* Valhall: struct mali_scissor_packed */
   unsigned scissor[2];
   float minimum_z, maximum_z;

   /* Used on Valhall only. Midgard includes attributes in-band with
    * attributes, wildly enough.
    */
   mali_ptr images[PIPE_SHADER_TYPES];

   /* On Valhall, these are properties of the batch. On Bifrost, they are
    * per draw.
    */
   struct pan_tristate sprite_coord_origin;
   struct pan_tristate first_provoking_vertex;
};

/* Functions for managing the above */

struct panfrost_batch *
panfrost_get_batch_for_fbo(struct panfrost_context *ctx);
struct panfrost_batch *panfrost_get_batch_for_fbo(struct panfrost_context *ctx);

struct panfrost_batch *
panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx, const char *reason);
panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx,
                                 const char *reason);

void
panfrost_batch_add_bo(struct panfrost_batch *batch,
                      struct panfrost_bo *bo,
                      enum pipe_shader_type stage);
void panfrost_batch_add_bo(struct panfrost_batch *batch, struct panfrost_bo *bo,
                           enum pipe_shader_type stage);

void
panfrost_batch_read_rsrc(struct panfrost_batch *batch,
                         struct panfrost_resource *rsrc,
                         enum pipe_shader_type stage);
void panfrost_batch_read_rsrc(struct panfrost_batch *batch,
                              struct panfrost_resource *rsrc,
                              enum pipe_shader_type stage);

void
panfrost_batch_write_rsrc(struct panfrost_batch *batch,
                          struct panfrost_resource *rsrc,
                          enum pipe_shader_type stage);
void panfrost_batch_write_rsrc(struct panfrost_batch *batch,
                               struct panfrost_resource *rsrc,
                               enum pipe_shader_type stage);

bool
panfrost_any_batch_reads_rsrc(struct panfrost_context *ctx,
                              struct panfrost_resource *rsrc);
bool panfrost_any_batch_reads_rsrc(struct panfrost_context *ctx,
                                   struct panfrost_resource *rsrc);

bool
panfrost_any_batch_writes_rsrc(struct panfrost_context *ctx,
                               struct panfrost_resource *rsrc);
bool panfrost_any_batch_writes_rsrc(struct panfrost_context *ctx,
                                    struct panfrost_resource *rsrc);

struct panfrost_bo *panfrost_batch_create_bo(struct panfrost_batch *batch,
                                             size_t size, uint32_t create_flags,
                                             enum pipe_shader_type stage,
                                             const char *label);

void panfrost_flush_all_batches(struct panfrost_context *ctx,
                                const char *reason);

void panfrost_flush_batches_accessing_rsrc(struct panfrost_context *ctx,
                                           struct panfrost_resource *rsrc,
                                           const char *reason);

void panfrost_flush_writer(struct panfrost_context *ctx,
                           struct panfrost_resource *rsrc, const char *reason);

void panfrost_batch_adjust_stack_size(struct panfrost_batch *batch);

struct panfrost_bo *panfrost_batch_get_scratchpad(struct panfrost_batch *batch,
                                                  unsigned size,
                                                  unsigned thread_tls_alloc,
                                                  unsigned core_id_range);

struct panfrost_bo *
panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size,
                         uint32_t create_flags, enum pipe_shader_type stage,
                         const char *label);
panfrost_batch_get_shared_memory(struct panfrost_batch *batch, unsigned size,
                                 unsigned workgroup_count);

void
panfrost_flush_all_batches(struct panfrost_context *ctx, const char *reason);
void panfrost_batch_clear(struct panfrost_batch *batch, unsigned buffers,
                          const union pipe_color_union *color, double depth,
                          unsigned stencil);

void
panfrost_flush_batches_accessing_rsrc(struct panfrost_context *ctx,
                                      struct panfrost_resource *rsrc,
                                      const char *reason);
void panfrost_batch_union_scissor(struct panfrost_batch *batch, unsigned minx,
                                  unsigned miny, unsigned maxx, unsigned maxy);

void
panfrost_flush_writer(struct panfrost_context *ctx,
                      struct panfrost_resource *rsrc,
                      const char *reason);

void
panfrost_batch_adjust_stack_size(struct panfrost_batch *batch);

struct panfrost_bo *
panfrost_batch_get_scratchpad(struct panfrost_batch *batch, unsigned size, unsigned thread_tls_alloc, unsigned core_id_range);

struct panfrost_bo *
panfrost_batch_get_shared_memory(struct panfrost_batch *batch, unsigned size, unsigned workgroup_count);

void
panfrost_batch_clear(struct panfrost_batch *batch,
                     unsigned buffers,
                     const union pipe_color_union *color,
                     double depth, unsigned stencil);

void
panfrost_batch_union_scissor(struct panfrost_batch *batch,
                             unsigned minx, unsigned miny,
                             unsigned maxx, unsigned maxy);

bool
panfrost_batch_skip_rasterization(struct panfrost_batch *batch);
bool panfrost_batch_skip_rasterization(struct panfrost_batch *batch);

#endif
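
The pan_tristate helper above is small enough to demonstrate on its own: the first draw pins a don't-care value, and later draws may only agree with it. A self-contained sketch of those merge semantics (standalone copies of the definitions above, with unreachable() swapped for abort() so it compiles outside Mesa):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct pan_tristate {
   enum { PAN_TRISTATE_DONTCARE, PAN_TRISTATE_FALSE, PAN_TRISTATE_TRUE } v;
};

static bool
pan_tristate_set(struct pan_tristate *state, bool value)
{
   switch (state->v) {
   case PAN_TRISTATE_DONTCARE:
      state->v = value ? PAN_TRISTATE_TRUE : PAN_TRISTATE_FALSE;
      return true;
   case PAN_TRISTATE_FALSE:
      return (value == false);
   case PAN_TRISTATE_TRUE:
      return (value == true);
   default:
      abort();
   }
}

int
main(void)
{
   struct pan_tristate first_provoking_vertex = {PAN_TRISTATE_DONTCARE};

   /* First draw pins the value, later draws must agree */
   printf("%d\n", pan_tristate_set(&first_provoking_vertex, true));  /* 1 */
   printf("%d\n", pan_tristate_set(&first_provoking_vertex, true));  /* 1 */
   printf("%d\n", pan_tristate_set(&first_provoking_vertex, false)); /* 0: conflict */
   return 0;
}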
@@ -46,124 +46,124 @@
static struct panfrost_bo *
panfrost_pool_alloc_backing(struct panfrost_pool *pool, size_t bo_sz)
{
   /* We don't know what the BO will be used for, so let's flag it
    * RW and attach it to both the fragment and vertex/tiler jobs.
    * TODO: if we want fine grained BO assignment we should pass
    * flags to this function and keep the read/write,
    * fragment/vertex+tiler pools separate.
    */
   struct panfrost_bo *bo = panfrost_bo_create(pool->base.dev, bo_sz,
                                               pool->base.create_flags, pool->base.label);
   struct panfrost_bo *bo = panfrost_bo_create(
      pool->base.dev, bo_sz, pool->base.create_flags, pool->base.label);

   if (pool->owned)
      util_dynarray_append(&pool->bos, struct panfrost_bo *, bo);
   else
      panfrost_bo_unreference(pool->transient_bo);

   pool->transient_bo = bo;
   pool->transient_offset = 0;

   return bo;
}

void
panfrost_pool_init(struct panfrost_pool *pool, void *memctx,
                   struct panfrost_device *dev,
                   unsigned create_flags, size_t slab_size, const char *label,
                   bool prealloc, bool owned)
                   struct panfrost_device *dev, unsigned create_flags,
                   size_t slab_size, const char *label, bool prealloc,
                   bool owned)
{
   memset(pool, 0, sizeof(*pool));
   pan_pool_init(&pool->base, dev, create_flags, slab_size, label);
   pool->owned = owned;

   if (owned)
      util_dynarray_init(&pool->bos, memctx);

   if (prealloc)
      panfrost_pool_alloc_backing(pool, pool->base.slab_size);
}

void
panfrost_pool_cleanup(struct panfrost_pool *pool)
{
   if (!pool->owned) {
      panfrost_bo_unreference(pool->transient_bo);
      return;
   }

   util_dynarray_foreach(&pool->bos, struct panfrost_bo *, bo)
      panfrost_bo_unreference(*bo);

   util_dynarray_fini(&pool->bos);
}

void
panfrost_pool_get_bo_handles(struct panfrost_pool *pool, uint32_t *handles)
{
   assert(pool->owned && "pool does not track BOs in unowned mode");

   unsigned idx = 0;
   util_dynarray_foreach(&pool->bos, struct panfrost_bo *, bo) {
      assert((*bo)->gem_handle > 0);
      handles[idx++] = (*bo)->gem_handle;

      /* Update the BO access flags so that panfrost_bo_wait() knows
       * about all pending accesses.
       * We only keep the READ/WRITE info since this is all the BO
       * wait logic cares about.
       * We also preserve existing flags as this batch might not
       * be the first one to access the BO.
       */
      (*bo)->gpu_access |= PAN_BO_ACCESS_RW;
   }
}

#define PAN_GUARD_SIZE 4096

static struct panfrost_ptr
panfrost_pool_alloc_aligned(struct panfrost_pool *pool, size_t sz, unsigned alignment)
panfrost_pool_alloc_aligned(struct panfrost_pool *pool, size_t sz,
                            unsigned alignment)
{
   assert(alignment == util_next_power_of_two(alignment));

   /* Find or create a suitable BO */
   struct panfrost_bo *bo = pool->transient_bo;
   unsigned offset = ALIGN_POT(pool->transient_offset, alignment);

#ifdef PAN_DBG_OVERFLOW
   if (unlikely(pool->base.dev->debug & PAN_DBG_OVERFLOW) &&
       !(pool->base.create_flags & PAN_BO_INVISIBLE)) {
      unsigned aligned = ALIGN_POT(sz, sysconf(_SC_PAGESIZE));
      unsigned bo_size = aligned + PAN_GUARD_SIZE;

      bo = panfrost_pool_alloc_backing(pool, bo_size);
      memset(bo->ptr.cpu, 0xbb, bo_size);

      /* Place the object as close as possible to the protected
       * region at the end of the buffer while keeping alignment. */
      offset = ROUND_DOWN_TO(aligned - sz, alignment);

      if (mprotect(bo->ptr.cpu + aligned,
                   PAN_GUARD_SIZE, PROT_NONE) == -1)
         perror("mprotect");
      if (mprotect(bo->ptr.cpu + aligned, PAN_GUARD_SIZE, PROT_NONE) == -1)
         perror("mprotect");

      pool->transient_bo = NULL;
   }
#endif

   /* If we don't fit, allocate a new backing */
   if (unlikely(bo == NULL || (offset + sz) >= pool->base.slab_size)) {
      bo = panfrost_pool_alloc_backing(pool,
                                       ALIGN_POT(MAX2(pool->base.slab_size, sz), 4096));
      offset = 0;
   }
   /* If we don't fit, allocate a new backing */
   if (unlikely(bo == NULL || (offset + sz) >= pool->base.slab_size)) {
      bo = panfrost_pool_alloc_backing(
         pool, ALIGN_POT(MAX2(pool->base.slab_size, sz), 4096));
      offset = 0;
   }

   pool->transient_offset = offset + sz;

   struct panfrost_ptr ret = {
      .cpu = bo->ptr.cpu + offset,
      .gpu = bo->ptr.gpu + offset,
   };

   return ret;
}
PAN_POOL_ALLOCATOR(struct panfrost_pool, panfrost_pool_alloc_aligned)
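
The core of panfrost_pool_alloc_aligned above is a classic bump allocator: round the cursor up to the requested power-of-two boundary, hand out that offset, then advance the cursor past the allocation. A rough standalone illustration of the offset arithmetic (with a local ALIGN_POT macro standing in for Mesa's; not the driver's actual code path):

#include <assert.h>
#include <stdio.h>

/* Round up to a power-of-two boundary, as Mesa's ALIGN_POT does */
#define ALIGN_POT(x, pot) (((x) + (pot)-1) & ~((pot)-1))

int
main(void)
{
   unsigned transient_offset = 70;

   /* A 64-byte-aligned allocation bumps the cursor to the next boundary */
   unsigned offset = ALIGN_POT(transient_offset, 64);
   assert(offset == 128);

   /* The cursor then advances past the allocation */
   unsigned sz = 96;
   transient_offset = offset + sz;
   printf("alloc at %u, next cursor %u\n", offset, transient_offset);
   return 0;
}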
@@ -31,37 +31,37 @@
   be unowned for persistent uploads. */

struct panfrost_pool {
   /* Inherit from pan_pool */
   struct pan_pool base;

   /* BOs allocated by this pool */
   struct util_dynarray bos;

   /* Current transient BO */
   struct panfrost_bo *transient_bo;

   /* Within the topmost transient BO, how much has been used? */
   unsigned transient_offset;

   /* Mode of the pool. BO management is in the pool for owned mode, but
    * the consumed for unowned mode. */
   bool owned;
};

static inline struct panfrost_pool *
to_panfrost_pool(struct pan_pool *pool)
{
   return container_of(pool, struct panfrost_pool, base);
}

/* Reference to pool allocated memory for an unowned pool */

struct panfrost_pool_ref {
   /* Owning BO */
   struct panfrost_bo *bo;

   /* Mapped GPU VA */
   mali_ptr gpu;
};

/* Take a reference to an allocation pool. Call directly after allocating from

@@ -70,32 +70,30 @@ struct panfrost_pool_ref {
static inline struct panfrost_pool_ref
panfrost_pool_take_ref(struct panfrost_pool *pool, mali_ptr ptr)
{
   if (!pool->owned)
      panfrost_bo_reference(pool->transient_bo);

   return (struct panfrost_pool_ref) {
      .bo = pool->transient_bo,
      .gpu = ptr,
   };
   return (struct panfrost_pool_ref){
      .bo = pool->transient_bo,
      .gpu = ptr,
   };
}

void
panfrost_pool_init(struct panfrost_pool *pool, void *memctx,
                   struct panfrost_device *dev, unsigned create_flags,
                   size_t slab_size, const char *label, bool prealloc, bool
                   owned);
void panfrost_pool_init(struct panfrost_pool *pool, void *memctx,
                        struct panfrost_device *dev, unsigned create_flags,
                        size_t slab_size, const char *label, bool prealloc,
                        bool owned);

void
panfrost_pool_cleanup(struct panfrost_pool *pool);
void panfrost_pool_cleanup(struct panfrost_pool *pool);

static inline unsigned
panfrost_pool_num_bos(struct panfrost_pool *pool)
{
   assert(pool->owned && "pool does not track BOs in unowned mode");
   return util_dynarray_num_elements(&pool->bos, struct panfrost_bo *);
}

void
panfrost_pool_get_bo_handles(struct panfrost_pool *pool, uint32_t *handles);
void panfrost_pool_get_bo_handles(struct panfrost_pool *pool,
                                  uint32_t *handles);

#endif
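
One subtlety worth spelling out: for unowned pools, panfrost_pool_take_ref above bumps the BO refcount so the allocation outlives pool recycling, while owned pools skip the bump because the pool's own BO list already holds the reference. A toy model of that ownership split (a fake refcount standing in for panfrost_bo_reference; purely illustrative, not the driver's code):

#include <assert.h>
#include <stdbool.h>

struct toy_bo {
   int refcnt;
};

/* Model: unowned pools add a reference per take_ref; owned pools do not,
 * because the pool's BO list already holds the reference. */
static void
toy_take_ref(struct toy_bo *bo, bool pool_owned)
{
   if (!pool_owned)
      bo->refcnt++;
}

int
main(void)
{
   struct toy_bo bo = {.refcnt = 1};

   toy_take_ref(&bo, true); /* owned: refcount unchanged */
   assert(bo.refcnt == 1);

   toy_take_ref(&bo, false); /* unowned: caller now holds a reference */
   assert(bo.refcnt == 2);
   return 0;
}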
@@ -31,8 +31,7 @@ extern "C" {
struct pipe_screen;
struct renderonly;

struct pipe_screen *
panfrost_create_screen(int fd, struct renderonly *ro);
struct pipe_screen *panfrost_create_screen(int fd, struct renderonly *ro);

#ifdef __cplusplus
}
File diff suppressed because it is too large
@@ -22,87 +22,86 @@
 *
 */

#ifndef PAN_RESOURCE_H
#define PAN_RESOURCE_H

#include "pan_screen.h"
#include "pan_minmax_cache.h"
#include "pan_texture.h"
#include "drm-uapi/drm.h"
#include "util/u_range.h"
#include "pan_minmax_cache.h"
#include "pan_screen.h"
#include "pan_texture.h"

#define LAYOUT_CONVERT_THRESHOLD 8
#define PAN_MAX_BATCHES 32

#define PAN_BIND_SHARED_MASK (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | \
                              PIPE_BIND_SHARED)
#define PAN_BIND_SHARED_MASK                                                   \
   (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)

struct panfrost_resource {
   struct pipe_resource base;
   struct {
      struct pipe_scissor_state extent;
      struct {
         bool enable;
         unsigned stride;
         unsigned size;
         BITSET_WORD *data;
      } tile_map;
   } damage;

   struct renderonly_scanout *scanout;

   struct panfrost_resource *separate_stencil;

   struct util_range valid_buffer_range;

   /* Description of the resource layout */
   struct pan_image image;

   struct {
      /* Is the checksum for this image valid? Implicitly refers to
       * the first slice; we only checksum non-mipmapped 2D images */
      bool crc;

      /* Has anything been written to this slice? */
      BITSET_DECLARE(data, MAX_MIP_LEVELS);
   } valid;

   /* Whether the modifier can be changed */
   bool modifier_constant;

   /* Used to decide when to convert to another modifier */
   uint16_t modifier_updates;

   /* Do all pixels have the same stencil value? */
   bool constant_stencil;

   /* The stencil value if constant_stencil is set */
   uint8_t stencil_value;

   /* Cached min/max values for index buffers */
   struct panfrost_minmax_cache *index_cache;
};

static inline struct panfrost_resource *
pan_resource(struct pipe_resource *p)
{
   return (struct panfrost_resource *)p;
}

struct panfrost_transfer {
   struct pipe_transfer base;
   void *map;
   struct {
      struct pipe_resource *rsrc;
      struct pipe_box box;
   } staging;
};

static inline struct panfrost_transfer *
pan_transfer(struct pipe_transfer *p)
{
   return (struct panfrost_transfer *)p;
}

void panfrost_resource_screen_init(struct pipe_screen *screen);

@@ -113,53 +112,48 @@ void panfrost_resource_context_init(struct pipe_context *pctx);

/* Blitting */

void
panfrost_blitter_save(struct panfrost_context *ctx, bool render_cond);
void panfrost_blitter_save(struct panfrost_context *ctx, bool render_cond);

void
panfrost_blit(struct pipe_context *pipe,
              const struct pipe_blit_info *info);
void panfrost_blit(struct pipe_context *pipe,
                   const struct pipe_blit_info *info);

void
panfrost_resource_set_damage_region(struct pipe_screen *screen,
                                    struct pipe_resource *res,
                                    unsigned int nrects,
                                    const struct pipe_box *rects);
void panfrost_resource_set_damage_region(struct pipe_screen *screen,
                                         struct pipe_resource *res,
                                         unsigned int nrects,
                                         const struct pipe_box *rects);

static inline enum mali_texture_dimension
panfrost_translate_texture_dimension(enum pipe_texture_target t) {
   switch (t)
   {
panfrost_translate_texture_dimension(enum pipe_texture_target t)
{
   switch (t) {
   case PIPE_BUFFER:
   case PIPE_TEXTURE_1D:
   case PIPE_TEXTURE_1D_ARRAY:
      return MALI_TEXTURE_DIMENSION_1D;

   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_RECT:
      return MALI_TEXTURE_DIMENSION_2D;

   case PIPE_TEXTURE_3D:
      return MALI_TEXTURE_DIMENSION_3D;

   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_CUBE_ARRAY:
      return MALI_TEXTURE_DIMENSION_CUBE;

   default:
      unreachable("Unknown target");
   }
}

void
pan_resource_modifier_convert(struct panfrost_context *ctx,
                              struct panfrost_resource *rsrc,
                              uint64_t modifier, const char *reason);
void pan_resource_modifier_convert(struct panfrost_context *ctx,
                                   struct panfrost_resource *rsrc,
                                   uint64_t modifier, const char *reason);

void
pan_legalize_afbc_format(struct panfrost_context *ctx,
                         struct panfrost_resource *rsrc,
                         enum pipe_format format);
void pan_legalize_afbc_format(struct panfrost_context *ctx,
                              struct panfrost_resource *rsrc,
                              enum pipe_format format);

#endif /* PAN_RESOURCE_H */
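
pan_resource() and pan_transfer() above rely on the usual Gallium subclassing idiom: the base struct is the first member of the derived struct, so a pointer to the base can be cast back to the derived type. A minimal standalone sketch of that pattern (toy structs, not the driver's):

#include <assert.h>
#include <stddef.h>

struct base {
   int kind;
};

struct derived {
   struct base base; /* must be the first member for the cast to be valid */
   int extra;
};

static struct derived *
to_derived(struct base *b)
{
   return (struct derived *)b;
}

int
main(void)
{
   struct derived d = {.base = {.kind = 1}, .extra = 42};
   struct base *b = &d.base;

   /* Because offsetof(struct derived, base) == 0, the cast round-trips */
   assert(offsetof(struct derived, base) == 0);
   assert(to_derived(b)->extra == 42);
   return 0;
}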
File diff suppressed because it is too large
@@ -30,14 +30,14 @@
#define PAN_SCREEN_H

#include <xf86drm.h>
#include "pipe/p_screen.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"
#include "renderonly/renderonly.h"
#include "util/u_dynarray.h"
#include "util/bitset.h"
#include "util/set.h"
#include "util/log.h"
#include "util/disk_cache.h"
#include "util/log.h"
#include "util/set.h"
#include "util/u_dynarray.h"

#include "pan_device.h"
#include "pan_mempool.h"

@@ -45,7 +45,7 @@
#define PAN_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0)

static const struct pipe_driver_query_info panfrost_driver_query_list[] = {
   {"draw-calls", PAN_QUERY_DRAW_CALLS, { 0 }},
   {"draw-calls", PAN_QUERY_DRAW_CALLS, {0}},
};

struct panfrost_batch;

@@ -58,77 +58,74 @@ struct pan_blend_state;
/* Virtual table of per-generation (GenXML) functions */

struct panfrost_vtable {
   /* Prepares the renderer state descriptor or shader program descriptor
    * for a given compiled shader, and if desired uploads it as well */
   void (*prepare_shader)(struct panfrost_compiled_shader *,
                          struct panfrost_pool *, bool);

   /* Emits a thread local storage descriptor */
   void (*emit_tls)(struct panfrost_batch *);

   /* Emits a framebuffer descriptor */
   void (*emit_fbd)(struct panfrost_batch *, const struct pan_fb_info *);

   /* Emits a fragment job */
   mali_ptr (*emit_fragment_job)(struct panfrost_batch *, const struct pan_fb_info *);
   mali_ptr (*emit_fragment_job)(struct panfrost_batch *,
                                 const struct pan_fb_info *);

   /* General destructor */
   void (*screen_destroy)(struct pipe_screen *);

   /* Preload framebuffer */
   void (*preload)(struct panfrost_batch *, struct pan_fb_info *);

   /* Initialize a Gallium context */
   void (*context_init)(struct pipe_context *pipe);

   /* Device-dependent initialization of a panfrost_batch */
   void (*init_batch)(struct panfrost_batch *batch);

   /* Get blend shader */
   struct pan_blend_shader_variant *
   (*get_blend_shader)(const struct panfrost_device *,
                       const struct pan_blend_state *,
                       nir_alu_type, nir_alu_type,
                       unsigned rt);
   struct pan_blend_shader_variant *(*get_blend_shader)(
      const struct panfrost_device *, const struct pan_blend_state *,
      nir_alu_type, nir_alu_type, unsigned rt);

   /* Initialize the polygon list */
   void (*init_polygon_list)(struct panfrost_batch *);

   /* Shader compilation methods */
   const nir_shader_compiler_options *(*get_compiler_options)(void);
   void (*compile_shader)(nir_shader *s,
                          struct panfrost_compile_inputs *inputs,
                          struct util_dynarray *binary,
                          struct pan_shader_info *info);
   void (*compile_shader)(nir_shader *s, struct panfrost_compile_inputs *inputs,
                          struct util_dynarray *binary,
                          struct pan_shader_info *info);
};

struct panfrost_screen {
   struct pipe_screen base;
   struct panfrost_device dev;
   struct {
      struct panfrost_pool bin_pool;
      struct panfrost_pool desc_pool;
   } blitter;

   struct panfrost_vtable vtbl;
   struct disk_cache *disk_cache;
};

static inline struct panfrost_screen *
pan_screen(struct pipe_screen *p)
{
   return (struct panfrost_screen *)p;
}

static inline struct panfrost_device *
pan_device(struct pipe_screen *p)
{
   return &(pan_screen(p)->dev);
}

int
panfrost_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
                               struct pipe_driver_query_info *info);
int panfrost_get_driver_query_info(struct pipe_screen *pscreen, unsigned index,
                                   struct pipe_driver_query_info *info);

void panfrost_cmdstream_screen_init_v4(struct panfrost_screen *screen);
void panfrost_cmdstream_screen_init_v5(struct panfrost_screen *screen);

@@ -136,13 +133,13 @@ void panfrost_cmdstream_screen_init_v6(struct panfrost_screen *screen);
void panfrost_cmdstream_screen_init_v7(struct panfrost_screen *screen);
void panfrost_cmdstream_screen_init_v9(struct panfrost_screen *screen);

#define perf_debug(dev, ...) \
   do { \
      if (unlikely((dev)->debug & PAN_DBG_PERF)) \
         mesa_logw(__VA_ARGS__); \
   } while(0)
#define perf_debug(dev, ...)                                                   \
   do {                                                                        \
      if (unlikely((dev)->debug & PAN_DBG_PERF))                               \
         mesa_logw(__VA_ARGS__);                                               \
   } while (0)

#define perf_debug_ctx(ctx, ...)                                               \
   perf_debug(pan_device((ctx)->base.screen), __VA_ARGS__);

#endif /* PAN_SCREEN_H */
@@ -28,103 +28,96 @@
 *
 */

#include "pan_context.h"
#include "pan_bo.h"
#include "pan_shader.h"
#include "util/u_memory.h"
#include "nir/tgsi_to_nir.h"
#include "util/u_memory.h"
#include "nir_serialize.h"
#include "pan_bo.h"
#include "pan_context.h"

static struct panfrost_uncompiled_shader *
panfrost_alloc_shader(const nir_shader *nir)
{
   struct panfrost_uncompiled_shader *so =
      rzalloc(NULL, struct panfrost_uncompiled_shader);

   simple_mtx_init(&so->lock, mtx_plain);
   util_dynarray_init(&so->variants, so);

   so->nir = nir;

   /* Serialize the NIR to a binary blob that we can hash for the disk
    * cache. Drop unnecessary information (like variable names) so the
    * serialized NIR is smaller, and also to let us detect more isomorphic
    * shaders when hashing, increasing cache hits.
    */
   struct blob blob;
   blob_init(&blob);
   nir_serialize(&blob, nir, true);
   _mesa_sha1_compute(blob.data, blob.size, so->nir_sha1);
   blob_finish(&blob);

   return so;
}

static struct panfrost_compiled_shader *
panfrost_alloc_variant(struct panfrost_uncompiled_shader *so)
{
   return util_dynarray_grow(&so->variants, struct panfrost_compiled_shader, 1);
}

static void
panfrost_shader_compile(struct panfrost_screen *screen,
                        const nir_shader *ir,
panfrost_shader_compile(struct panfrost_screen *screen, const nir_shader *ir,
                        struct util_debug_callback *dbg,
                        struct panfrost_shader_key *key,
                        unsigned req_local_mem,
                        struct panfrost_shader_key *key, unsigned req_local_mem,
                        unsigned fixed_varying_mask,
                        struct panfrost_shader_binary *out)
{
   struct panfrost_device *dev = pan_device(&screen->base);

   nir_shader *s = nir_shader_clone(NULL, ir);

   struct panfrost_compile_inputs inputs = {
      .debug = dbg,
      .gpu_id = dev->gpu_id,
      .fixed_sysval_ubo = -1,
   };

   /* Lower this early so the backends don't have to worry about it */
   if (s->info.stage == MESA_SHADER_FRAGMENT) {
      inputs.fixed_varying_mask = key->fs.fixed_varying_mask;

      if (s->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
         NIR_PASS_V(s, nir_lower_fragcolor,
                    key->fs.nr_cbufs_for_fragcolor);
      }
      if (s->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
         NIR_PASS_V(s, nir_lower_fragcolor, key->fs.nr_cbufs_for_fragcolor);
      }

      if (key->fs.sprite_coord_enable) {
         NIR_PASS_V(s, nir_lower_texcoord_replace,
                    key->fs.sprite_coord_enable,
                    true /* point coord is sysval */,
                    false /* Y-invert */);
      }
      if (key->fs.sprite_coord_enable) {
         NIR_PASS_V(s, nir_lower_texcoord_replace, key->fs.sprite_coord_enable,
                    true /* point coord is sysval */, false /* Y-invert */);
      }

      if (key->fs.clip_plane_enable) {
         NIR_PASS_V(s, nir_lower_clip_fs,
                    key->fs.clip_plane_enable,
                    false);
      }
      if (key->fs.clip_plane_enable) {
         NIR_PASS_V(s, nir_lower_clip_fs, key->fs.clip_plane_enable, false);
      }

      memcpy(inputs.rt_formats, key->fs.rt_formats, sizeof(inputs.rt_formats));
   } else if (s->info.stage == MESA_SHADER_VERTEX) {
      inputs.fixed_varying_mask = fixed_varying_mask;

      /* No IDVS for internal XFB shaders */
      inputs.no_idvs = s->info.has_transform_feedback_varyings;
   }

   util_dynarray_init(&out->binary, NULL);
   screen->vtbl.compile_shader(s, &inputs, &out->binary, &out->info);

   assert(req_local_mem >= out->info.wls_size);
   out->info.wls_size = req_local_mem;

   /* In both clone and tgsi_to_nir paths, the shader is ralloc'd against
    * a NULL context
    */
   ralloc_free(s);
}

static void

@@ -136,287 +129,288 @@ panfrost_shader_get(struct pipe_screen *pscreen,
                    struct panfrost_compiled_shader *state,
                    unsigned req_local_mem)
{
   struct panfrost_screen *screen = pan_screen(pscreen);
   struct panfrost_device *dev = pan_device(pscreen);

   struct panfrost_shader_binary res = { 0 };
   struct panfrost_shader_binary res = {0};

   /* Try to retrieve the variant from the disk cache. If that fails,
    * compile a new variant and store in the disk cache for later reuse.
    */
   if (!panfrost_disk_cache_retrieve(screen->disk_cache, uncompiled, &state->key, &res)) {
      panfrost_shader_compile(screen, uncompiled->nir, dbg, &state->key,
                              req_local_mem,
                              uncompiled->fixed_varying_mask, &res);
   if (!panfrost_disk_cache_retrieve(screen->disk_cache, uncompiled,
                                     &state->key, &res)) {
      panfrost_shader_compile(screen, uncompiled->nir, dbg, &state->key,
                              req_local_mem, uncompiled->fixed_varying_mask,
                              &res);

      panfrost_disk_cache_store(screen->disk_cache, uncompiled, &state->key, &res);
   }
      panfrost_disk_cache_store(screen->disk_cache, uncompiled, &state->key,
                                &res);
   }

   state->info = res.info;

   if (res.binary.size) {
      state->bin = panfrost_pool_take_ref(shader_pool,
                                          pan_pool_upload_aligned(&shader_pool->base,
                                                                  res.binary.data, res.binary.size, 128));
   }
   if (res.binary.size) {
      state->bin = panfrost_pool_take_ref(
         shader_pool,
         pan_pool_upload_aligned(&shader_pool->base, res.binary.data,
                                 res.binary.size, 128));
   }

   util_dynarray_fini(&res.binary);

   /* Don't upload RSD for fragment shaders since they need draw-time
    * merging for e.g. depth/stencil/alpha. RSDs are replaced by simpler
    * shader program descriptors on Valhall, which can be preuploaded even
    * for fragment shaders. */
   bool upload = !(uncompiled->nir->info.stage == MESA_SHADER_FRAGMENT && dev->arch <= 7);
   bool upload =
      !(uncompiled->nir->info.stage == MESA_SHADER_FRAGMENT && dev->arch <= 7);
   screen->vtbl.prepare_shader(state, desc_pool, upload);

   panfrost_analyze_sysvals(state);
}

static void
panfrost_build_key(struct panfrost_context *ctx,
                   struct panfrost_shader_key *key,
                   const nir_shader *nir)
                   struct panfrost_shader_key *key, const nir_shader *nir)
{
   /* We don't currently have vertex shader variants */
   if (nir->info.stage != MESA_SHADER_FRAGMENT)
      return;

   struct panfrost_device *dev = pan_device(ctx->base.screen);
   struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
   struct pipe_rasterizer_state *rast = (void *) ctx->rasterizer;
   struct pipe_rasterizer_state *rast = (void *)ctx->rasterizer;
   struct panfrost_uncompiled_shader *vs = ctx->uncompiled[MESA_SHADER_VERTEX];

   /* gl_FragColor lowering needs the number of colour buffers */
   if (nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
      key->fs.nr_cbufs_for_fragcolor = fb->nr_cbufs;
   }

   /* Point sprite lowering needed on Bifrost and newer */
   if (dev->arch >= 6 && rast && ctx->active_prim == PIPE_PRIM_POINTS) {
      key->fs.sprite_coord_enable = rast->sprite_coord_enable;
   }

   /* User clip plane lowering needed everywhere */
   if (rast) {
      key->fs.clip_plane_enable = rast->clip_plane_enable;
   }

   if (dev->arch <= 5) {
      u_foreach_bit(i, (nir->info.outputs_read >> FRAG_RESULT_DATA0)) {
         enum pipe_format fmt = PIPE_FORMAT_R8G8B8A8_UNORM;

         if ((fb->nr_cbufs > i) && fb->cbufs[i])
            fmt = fb->cbufs[i]->format;

         if (panfrost_blendable_formats_v6[fmt].internal)
            fmt = PIPE_FORMAT_NONE;

         key->fs.rt_formats[i] = fmt;
      }
   }

   /* Funny desktop GL varying lowering on Valhall */
   if (dev->arch >= 9) {
      assert(vs != NULL && "too early");
      key->fs.fixed_varying_mask = vs->fixed_varying_mask;
   }
}

static struct panfrost_compiled_shader *
panfrost_new_variant_locked(
   struct panfrost_context *ctx,
   struct panfrost_uncompiled_shader *uncompiled,
   struct panfrost_shader_key *key)
panfrost_new_variant_locked(struct panfrost_context *ctx,
                            struct panfrost_uncompiled_shader *uncompiled,
                            struct panfrost_shader_key *key)
{
   struct panfrost_compiled_shader *prog = panfrost_alloc_variant(uncompiled);

   *prog = (struct panfrost_compiled_shader) {
      .key = *key,
      .stream_output = uncompiled->stream_output,
   };
   *prog = (struct panfrost_compiled_shader){
      .key = *key,
      .stream_output = uncompiled->stream_output,
   };

   panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs,
                       uncompiled, &ctx->base.debug, prog, 0);
   panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs, uncompiled,
                       &ctx->base.debug, prog, 0);

   prog->earlyzs = pan_earlyzs_analyze(&prog->info);

   return prog;
}

static void
panfrost_bind_shader_state(
   struct pipe_context *pctx,
   void *hwcso,
   enum pipe_shader_type type)
panfrost_bind_shader_state(struct pipe_context *pctx, void *hwcso,
                           enum pipe_shader_type type)
{
   struct panfrost_context *ctx = pan_context(pctx);
   ctx->uncompiled[type] = hwcso;
   ctx->prog[type] = NULL;

   ctx->dirty |= PAN_DIRTY_TLS_SIZE;
   ctx->dirty_shader[type] |= PAN_DIRTY_STAGE_SHADER;

   if (hwcso)
      panfrost_update_shader_variant(ctx, type);
}

void
panfrost_update_shader_variant(struct panfrost_context *ctx,
                               enum pipe_shader_type type)
{
   /* No shader variants for compute */
   if (type == PIPE_SHADER_COMPUTE)
      return;

   /* We need linking information, defer this */
   if (type == PIPE_SHADER_FRAGMENT && !ctx->uncompiled[PIPE_SHADER_VERTEX])
      return;

   /* Also defer, happens with GALLIUM_HUD */
   if (!ctx->uncompiled[type])
      return;

   /* Match the appropriate variant */
   struct panfrost_uncompiled_shader *uncompiled = ctx->uncompiled[type];
   struct panfrost_compiled_shader *compiled = NULL;

   simple_mtx_lock(&uncompiled->lock);

   struct panfrost_shader_key key = { 0 };
   panfrost_build_key(ctx, &key, uncompiled->nir);
   struct panfrost_shader_key key = {0};
   panfrost_build_key(ctx, &key, uncompiled->nir);

   util_dynarray_foreach(&uncompiled->variants, struct panfrost_compiled_shader, so) {
      if (memcmp(&key, &so->key, sizeof(key)) == 0) {
         compiled = so;
         break;
      }
   }
   util_dynarray_foreach(&uncompiled->variants, struct panfrost_compiled_shader,
                         so) {
      if (memcmp(&key, &so->key, sizeof(key)) == 0) {
         compiled = so;
         break;
      }
   }

   if (compiled == NULL)
      compiled = panfrost_new_variant_locked(ctx, uncompiled, &key);

   ctx->prog[type] = compiled;

   /* TODO: it would be more efficient to release the lock before
    * compiling instead of after, but that can race if thread A compiles a
    * variant while thread B searches for that same variant */
   simple_mtx_unlock(&uncompiled->lock);
}

static void
panfrost_bind_vs_state(struct pipe_context *pctx, void *hwcso)
{
   panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_VERTEX);

   /* Fragment shaders are linked with vertex shaders */
   struct panfrost_context *ctx = pan_context(pctx);
   panfrost_update_shader_variant(ctx, PIPE_SHADER_FRAGMENT);
}

static void
panfrost_bind_fs_state(struct pipe_context *pctx, void *hwcso)
{
   panfrost_bind_shader_state(pctx, hwcso, PIPE_SHADER_FRAGMENT);
}

static void *
panfrost_create_shader_state(
   struct pipe_context *pctx,
   const struct pipe_shader_state *cso)
panfrost_create_shader_state(struct pipe_context *pctx,
                             const struct pipe_shader_state *cso)
{
   nir_shader *nir = (cso->type == PIPE_SHADER_IR_TGSI) ?
                     tgsi_to_nir(cso->tokens, pctx->screen, false) :
                     cso->ir.nir;
   nir_shader *nir = (cso->type == PIPE_SHADER_IR_TGSI)
                        ? tgsi_to_nir(cso->tokens, pctx->screen, false)
                        : cso->ir.nir;

   struct panfrost_uncompiled_shader *so = panfrost_alloc_shader(nir);

   /* The driver gets ownership of the nir_shader for graphics. The NIR is
    * ralloc'd. Free the NIR when we free the uncompiled shader.
    */
   ralloc_steal(so, nir);

   so->stream_output = cso->stream_output;
   so->nir = nir;

   /* Fix linkage early */
   if (so->nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
so->fixed_varying_mask =
|
||||
(so->nir->info.outputs_written & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
|
||||
~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
|
||||
}
|
||||
/* Fix linkage early */
|
||||
if (so->nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
so->fixed_varying_mask =
|
||||
(so->nir->info.outputs_written & BITFIELD_MASK(VARYING_SLOT_VAR0)) &
|
||||
~VARYING_BIT_POS & ~VARYING_BIT_PSIZ;
|
||||
}
|
||||
|
||||
/* If this shader uses transform feedback, compile the transform
|
||||
* feedback program. This is a special shader variant.
|
||||
*/
|
||||
struct panfrost_context *ctx = pan_context(pctx);
|
||||
/* If this shader uses transform feedback, compile the transform
|
||||
* feedback program. This is a special shader variant.
|
||||
*/
|
||||
struct panfrost_context *ctx = pan_context(pctx);
|
||||
|
||||
if (so->nir->xfb_info) {
|
||||
nir_shader *xfb = nir_shader_clone(NULL, so->nir);
|
||||
xfb->info.name = ralloc_asprintf(xfb, "%s@xfb", xfb->info.name);
|
||||
xfb->info.internal = true;
|
||||
if (so->nir->xfb_info) {
|
||||
nir_shader *xfb = nir_shader_clone(NULL, so->nir);
|
||||
xfb->info.name = ralloc_asprintf(xfb, "%s@xfb", xfb->info.name);
|
||||
xfb->info.internal = true;
|
||||
|
||||
so->xfb = calloc(1, sizeof(struct panfrost_compiled_shader));
|
||||
so->xfb->key.vs_is_xfb = true;
|
||||
so->xfb = calloc(1, sizeof(struct panfrost_compiled_shader));
|
||||
so->xfb->key.vs_is_xfb = true;
|
||||
|
||||
panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs,
|
||||
so, &ctx->base.debug, so->xfb, 0);
|
||||
panfrost_shader_get(ctx->base.screen, &ctx->shaders, &ctx->descs, so,
|
||||
&ctx->base.debug, so->xfb, 0);
|
||||
|
||||
/* Since transform feedback is handled via the transform
|
||||
* feedback program, the original program no longer uses XFB
|
||||
*/
|
||||
nir->info.has_transform_feedback_varyings = false;
|
||||
}
|
||||
/* Since transform feedback is handled via the transform
|
||||
* feedback program, the original program no longer uses XFB
|
||||
*/
|
||||
nir->info.has_transform_feedback_varyings = false;
|
||||
}
|
||||
|
||||
/* Compile the program. We don't use vertex shader keys, so there will
|
||||
* be no further vertex shader variants. We do have fragment shader
|
||||
* keys, but we can still compile with a default key that will work most
|
||||
* of the time.
|
||||
*/
|
||||
struct panfrost_shader_key key = { 0 };
|
||||
/* Compile the program. We don't use vertex shader keys, so there will
|
||||
* be no further vertex shader variants. We do have fragment shader
|
||||
* keys, but we can still compile with a default key that will work most
|
||||
* of the time.
|
||||
*/
|
||||
struct panfrost_shader_key key = {0};
|
||||
|
||||
/* gl_FragColor lowering needs the number of colour buffers on desktop
|
||||
* GL, where it acts as an implicit broadcast to all colour buffers.
|
||||
*
|
||||
* However, gl_FragColor is a legacy feature, so assume that if
|
||||
* gl_FragColor is used, there is only a single render target. The
|
||||
* implicit broadcast is neither especially useful nor required by GLES.
|
||||
*/
|
||||
if (so->nir->info.stage == MESA_SHADER_FRAGMENT &&
|
||||
so->nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
|
||||
/* gl_FragColor lowering needs the number of colour buffers on desktop
|
||||
* GL, where it acts as an implicit broadcast to all colour buffers.
|
||||
*
|
||||
* However, gl_FragColor is a legacy feature, so assume that if
|
||||
* gl_FragColor is used, there is only a single render target. The
|
||||
* implicit broadcast is neither especially useful nor required by GLES.
|
||||
*/
|
||||
if (so->nir->info.stage == MESA_SHADER_FRAGMENT &&
|
||||
so->nir->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_COLOR)) {
|
||||
|
||||
key.fs.nr_cbufs_for_fragcolor = 1;
|
||||
}
|
||||
key.fs.nr_cbufs_for_fragcolor = 1;
|
||||
}
|
||||
|
||||
/* Creating a CSO is single-threaded, so it's ok to use the
|
||||
* locked function without explicitly taking the lock. Creating a
|
||||
* default variant acts as a precompile.
|
||||
*/
|
||||
panfrost_new_variant_locked(ctx, so, &key);
|
||||
/* Creating a CSO is single-threaded, so it's ok to use the
|
||||
* locked function without explicitly taking the lock. Creating a
|
||||
* default variant acts as a precompile.
|
||||
*/
|
||||
panfrost_new_variant_locked(ctx, so, &key);
|
||||
|
||||
return so;
|
||||
return so;
|
||||
}
|
||||
|
||||
static void
|
||||
panfrost_delete_shader_state(struct pipe_context *pctx, void *so)
|
||||
{
|
||||
struct panfrost_uncompiled_shader *cso = (struct panfrost_uncompiled_shader *) so;
|
||||
struct panfrost_uncompiled_shader *cso =
|
||||
(struct panfrost_uncompiled_shader *)so;
|
||||
|
||||
util_dynarray_foreach(&cso->variants, struct panfrost_compiled_shader, so) {
|
||||
panfrost_bo_unreference(so->bin.bo);
|
||||
panfrost_bo_unreference(so->state.bo);
|
||||
panfrost_bo_unreference(so->linkage.bo);
|
||||
}
|
||||
util_dynarray_foreach(&cso->variants, struct panfrost_compiled_shader, so) {
|
||||
panfrost_bo_unreference(so->bin.bo);
|
||||
panfrost_bo_unreference(so->state.bo);
|
||||
panfrost_bo_unreference(so->linkage.bo);
|
||||
}
|
||||
|
||||
if (cso->xfb) {
|
||||
panfrost_bo_unreference(cso->xfb->bin.bo);
|
||||
panfrost_bo_unreference(cso->xfb->state.bo);
|
||||
panfrost_bo_unreference(cso->xfb->linkage.bo);
|
||||
free(cso->xfb);
|
||||
}
|
||||
if (cso->xfb) {
|
||||
panfrost_bo_unreference(cso->xfb->bin.bo);
|
||||
panfrost_bo_unreference(cso->xfb->state.bo);
|
||||
panfrost_bo_unreference(cso->xfb->linkage.bo);
|
||||
free(cso->xfb);
|
||||
}
|
||||
|
||||
simple_mtx_destroy(&cso->lock);
|
||||
simple_mtx_destroy(&cso->lock);
|
||||
|
||||
ralloc_free(so);
|
||||
ralloc_free(so);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@@ -424,52 +418,51 @@ panfrost_delete_shader_state(struct pipe_context *pctx, void *so)
 * precompiled, creating both the uncompiled and compiled shaders now.
 */
static void *
panfrost_create_compute_state(struct pipe_context *pctx,
                              const struct pipe_compute_state *cso)
{
   struct panfrost_context *ctx = pan_context(pctx);
   struct panfrost_uncompiled_shader *so = panfrost_alloc_shader(cso->prog);
   struct panfrost_compiled_shader *v = panfrost_alloc_variant(so);
   memset(v, 0, sizeof *v);

   assert(cso->ir_type == PIPE_SHADER_IR_NIR && "TGSI kernels unsupported");

   panfrost_shader_get(pctx->screen, &ctx->shaders, &ctx->descs, so,
                       &ctx->base.debug, v, cso->static_shared_mem);

   /* The NIR becomes invalid after this. For compute kernels, we never
    * need to access it again. Don't keep a dangling pointer around.
    */
   so->nir = NULL;

   return so;
}

static void
panfrost_bind_compute_state(struct pipe_context *pipe, void *cso)
{
   struct panfrost_context *ctx = pan_context(pipe);
   struct panfrost_uncompiled_shader *uncompiled = cso;

   ctx->uncompiled[PIPE_SHADER_COMPUTE] = uncompiled;

   ctx->prog[PIPE_SHADER_COMPUTE] =
      uncompiled ? util_dynarray_begin(&uncompiled->variants) : NULL;
}

void
panfrost_shader_context_init(struct pipe_context *pctx)
{
   pctx->create_vs_state = panfrost_create_shader_state;
   pctx->delete_vs_state = panfrost_delete_shader_state;
   pctx->bind_vs_state = panfrost_bind_vs_state;

   pctx->create_fs_state = panfrost_create_shader_state;
   pctx->delete_fs_state = panfrost_delete_shader_state;
   pctx->bind_fs_state = panfrost_bind_fs_state;

   pctx->create_compute_state = panfrost_create_compute_state;
   pctx->bind_compute_state = panfrost_bind_compute_state;
   pctx->delete_compute_state = panfrost_delete_shader_state;
}
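/* Illustrative sketch, not part of this commit: the variant lookup above
 * compares whole key structs with memcmp(), which works because every key
 * is zero-initialized ({0}) before panfrost_build_key() fills it in;
 * uninitialized padding bytes could otherwise defeat the comparison. The
 * same pattern with hypothetical names (assumes <stdint.h>/<string.h>):
 */
struct example_key {
   uint32_t mode;
   uint8_t flag; /* trailing padding is also cleared by the = {0} init */
};

static bool
example_key_equal(const struct example_key *x, const struct example_key *y)
{
   /* Byte-wise equality; sound only if both keys were zero-initialized */
   return memcmp(x, y, sizeof(*x)) == 0;
}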
@@ -64,20 +64,20 @@
static bool
bi_has_skip_bit(enum bi_opcode op)
{
   switch (op) {
   case BI_OPCODE_TEX_SINGLE:
   case BI_OPCODE_TEXC:
   case BI_OPCODE_TEXC_DUAL:
   case BI_OPCODE_TEXS_2D_F16:
   case BI_OPCODE_TEXS_2D_F32:
   case BI_OPCODE_TEXS_CUBE_F16:
   case BI_OPCODE_TEXS_CUBE_F32:
   case BI_OPCODE_VAR_TEX_F16:
   case BI_OPCODE_VAR_TEX_F32:
      return true;
   default:
      return false;
   }
}

/* Does a given instruction require helper threads to be active (because it
@@ -87,52 +87,52 @@ bi_has_skip_bit(enum bi_opcode op)
bool
bi_instr_uses_helpers(bi_instr *I)
{
   switch (I->op) {
   case BI_OPCODE_TEXC:
   case BI_OPCODE_TEXC_DUAL:
   case BI_OPCODE_TEXS_2D_F16:
   case BI_OPCODE_TEXS_2D_F32:
   case BI_OPCODE_TEXS_CUBE_F16:
   case BI_OPCODE_TEXS_CUBE_F32:
   case BI_OPCODE_VAR_TEX_F16:
   case BI_OPCODE_VAR_TEX_F32:
      return !I->lod_mode; /* set for zero, clear for computed */
   case BI_OPCODE_TEX_SINGLE:
      return (I->va_lod_mode == BI_VA_LOD_MODE_COMPUTED_LOD) ||
             (I->va_lod_mode == BI_VA_LOD_MODE_COMPUTED_BIAS);
   case BI_OPCODE_CLPER_I32:
   case BI_OPCODE_CLPER_OLD_I32:
      /* Fragment shaders require helpers to implement derivatives.
       * Other shader stages don't have helpers at all */
      return true;
   default:
      return false;
   }
}

/* Does a block use helpers directly */
static bool
bi_block_uses_helpers(bi_block *block)
{
   bi_foreach_instr_in_block(block, I) {
      if (bi_instr_uses_helpers(I))
         return true;
   }

   return false;
}

bool
bi_block_terminates_helpers(bi_block *block)
{
   /* Can't terminate if a successor needs helpers */
   bi_foreach_successor(block, succ) {
      if (succ->pass_flags & 1)
         return false;
   }

   /* Otherwise we terminate */
   return true;
}
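/* Worked example (illustrative, not from this commit): for a CFG
 * A -> B -> C where only B computes a derivative, the propagation below
 * sets pass_flags on B and then on its predecessor A, but not on C. A
 * therefore cannot terminate helpers (its successor B still needs them),
 * while B can: clauses in B after its last helper-using instruction get
 * the terminate-discarded (td) bit, and C runs with helpers already off.
 */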

/*
@@ -142,128 +142,130 @@ bi_block_terminates_helpers(bi_block *block)
static void
bi_propagate_pass_flag(bi_block *block)
{
   block->pass_flags = 1;

   bi_foreach_predecessor(block, pred) {
      if ((*pred)->pass_flags == 0)
         bi_propagate_pass_flag(*pred);
   }
}

void
bi_analyze_helper_terminate(bi_context *ctx)
{
   /* Other shader stages do not have a notion of helper threads, so we
    * can skip the analysis. Don't run for blend shaders, either, since
    * they run in the context of another shader that we don't see. */
   if (ctx->stage != MESA_SHADER_FRAGMENT || ctx->inputs->is_blend)
      return;

   /* Clear flags */
   bi_foreach_block(ctx, block)
      block->pass_flags = 0;

   /* For each block, check if it uses helpers and propagate that fact if
    * so. We walk in reverse order to minimize the number of blocks tested:
    * if the (unique) last block uses helpers, only that block is tested.
    */
   bi_foreach_block_rev(ctx, block) {
      if (block->pass_flags == 0 && bi_block_uses_helpers(block))
         bi_propagate_pass_flag(block);
   }
}

void
bi_mark_clauses_td(bi_context *ctx)
{
   if (ctx->stage != MESA_SHADER_FRAGMENT || ctx->inputs->is_blend)
      return;

   /* Finally, mark clauses requiring helpers */
   bi_foreach_block(ctx, block) {
      /* At the end, there are helpers iff we don't terminate */
      bool helpers = !bi_block_terminates_helpers(block);

      bi_foreach_clause_in_block_rev(block, clause) {
         bi_foreach_instr_in_clause_rev(block, clause, I) {
            helpers |= bi_instr_uses_helpers(I);
         }

         clause->td = !helpers;
      }
   }
}

static bool
bi_helper_block_update(BITSET_WORD *deps, bi_block *block)
{
   bool progress = false;

   bi_foreach_instr_in_block_rev(block, I) {
      /* If a destination is required by helper invocation... */
      bi_foreach_dest(I, d) {
         if (!BITSET_TEST(deps, I->dest[d].value))
            continue;

         /* ...so are the sources */
         bi_foreach_ssa_src(I, s) {
            progress |= !BITSET_TEST(deps, I->src[s].value);
            BITSET_SET(deps, I->src[s].value);
         }

         break;
      }
   }

   return progress;
}

void
bi_analyze_helper_requirements(bi_context *ctx)
{
   BITSET_WORD *deps = calloc(sizeof(BITSET_WORD), ctx->ssa_alloc);

   /* Initialize with the sources of instructions consuming
    * derivatives */

   bi_foreach_instr_global(ctx, I) {
      if (!bi_instr_uses_helpers(I))
         continue;

      bi_foreach_ssa_src(I, s)
         BITSET_SET(deps, I->src[s].value);
   }

   /* Propagate that up */
   u_worklist worklist;
   bi_worklist_init(ctx, &worklist);

   bi_foreach_block(ctx, block) {
      bi_worklist_push_tail(&worklist, block);
   }

   while (!u_worklist_is_empty(&worklist)) {
      bi_block *blk = bi_worklist_pop_tail(&worklist);

      if (bi_helper_block_update(deps, blk)) {
         bi_foreach_predecessor(blk, pred)
            bi_worklist_push_head(&worklist, *pred);
      }
   }

   u_worklist_fini(&worklist);

   /* Set the execute bits */

   bi_foreach_instr_global(ctx, I) {
      if (!bi_has_skip_bit(I->op))
         continue;

      bool exec = false;

      bi_foreach_dest(I, d)
         exec |= BITSET_TEST(deps, I->dest[d].value);

      I->skip = !exec;
   }

   free(deps);
}
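/* Worked example (illustrative, not from this commit): if a texture
 * result feeds CLPER (a derivative), the backwards walk above first marks
 * the CLPER sources, then bi_helper_block_update() marks the sources of
 * the texture instruction that defines them. That texture op keeps
 * skip = false and runs in helper lanes too, while a fetch whose result
 * never reaches a derivative gets skip = true.
 */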
@@ -37,10 +37,8 @@
bool
bi_ec0_packed(unsigned tuple_count)
{
   return (tuple_count == 3) || (tuple_count == 5) || (tuple_count == 6) ||
          (tuple_count == 8);
}

/* Helper to calculate the number of quadwords in a clause. This is a function
@@ -60,7 +58,7 @@ bi_ec0_packed(unsigned tuple_count)
 * 6 | 5*
 * 7 | 5
 * 8 | 6*
 *
 * Y = { X if X <= 3
 *     { X - 1 if 4 <= X <= 6
 *     { X - 2 if 7 <= X <= 8

@@ -72,15 +70,15 @@ bi_ec0_packed(unsigned tuple_count)
static unsigned
bi_clause_quadwords(bi_clause *clause)
{
   unsigned X = clause->tuple_count;
   unsigned Y = X - ((X >= 7) ? 2 : (X >= 4) ? 1 : 0);

   unsigned constants = clause->constant_count;

   if ((X != 4) && (X != 7) && (X >= 3) && constants)
      constants--;

   return Y + DIV_ROUND_UP(constants, 2);
}
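/* Worked example (illustrative, not from this commit): tuple_count X = 8
 * with constant_count = 3 gives Y = 8 - 2 = 6. Since X >= 3 and X is
 * neither 4 nor 7, one constant packs for free (the starred rows above),
 * leaving DIV_ROUND_UP(2, 2) = 1 extra quadword, so 7 quadwords total.
 */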

/* Measures the number of quadwords a branch jumps. Bifrost relative offsets
@@ -90,62 +88,62 @@ bi_clause_quadwords(bi_clause *clause)
signed
bi_block_offset(bi_context *ctx, bi_clause *start, bi_block *target)
{
   /* Signed since we might jump backwards */
   signed ret = 0;

   /* Determine if the block we're branching to is strictly greater in
    * source order */
   bool forwards = target->index > start->block->index;

   if (forwards) {
      /* We have to jump through this block from the start of this
       * clause to the end */
      bi_foreach_clause_in_block_from(start->block, clause, start) {
         ret += bi_clause_quadwords(clause);
      }

      /* We then need to jump through every clause of every following
       * block until the target */
      bi_foreach_block_from(ctx, start->block, blk) {
         /* Don't double-count the first block */
         if (blk == start->block)
            continue;

         /* End just before the target */
         if (blk == target)
            break;

         /* Count every clause in the block */
         bi_foreach_clause_in_block(blk, clause) {
            ret += bi_clause_quadwords(clause);
         }
      }
   } else {
      /* We start at the beginning of the clause but have to jump
       * through the clauses before us in the block */
      bi_foreach_clause_in_block_from_rev(start->block, clause, start) {
         if (clause == start)
            continue;

         ret -= bi_clause_quadwords(clause);
      }

      /* And jump back every clause of preceding blocks up through
       * and including the target to get to the beginning of the
       * target */
      bi_foreach_block_from_rev(ctx, start->block, blk) {
         if (blk == start->block)
            continue;

         bi_foreach_clause_in_block(blk, clause) {
            ret -= bi_clause_quadwords(clause);
         }

         /* End just after the target */
         if (blk == target)
            break;
      }
   }

   return ret;
}
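/* Worked example (illustrative, not from this commit): a forwards branch
 * from the first clause of block 2 to block 4 sums the quadwords of every
 * clause in blocks 2 and 3, landing at the start of block 4. A backwards
 * branch from that same clause to block 1 subtracts nothing within block
 * 2 (no earlier clauses) and then all clauses of block 1, yielding a
 * negative offset to block 1's first clause.
 */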
@@ -23,98 +23,100 @@
 * SOFTWARE.
 */

#include "util/u_memory.h"
#include "compiler.h"

void
bi_liveness_ins_update_ssa(BITSET_WORD *live, const bi_instr *I)
{
   bi_foreach_dest(I, d)
      BITSET_CLEAR(live, I->dest[d].value);

   bi_foreach_ssa_src(I, s)
      BITSET_SET(live, I->src[s].value);
}

void
bi_compute_liveness_ssa(bi_context *ctx)
{
   u_worklist worklist;
   u_worklist_init(&worklist, ctx->num_blocks, NULL);

   /* Free any previous liveness, and allocate */
   unsigned words = BITSET_WORDS(ctx->ssa_alloc);

   bi_foreach_block(ctx, block) {
      if (block->ssa_live_in)
         ralloc_free(block->ssa_live_in);

      if (block->ssa_live_out)
         ralloc_free(block->ssa_live_out);

      block->ssa_live_in = rzalloc_array(block, BITSET_WORD, words);
      block->ssa_live_out = rzalloc_array(block, BITSET_WORD, words);

      bi_worklist_push_head(&worklist, block);
   }

   /* Iterate the work list */
   while (!u_worklist_is_empty(&worklist)) {
      /* Pop in reverse order since liveness is a backwards pass */
      bi_block *blk = bi_worklist_pop_head(&worklist);

      /* Update its liveness information */
      memcpy(blk->ssa_live_in, blk->ssa_live_out, words * sizeof(BITSET_WORD));

      bi_foreach_instr_in_block_rev(blk, I) {
         /* Phi nodes are handled separately, so we skip them. As phi nodes are
          * at the beginning and we're iterating backwards, we stop as soon as
          * we hit a phi node.
          */
         if (I->op == BI_OPCODE_PHI)
            break;

         bi_liveness_ins_update_ssa(blk->ssa_live_in, I);
      }

      /* Propagate the live in of the successor (blk) to the live out of
       * predecessors.
       *
       * Phi nodes are logically on the control flow edge and act in parallel.
       * To handle when propagating, we kill writes from phis and make live the
       * corresponding sources.
       */
      bi_foreach_predecessor(blk, pred) {
         BITSET_WORD *live = ralloc_array(blk, BITSET_WORD, words);
         memcpy(live, blk->ssa_live_in, words * sizeof(BITSET_WORD));

         /* Kill write */
         bi_foreach_instr_in_block(blk, I) {
            if (I->op != BI_OPCODE_PHI)
               break;

            BITSET_CLEAR(live, I->dest[0].value);
         }

         /* Make live the corresponding source */
         bi_foreach_instr_in_block(blk, I) {
            if (I->op != BI_OPCODE_PHI)
               break;

            bi_index operand = I->src[bi_predecessor_index(blk, *pred)];
            if (bi_is_ssa(operand))
               BITSET_SET(live, operand.value);
         }

         BITSET_WORD progress = 0;

         for (unsigned i = 0; i < words; ++i) {
            progress |= live[i] & ~((*pred)->ssa_live_out[i]);
            (*pred)->ssa_live_out[i] |= live[i];
         }

         if (progress != 0)
            bi_worklist_push_tail(&worklist, *pred);
      }
   }

   u_worklist_fini(&worklist);
}
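/* Illustrative note, not from this commit: the per-instruction step above
 * is the classic backwards liveness transfer function,
 *
 *    live_in(I) = (live_out(I) \ defs(I)) U uses(I)
 *
 * iterated over a block worklist to a fixed point, with phi destinations
 * killed and phi sources made live only along their predecessor edge.
 */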
@@ -21,8 +21,8 @@
 * SOFTWARE.
 */

#include "compiler/nir/nir_builder.h"
#include "compiler.h"

/* Divergent attribute access is undefined behaviour. To avoid divergence,
 * lower to an if-chain like:

@@ -40,89 +40,88 @@
static bool
bi_lower_divergent_indirects_impl(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   gl_shader_stage stage = b->shader->info.stage;
   nir_src *offset;

   /* Not all indirect access needs this workaround */
   switch (intr->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
      /* Attributes and varyings */
      offset = nir_get_io_offset_src(intr);
      break;

   case nir_intrinsic_store_output:
      /* Varyings only */
      if (stage == MESA_SHADER_FRAGMENT)
         return false;

      offset = nir_get_io_offset_src(intr);
      break;

   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_store:
      /* Any image access */
      offset = &intr->src[0];
      break;
   default:
      return false;
   }

   if (!nir_src_is_divergent(*offset))
      return false;

   /* This indirect does need it */

   b->cursor = nir_before_instr(instr);
   nir_ssa_def *lane = nir_load_subgroup_invocation(b);
   unsigned *lanes = data;

   /* Write zero in a funny way to bypass lower_load_const_to_scalar */
   bool has_dest = nir_intrinsic_infos[intr->intrinsic].has_dest;
   unsigned size = has_dest ? nir_dest_bit_size(intr->dest) : 32;
   nir_ssa_def *zero = has_dest ? nir_imm_zero(b, 1, size) : NULL;
   nir_ssa_def *zeroes[4] = {zero, zero, zero, zero};
   nir_ssa_def *res =
      has_dest ? nir_vec(b, zeroes, nir_dest_num_components(intr->dest)) : NULL;

   for (unsigned i = 0; i < (*lanes); ++i) {
      nir_push_if(b, nir_ieq_imm(b, lane, i));

      nir_instr *c = nir_instr_clone(b->shader, instr);
      nir_intrinsic_instr *c_intr = nir_instr_as_intrinsic(c);
      nir_builder_instr_insert(b, c);
      nir_pop_if(b, NULL);

      if (has_dest) {
         assert(c_intr->dest.is_ssa);
         nir_ssa_def *c_ssa = &c_intr->dest.ssa;
         res = nir_if_phi(b, c_ssa, res);
      }
   }

   if (has_dest)
      nir_ssa_def_rewrite_uses(&intr->dest.ssa, res);

   nir_instr_remove(instr);
   return true;
}

bool
bi_lower_divergent_indirects(nir_shader *shader, unsigned lanes)
{
   return nir_shader_instructions_pass(
      shader, bi_lower_divergent_indirects_impl, nir_metadata_none, &lanes);
}
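/* Sketch of the generated code (illustrative, not from this commit): with
 * lanes = 4, a divergent image access becomes a chain of lane tests, each
 * guarding a clone whose index is uniform within the guarded region:
 *
 *    lane = load_subgroup_invocation()
 *    res = 0
 *    if (lane == 0) clone_0      res = phi(clone_0, res)
 *    if (lane == 1) clone_1      res = phi(clone_1, res)
 *    if (lane == 2) clone_2      res = phi(clone_2, res)
 *    if (lane == 3) clone_3      res = phi(clone_3, res)
 */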
@@ -21,8 +21,8 @@
 * SOFTWARE.
 */

#include "bi_builder.h"
#include "compiler.h"

/* Not all 8-bit and 16-bit instructions support all swizzles on all sources.
 * These passes, intended to run after NIR->BIR but before scheduling/RA, lower

@@ -33,270 +33,269 @@
static bool
bi_swizzle_replicates_8(enum bi_swizzle swz)
{
   switch (swz) {
   case BI_SWIZZLE_B0000:
   case BI_SWIZZLE_B1111:
   case BI_SWIZZLE_B2222:
   case BI_SWIZZLE_B3333:
      return true;
   default:
      return false;
   }
}

static void
lower_swizzle(bi_context *ctx, bi_instr *ins, unsigned src)
{
   /* TODO: Use the opcode table and be a lot more methodical about this... */
   switch (ins->op) {
   /* Some instructions used with 16-bit data never have swizzles */
   case BI_OPCODE_CSEL_V2F16:
   case BI_OPCODE_CSEL_V2I16:
   case BI_OPCODE_CSEL_V2S16:
   case BI_OPCODE_CSEL_V2U16:

   /* Despite ostensibly being 32-bit instructions, CLPER does not
    * inherently interpret the data, so it can be used for v2f16
    * derivatives, which might require swizzle lowering */
   case BI_OPCODE_CLPER_I32:
   case BI_OPCODE_CLPER_OLD_I32:

   /* Similarly, CSEL.i32 consumes a boolean as a 32-bit argument. If the
    * boolean is implemented as a 16-bit integer, the swizzle is needed
    * for correct operation if the instruction producing the 16-bit
    * boolean does not replicate to both halves of the containing 32-bit
    * register. As such, we may need to lower a swizzle.
    *
    * This is a silly hack. Ideally, code gen would be smart enough to
    * avoid this case (by replicating). In practice, silly hardware design
    * decisions force our hand here.
    */
   case BI_OPCODE_MUX_I32:
   case BI_OPCODE_CSEL_I32:
      break;

   case BI_OPCODE_IADD_V2S16:
   case BI_OPCODE_IADD_V2U16:
   case BI_OPCODE_ISUB_V2S16:
   case BI_OPCODE_ISUB_V2U16:
      if (src == 0 && ins->src[src].swizzle != BI_SWIZZLE_H10)
         break;
      else
         return;
   case BI_OPCODE_LSHIFT_AND_V2I16:
   case BI_OPCODE_LSHIFT_OR_V2I16:
   case BI_OPCODE_LSHIFT_XOR_V2I16:
   case BI_OPCODE_RSHIFT_AND_V2I16:
   case BI_OPCODE_RSHIFT_OR_V2I16:
   case BI_OPCODE_RSHIFT_XOR_V2I16:
      if (src == 2)
         return;
      else
         break;

   /* For some reason MUX.v2i16 allows swaps but not replication */
   case BI_OPCODE_MUX_V2I16:
      if (ins->src[src].swizzle == BI_SWIZZLE_H10)
         return;
      else
         break;

   /* No swizzles supported */
   case BI_OPCODE_HADD_V4U8:
   case BI_OPCODE_HADD_V4S8:
   case BI_OPCODE_CLZ_V4U8:
   case BI_OPCODE_IDP_V4I8:
   case BI_OPCODE_IABS_V4S8:
   case BI_OPCODE_ICMP_V4I8:
   case BI_OPCODE_ICMP_V4U8:
   case BI_OPCODE_MUX_V4I8:
   case BI_OPCODE_IADD_IMM_V4I8:
      break;

   case BI_OPCODE_LSHIFT_AND_V4I8:
   case BI_OPCODE_LSHIFT_OR_V4I8:
   case BI_OPCODE_LSHIFT_XOR_V4I8:
   case BI_OPCODE_RSHIFT_AND_V4I8:
   case BI_OPCODE_RSHIFT_OR_V4I8:
   case BI_OPCODE_RSHIFT_XOR_V4I8:
      /* Last source allows identity or replication */
      if (src == 2 && bi_swizzle_replicates_8(ins->src[src].swizzle))
         return;

      /* Others do not allow swizzles */
      break;

   /* We don't want to deal with reswizzling logic in modifier prop. Move
    * the swizzle outside, it's easier for clamp propagation. */
   case BI_OPCODE_FCLAMP_V2F16: {
      bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));
      bi_index dest = ins->dest[0];
      bi_index tmp = bi_temp(ctx);

      ins->dest[0] = tmp;
      bi_swz_v2i16_to(&b, dest, bi_replace_index(ins->src[0], tmp));
      return;
   }

   default:
      return;
   }

   /* First, try to apply a given swizzle to a constant to clear the
    * runtime swizzle. This is less heavy-handed than ignoring the
    * swizzle for scalar destinations, since it maintains
    * replication of the destination.
    */
   if (ins->src[src].type == BI_INDEX_CONSTANT) {
      ins->src[src].value =
         bi_apply_swizzle(ins->src[src].value, ins->src[src].swizzle);
      ins->src[src].swizzle = BI_SWIZZLE_H01;
      return;
   }

   /* Even if the source does not replicate, if the consuming instruction
    * produces a 16-bit scalar, we can ignore the other component.
    */
   if (ins->dest[0].swizzle == BI_SWIZZLE_H00 &&
       ins->src[src].swizzle == BI_SWIZZLE_H00) {
      ins->src[src].swizzle = BI_SWIZZLE_H01;
      return;
   }

   /* Lower it away */
   bi_builder b = bi_init_builder(ctx, bi_before_instr(ins));

   bool is_8 = (bi_opcode_props[ins->op].size == BI_SIZE_8);
   bi_index orig = ins->src[src];
   bi_index stripped = bi_replace_index(bi_null(), orig);
   stripped.swizzle = ins->src[src].swizzle;

   bi_index swz = is_8 ? bi_swz_v4i8(&b, stripped) : bi_swz_v2i16(&b, stripped);

   bi_replace_src(ins, src, swz);
   ins->src[src].swizzle = BI_SWIZZLE_H01;
}

static bool
bi_swizzle_replicates_16(enum bi_swizzle swz)
{
   switch (swz) {
   case BI_SWIZZLE_H00:
   case BI_SWIZZLE_H11:
      return true;
   default:
      /* If a swizzle replicates every 8-bits, it also replicates
       * every 16-bits, so allow 8-bit replicating swizzles.
       */
      return bi_swizzle_replicates_8(swz);
   }
}

static bool
bi_instr_replicates(bi_instr *I, BITSET_WORD *replicates_16)
{
   switch (I->op) {

   /* Instructions that construct vectors have replicated output if their
    * sources are identical. Check this case first.
    */
   case BI_OPCODE_MKVEC_V2I16:
   case BI_OPCODE_V2F16_TO_V2S16:
   case BI_OPCODE_V2F16_TO_V2U16:
   case BI_OPCODE_V2F32_TO_V2F16:
   case BI_OPCODE_V2S16_TO_V2F16:
   case BI_OPCODE_V2S8_TO_V2F16:
   case BI_OPCODE_V2S8_TO_V2S16:
   case BI_OPCODE_V2U16_TO_V2F16:
   case BI_OPCODE_V2U8_TO_V2F16:
   case BI_OPCODE_V2U8_TO_V2U16:
      return bi_is_value_equiv(I->src[0], I->src[1]);

   /* 16-bit transcendentals are defined to output zero in their
    * upper half, so they do not replicate
    */
   case BI_OPCODE_FRCP_F16:
   case BI_OPCODE_FRSQ_F16:
      return false;

   /* Not sure, be conservative, we don't use these.. */
   case BI_OPCODE_VN_ASST1_F16:
   case BI_OPCODE_FPCLASS_F16:
   case BI_OPCODE_FPOW_SC_DET_F16:
      return false;

   default:
      break;
   }

   /* Replication analysis only makes sense for ALU instructions */
   if (bi_opcode_props[I->op].message != BIFROST_MESSAGE_NONE)
      return false;

   /* We only analyze 16-bit instructions for 16-bit replication. We could
    * maybe do better.
    */
   if (bi_opcode_props[I->op].size != BI_SIZE_16)
      return false;

   bi_foreach_src(I, s) {
      if (bi_is_null(I->src[s]))
         continue;

      /* Replicated swizzles */
      if (bi_swizzle_replicates_16(I->src[s].swizzle))
         continue;

      /* Replicated values */
      if (bi_is_ssa(I->src[s]) && BITSET_TEST(replicates_16, I->src[s].value))
         continue;

      /* Replicated constants */
      if (I->src[s].type == BI_INDEX_CONSTANT &&
          (I->src[s].value & 0xFFFF) == (I->src[s].value >> 16))
         continue;

      return false;
   }

   return true;
}

void
bi_lower_swizzle(bi_context *ctx)
{
   bi_foreach_instr_global_safe(ctx, ins) {
      bi_foreach_src(ins, s) {
         if (bi_is_null(ins->src[s]))
            continue;
         if (ins->src[s].swizzle == BI_SWIZZLE_H01)
            continue;

         lower_swizzle(ctx, ins, s);
      }
   }

   /* Now that we've lowered swizzles, clean up the mess */
   BITSET_WORD *replicates_16 = calloc(sizeof(bi_index), ctx->ssa_alloc);

   bi_foreach_instr_global(ctx, ins) {
      if (ins->nr_dests && bi_instr_replicates(ins, replicates_16))
         BITSET_SET(replicates_16, ins->dest[0].value);

      if (ins->op == BI_OPCODE_SWZ_V2I16 && bi_is_ssa(ins->src[0]) &&
          BITSET_TEST(replicates_16, ins->src[0].value)) {
         ins->op = BI_OPCODE_MOV_I32;
         ins->src[0].swizzle = BI_SWIZZLE_H01;
      }

      /* The above passes rely on replicating destinations. For
       * Valhall, we will want to optimize this. For now, default
       * to Bifrost compatible behaviour.
       */
      if (ins->nr_dests)
         ins->dest[0].swizzle = BI_SWIZZLE_H01;
   }

   free(replicates_16);
}
|
||||
|
|
|
|||
|
|
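Aside from the diff itself: the "replicated constants" check above reduces to comparing the two 16-bit halves of a 32-bit word. A minimal standalone sketch, assuming only standard C (the helper name is invented, not driver API):

#include <stdbool.h>
#include <stdint.h>

/* Invented helper mirroring the constant case in bi_instr_replicates: a
 * 32-bit word holds a replicated 16-bit value iff both halves agree. */
static bool
halves_replicate_16(uint32_t v)
{
   return (v & 0xFFFF) == (v >> 16);
}

/* halves_replicate_16(0x12341234) -> true
 * halves_replicate_16(0x00011234) -> false */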
@@ -21,8 +21,8 @@
 * SOFTWARE.
 */

#include "bi_builder.h"
#include "compiler.h"

/* Dead simple constant folding to cleanup compiler frontend patterns. Before
 * adding a new pattern here, check why you need it and whether we can avoid

@@ -31,83 +31,84 @@
static inline uint32_t
bi_source_value(const bi_instr *I, unsigned s)
{
   if (s < I->nr_srcs)
      return bi_apply_swizzle(I->src[s].value, I->src[s].swizzle);
   else
      return 0;
}

uint32_t
bi_fold_constant(bi_instr *I, bool *unsupported)
{
   /* We can only fold instructions where all sources are constant */
   bi_foreach_src(I, s) {
      if (I->src[s].type != BI_INDEX_CONSTANT) {
         *unsupported = true;
         return 0;
      }
   }

   /* Grab the sources */
   uint32_t a = bi_source_value(I, 0);
   uint32_t b = bi_source_value(I, 1);
   uint32_t c = bi_source_value(I, 2);
   uint32_t d = bi_source_value(I, 3);

   /* Evaluate the instruction */
   switch (I->op) {
   case BI_OPCODE_SWZ_V2I16:
      return a;

   case BI_OPCODE_MKVEC_V2I16:
      return (b << 16) | (a & 0xFFFF);

   case BI_OPCODE_MKVEC_V4I8:
      return (d << 24) | ((c & 0xFF) << 16) | ((b & 0xFF) << 8) | (a & 0xFF);

   case BI_OPCODE_MKVEC_V2I8:
      return (c << 16) | ((b & 0xFF) << 8) | (a & 0xFF);

   case BI_OPCODE_LSHIFT_OR_I32:
      if (I->not_result || I->src[0].neg || I->src[1].neg)
         break;

      return (a << c) | b;

   case BI_OPCODE_F32_TO_U32:
      if (I->round == BI_ROUND_NONE) {
         /* Explicitly clamp to prevent undefined behaviour and
          * match hardware rules */
         float f = uif(a);
         return (f >= 0.0) ? (uint32_t)f : 0;
      } else
         break;

   default:
      break;
   }

   *unsupported = true;
   return 0;
}

bool
bi_opt_constant_fold(bi_context *ctx)
{
   bool progress = false;

   bi_foreach_instr_global_safe(ctx, ins) {
      bool unsupported = false;
      uint32_t replace = bi_fold_constant(ins, &unsupported);
      if (unsupported)
         continue;

      /* Replace with constant move, to be copypropped */
      assert(ins->nr_dests == 1);
      bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));
      bi_mov_i32_to(&b, ins->dest[0], bi_imm_u32(replace));
      bi_remove_instruction(ins);
      progress = true;
   }

   return progress;
}
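A worked example of the MKVEC.v2i16 fold above, as a self-contained C program; the function name is illustrative, not driver API:

#include <stdint.h>
#include <stdio.h>

/* Pack two 16-bit lanes into one 32-bit word, low lane first, exactly as
 * the BI_OPCODE_MKVEC_V2I16 case folds it. */
static uint32_t
fold_mkvec_v2i16(uint32_t a, uint32_t b)
{
   return (b << 16) | (a & 0xFFFF);
}

int
main(void)
{
   /* Prints 0xBBBBAAAA: lane 0 = 0xAAAA, lane 1 = 0xBBBB */
   printf("0x%08X\n", (unsigned)fold_mkvec_v2i16(0xAAAA, 0xBBBB));
   return 0;
}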
@@ -22,92 +22,95 @@
 * SOFTWARE.
 */

#include "bi_builder.h"
#include "compiler.h"

/* SSA copy propagation */

static bool
bi_reads_fau(bi_instr *ins)
{
   bi_foreach_src(ins, s) {
      if (ins->src[s].type == BI_INDEX_FAU)
         return true;
   }

   return false;
}

void
bi_opt_copy_prop(bi_context *ctx)
{
   /* Chase SPLIT of COLLECT. Instruction selection usually avoids this
    * pattern (due to the split cache), but it is inevitably generated by
    * the UBO pushing pass.
    */
   bi_instr **collects = calloc(sizeof(bi_instr *), ctx->ssa_alloc);
   bi_foreach_instr_global_safe(ctx, I) {
      if (I->op == BI_OPCODE_COLLECT_I32) {
         /* Rewrite trivial collects while we're at it */
         if (I->nr_srcs == 1)
            I->op = BI_OPCODE_MOV_I32;

         collects[I->dest[0].value] = I;
      } else if (I->op == BI_OPCODE_SPLIT_I32) {
         /* Rewrite trivial splits while we're at it */
         if (I->nr_dests == 1)
            I->op = BI_OPCODE_MOV_I32;

         bi_instr *collect = collects[I->src[0].value];
         if (!collect)
            continue;

         /* Lower the split to moves, copyprop cleans up */
         bi_builder b = bi_init_builder(ctx, bi_before_instr(I));

         bi_foreach_dest(I, d)
            bi_mov_i32_to(&b, I->dest[d], collect->src[d]);

         bi_remove_instruction(I);
      }
   }

   free(collects);

   bi_index *replacement = calloc(sizeof(bi_index), ctx->ssa_alloc);

   bi_foreach_instr_global_safe(ctx, ins) {
      if (ins->op == BI_OPCODE_MOV_I32 &&
          ins->src[0].type != BI_INDEX_REGISTER) {
         bi_index replace = ins->src[0];

         /* Peek through one layer so copyprop converges in one
          * iteration for chained moves */
         if (bi_is_ssa(replace)) {
            bi_index chained = replacement[replace.value];

            if (!bi_is_null(chained))
               replace = chained;
         }

         assert(ins->nr_dests == 1);
         replacement[ins->dest[0].value] = replace;
      }

      bi_foreach_src(ins, s) {
         bi_index use = ins->src[s];

         if (use.type != BI_INDEX_NORMAL)
            continue;
         if (bi_is_staging_src(ins, s))
            continue;

         bi_index repl = replacement[use.value];

         if (repl.type == BI_INDEX_CONSTANT && bi_reads_fau(ins))
            continue;

         if (!bi_is_null(repl))
            bi_replace_src(ins, s, repl);
      }
   }

   free(replacement);
}
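The "peek through one layer" trick above is what lets a single forward pass collapse chains of moves. A toy model under assumed conventions (value numbers start at 1 so 0 can mean "no replacement"; the names are invented):

#include <stdint.h>

#define MAX_DEFS 64

static uint32_t replacement[MAX_DEFS]; /* 0 = no replacement recorded */

/* Record that dst is a copy of src, first chasing src's own replacement,
 * so a chain a -> b -> c resolves straight to c in one top-down walk. */
static void
record_copy(uint32_t dst, uint32_t src)
{
   uint32_t chained = replacement[src];
   replacement[dst] = chained ? chained : src;
}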
@@ -22,8 +22,8 @@
 * SOFTWARE.
 */

#include "bi_builder.h"
#include "compiler.h"

#define XXH_INLINE_ALL
#include "util/xxhash.h"

@@ -36,85 +36,88 @@
static inline uint32_t
HASH(uint32_t hash, unsigned data)
{
   return XXH32(&data, sizeof(data), hash);
}

static uint32_t
hash_index(uint32_t hash, bi_index index)
{
   hash = HASH(hash, index.value);
   hash = HASH(hash, index.abs);
   hash = HASH(hash, index.neg);
   hash = HASH(hash, index.swizzle);
   hash = HASH(hash, index.offset);
   hash = HASH(hash, index.type);
   return hash;
}

/* Hash an ALU instruction. */
static uint32_t
hash_instr(const void *data)
{
   const bi_instr *I = data;
   uint32_t hash = 0;

   hash = HASH(hash, I->op);
   hash = HASH(hash, I->nr_dests);
   hash = HASH(hash, I->nr_srcs);

   assert(!I->flow && !I->slot && "CSE must be early");

   /* Explicitly skip destinations, except for size details */
   bi_foreach_dest(I, d) {
      hash = HASH(hash, I->dest[d].swizzle);
   }

   bi_foreach_src(I, s) {
      hash = hash_index(hash, I->src[s]);
   }

   /* Explicitly skip branch, regfmt, vecsize, no_spill, tdd, table */
   hash = HASH(hash, I->dest_mod);

   /* Explicitly skip other immediates */
   hash = HASH(hash, I->shift);

   for (unsigned i = 0; i < ARRAY_SIZE(I->flags); ++i)
      hash = HASH(hash, I->flags[i]);

   return hash;
}

static bool
instrs_equal(const void *_i1, const void *_i2)
{
   const bi_instr *i1 = _i1, *i2 = _i2;

   if (i1->op != i2->op)
      return false;
   if (i1->nr_srcs != i2->nr_srcs)
      return false;
   if (i1->nr_dests != i2->nr_dests)
      return false;

   /* Explicitly skip destinations */

   bi_foreach_src(i1, s) {
      bi_index s1 = i1->src[s], s2 = i2->src[s];

      if (memcmp(&s1, &s2, sizeof(s1)) != 0)
         return false;
   }

   if (i1->dest_mod != i2->dest_mod)
      return false;

   if (i1->shift != i2->shift)
      return false;

   for (unsigned i = 0; i < ARRAY_SIZE(i1->flags); ++i) {
      if (i1->flags[i] != i2->flags[i])
         return false;
   }

   return true;
}

/* Determines what instructions the above routines have to handle */

@@ -122,64 +125,64 @@ instrs_equal(const void *_i1, const void *_i2)
static bool
instr_can_cse(const bi_instr *I)
{
   switch (I->op) {
   case BI_OPCODE_DTSEL_IMM:
   case BI_OPCODE_DISCARD_F32:
      return false;
   default:
      break;
   }

   /* Be conservative about which message-passing instructions we CSE,
    * since most are not pure even within a thread.
    */
   if (bi_opcode_props[I->op].message && I->op != BI_OPCODE_LEA_BUF_IMM)
      return false;

   if (I->branch_target)
      return false;

   return true;
}

void
bi_opt_cse(bi_context *ctx)
{
   struct set *instr_set = _mesa_set_create(NULL, hash_instr, instrs_equal);

   bi_foreach_block(ctx, block) {
      bi_index *replacement = calloc(sizeof(bi_index), ctx->ssa_alloc);
      _mesa_set_clear(instr_set, NULL);

      bi_foreach_instr_in_block(block, instr) {
         /* Rewrite before trying to CSE anything so we converge
          * locally in one iteration */
         bi_foreach_ssa_src(instr, s) {
            if (bi_is_staging_src(instr, s))
               continue;

            bi_index repl = replacement[instr->src[s].value];
            if (!bi_is_null(repl))
               bi_replace_src(instr, s, repl);
         }

         if (!instr_can_cse(instr))
            continue;

         bool found;
         struct set_entry *entry =
            _mesa_set_search_or_add(instr_set, instr, &found);
         if (found) {
            const bi_instr *match = entry->key;

            bi_foreach_dest(instr, d) {
               replacement[instr->dest[d].value] = match->dest[d];
            }
         }
      }

      free(replacement);
   }

   _mesa_set_destroy(instr_set, NULL);
}
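The hash_instr/instrs_equal pairing is standard local value numbering: hash every field that affects the computed value, then confirm candidates field-by-field to rule out collisions. A self-contained sketch over an invented record type (not the driver's bi_instr):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

struct expr {
   uint32_t op;
   uint32_t src[2]; /* no padding, so raw-byte hashing/compare is safe */
};

/* FNV-1a over the value-defining fields; any decent mixer works here */
static uint32_t
expr_hash(const struct expr *e)
{
   uint32_t h = 2166136261u;
   const uint8_t *p = (const uint8_t *)e;
   for (size_t i = 0; i < sizeof(*e); ++i)
      h = (h ^ p[i]) * 16777619u;
   return h;
}

static bool
expr_equal(const struct expr *a, const struct expr *b)
{
   return memcmp(a, b, sizeof(*a)) == 0;
}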
@@ -22,66 +22,67 @@
 * SOFTWARE.
 */

#include "util/u_memory.h"
#include "compiler.h"

/* A simple SSA-based mark-and-sweep dead code elimination pass. */

void
bi_opt_dead_code_eliminate(bi_context *ctx)
{
   /* Mark live values */
   BITSET_WORD *mark =
      calloc(sizeof(BITSET_WORD), BITSET_WORDS(ctx->ssa_alloc));

   u_worklist worklist;
   u_worklist_init(&worklist, ctx->num_blocks, NULL);

   bi_foreach_block(ctx, block) {
      bi_worklist_push_head(&worklist, block);
   }

   while (!u_worklist_is_empty(&worklist)) {
      /* Pop in reverse order for backwards pass */
      bi_block *blk = bi_worklist_pop_head(&worklist);

      bool progress = false;

      bi_foreach_instr_in_block_rev(blk, I) {
         bool needed = bi_side_effects(I);

         bi_foreach_dest(I, d)
            needed |= BITSET_TEST(mark, I->dest[d].value);

         if (!needed)
            continue;

         bi_foreach_ssa_src(I, s) {
            progress |= !BITSET_TEST(mark, I->src[s].value);
            BITSET_SET(mark, I->src[s].value);
         }
      }

      /* XXX: slow */
      if (progress) {
         bi_foreach_block(ctx, block)
            bi_worklist_push_head(&worklist, block);
      }
   }

   u_worklist_fini(&worklist);

   /* Sweep */
   bi_foreach_instr_global_safe(ctx, I) {
      bool needed = bi_side_effects(I);

      bi_foreach_dest(I, d)
         needed |= BITSET_TEST(mark, I->dest[d].value);

      if (!needed)
         bi_remove_instruction(I);
   }

   free(mark);
}

/* Post-RA liveness-based dead code analysis to clean up results of bundling */

@@ -89,39 +90,39 @@ bi_opt_dead_code_eliminate(bi_context *ctx)
uint64_t MUST_CHECK
bi_postra_liveness_ins(uint64_t live, bi_instr *ins)
{
   bi_foreach_dest(ins, d) {
      if (ins->dest[d].type == BI_INDEX_REGISTER) {
         unsigned nr = bi_count_write_registers(ins, d);
         unsigned reg = ins->dest[d].value;
         live &= ~(BITFIELD64_MASK(nr) << reg);
      }
   }

   bi_foreach_src(ins, s) {
      if (ins->src[s].type == BI_INDEX_REGISTER) {
         unsigned nr = bi_count_read_registers(ins, s);
         unsigned reg = ins->src[s].value;
         live |= (BITFIELD64_MASK(nr) << reg);
      }
   }

   return live;
}

static bool
bi_postra_liveness_block(bi_block *blk)
{
   bi_foreach_successor(blk, succ)
      blk->reg_live_out |= succ->reg_live_in;

   uint64_t live = blk->reg_live_out;

   bi_foreach_instr_in_block_rev(blk, ins)
      live = bi_postra_liveness_ins(live, ins);

   bool progress = blk->reg_live_in != live;
   blk->reg_live_in = live;
   return progress;
}

/* Globally, liveness analysis uses a fixed-point algorithm based on a

@@ -133,58 +134,58 @@ bi_postra_liveness_block(bi_block *blk)
void
bi_postra_liveness(bi_context *ctx)
{
   u_worklist worklist;
   bi_worklist_init(ctx, &worklist);

   bi_foreach_block(ctx, block) {
      block->reg_live_out = block->reg_live_in = 0;

      bi_worklist_push_tail(&worklist, block);
   }

   while (!u_worklist_is_empty(&worklist)) {
      /* Pop off in reverse order since liveness is backwards */
      bi_block *blk = bi_worklist_pop_tail(&worklist);

      /* Update liveness information. If we made progress, we need to
       * reprocess the predecessors
       */
      if (bi_postra_liveness_block(blk)) {
         bi_foreach_predecessor(blk, pred)
            bi_worklist_push_head(&worklist, *pred);
      }
   }

   u_worklist_fini(&worklist);
}

void
bi_opt_dce_post_ra(bi_context *ctx)
{
   bi_postra_liveness(ctx);

   bi_foreach_block_rev(ctx, block) {
      uint64_t live = block->reg_live_out;

      bi_foreach_instr_in_block_rev(block, ins) {
         if (ins->op == BI_OPCODE_DTSEL_IMM)
            ins->dest[0] = bi_null();

         bi_foreach_dest(ins, d) {
            if (ins->dest[d].type != BI_INDEX_REGISTER)
               continue;

            unsigned nr = bi_count_write_registers(ins, d);
            unsigned reg = ins->dest[d].value;
            uint64_t mask = (BITFIELD64_MASK(nr) << reg);
            bool cullable = (ins->op != BI_OPCODE_BLEND);
            cullable &= !bi_opcode_props[ins->op].sr_write;

            if (!(live & mask) && cullable)
               ins->dest[d] = bi_null();
         }

         live = bi_postra_liveness_ins(live, ins);
      }
   }
}
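The post-RA transfer function above is the classic backwards dataflow step: walking an instruction bottom-up, definitions kill registers from the live set before uses add them, so live-in = (live-out minus defs) union uses. A standalone model over an invented instruction record:

#include <stdint.h>

struct toy_ins {
   uint64_t def_mask; /* registers written */
   uint64_t use_mask; /* registers read */
};

/* Backwards liveness transfer over a 64-register bitmask */
static uint64_t
liveness_step(uint64_t live, const struct toy_ins *ins)
{
   live &= ~ins->def_mask; /* defs kill */
   live |= ins->use_mask;  /* uses gen */
   return live;
}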
@@ -21,8 +21,8 @@
 * SOFTWARE.
 */

#include "bi_builder.h"
#include "compiler.h"

#define XXH_INLINE_ALL
#include "util/xxhash.h"

@@ -51,58 +51,60 @@
static inline bool
bi_can_fuse_dual_tex(bi_instr *I, bool fuse_zero_lod)
{
   return (I->op == BI_OPCODE_TEXS_2D_F32 || I->op == BI_OPCODE_TEXS_2D_F16) &&
          (I->texture_index < 4 && I->sampler_index < 4) &&
          (I->lod_mode == fuse_zero_lod);
}

static enum bifrost_texture_format
bi_format_for_texs_2d(enum bi_opcode op)
{
   switch (op) {
   case BI_OPCODE_TEXS_2D_F32:
      return BIFROST_TEXTURE_FORMAT_F32;
   case BI_OPCODE_TEXS_2D_F16:
      return BIFROST_TEXTURE_FORMAT_F16;
   default:
      unreachable("Invalid TEXS_2D instruction");
   }
}

static void
bi_fuse_dual(bi_context *ctx, bi_instr *I1, bi_instr *I2)
{
   /* Construct a texture operation descriptor for the dual texture */
   struct bifrost_dual_texture_operation desc = {
      .mode = BIFROST_TEXTURE_OPERATION_DUAL,

      .primary_texture_index = I1->texture_index,
      .primary_sampler_index = I1->sampler_index,
      .primary_format = bi_format_for_texs_2d(I1->op),
      .primary_mask = 0xF,

      .secondary_texture_index = I2->texture_index,
      .secondary_sampler_index = I2->sampler_index,
      .secondary_format = bi_format_for_texs_2d(I2->op),
      .secondary_mask = 0xF,
   };

   /* LOD mode is implied in a shader stage */
   assert(I1->lod_mode == I2->lod_mode);

   /* Insert before the earlier instruction in case its result is consumed
    * before the later instruction
    */
   bi_builder b = bi_init_builder(ctx, bi_before_instr(I1));

   bi_instr *I = bi_texc_dual_to(
      &b, I1->dest[0], I2->dest[0], bi_null(), /* staging */
      I1->src[0], I1->src[1],                  /* coordinates */
      bi_imm_u32(bi_dual_tex_as_u32(desc)), I1->lod_mode,
      bi_count_write_registers(I1, 0), bi_count_write_registers(I2, 0));

   I->skip = I1->skip && I2->skip;

   bi_remove_instruction(I1);
   bi_remove_instruction(I2);
}

#define HASH(hash, data) XXH32(&(data), sizeof(data), hash)

@@ -110,45 +112,45 @@ bi_fuse_dual(bi_context *ctx, bi_instr *I1, bi_instr *I2)
static uint32_t
coord_hash(const void *key)
{
   const bi_instr *I = key;

   return XXH32(&I->src[0], sizeof(I->src[0]) + sizeof(I->src[1]), 0);
}

static bool
coord_equal(const void *key1, const void *key2)
{
   const bi_instr *I = key1;
   const bi_instr *J = key2;

   return memcmp(&I->src[0], &J->src[0],
                 sizeof(I->src[0]) + sizeof(I->src[1])) == 0;
}

static void
bi_opt_fuse_dual_texture_block(bi_context *ctx, bi_block *block)
{
   struct set *set = _mesa_set_create(ctx, coord_hash, coord_equal);
   bool fuse_zero_lod = (ctx->stage != MESA_SHADER_FRAGMENT);
   bool found = false;

   bi_foreach_instr_in_block_safe(block, I) {
      if (!bi_can_fuse_dual_tex(I, fuse_zero_lod))
         continue;

      struct set_entry *ent = _mesa_set_search_or_add(set, I, &found);

      if (found) {
         bi_fuse_dual(ctx, (bi_instr *)ent->key, I);
         _mesa_set_remove(set, ent);
      }
   }
}

void
bi_opt_fuse_dual_texture(bi_context *ctx)
{
   bi_foreach_block(ctx, block) {
      bi_opt_fuse_dual_texture_block(ctx, block);
   }
}
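The pass above pairs texture operations through a set keyed on their coordinates: the first instruction with a given key is parked, the second fuses with it and vacates the slot. A toy model of that idiom (the types and the open-addressed table are invented; hash collisions are simply overwritten for brevity):

#include <stddef.h>

struct item {
   int key;
};

#define TABLE_SIZE 64

static struct item *table[TABLE_SIZE];

/* Returns the parked partner on a key match (vacating the slot so each
 * item fuses at most once), else parks this item and returns NULL. */
static struct item *
try_pair(struct item *it)
{
   struct item **slot = &table[(unsigned)it->key % TABLE_SIZE];

   if (*slot && (*slot)->key == it->key) {
      struct item *partner = *slot;
      *slot = NULL;
      return partner;
   }

   *slot = it;
   return NULL;
}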
@@ -21,8 +21,8 @@
 * SOFTWARE.
 */

#include "bi_builder.h"
#include "compiler.h"

/* Bifrost v7 can preload up to two messages of the form:
 *

@@ -35,8 +35,8 @@
static bool
bi_is_regfmt_float(enum bi_register_format regfmt)
{
   return (regfmt == BI_REGISTER_FORMAT_F32) ||
          (regfmt == BI_REGISTER_FORMAT_F16);
}

/*

@@ -46,107 +46,107 @@ bi_is_regfmt_float(enum bi_register_format regfmt)
static bool
bi_can_interp_at_sample(bi_instr *I)
{
   /* .sample mode with r61 corresponds to per-sample interpolation */
   if (I->sample == BI_SAMPLE_SAMPLE)
      return bi_is_value_equiv(I->src[0], bi_register(61));

   /* If the shader runs with pixel-frequency shading, .sample is
    * equivalent to .center, so allow .center
    *
    * If the shader runs with sample-frequency shading, .sample and .center
    * are not equivalent. However, the ESSL 3.20 specification
    * stipulates in section 4.5 ("Interpolation Qualifiers"):
    *
    *    for fragment shader input variables qualified with neither
    *    centroid nor sample, the value of the assigned variable may be
    *    interpolated anywhere within the pixel and a single value may be
    *    assigned to each sample within the pixel, to the extent permitted
    *    by the OpenGL ES Specification.
    *
    * We only produce .center for variables qualified with neither centroid
    * nor sample, so if .center is specified this section applies. This
    * suggests that, although per-pixel interpolation is allowed, it is not
    * mandated ("may" rather than "must" or "should"). Therefore it appears
    * safe to substitute sample.
    */
   return (I->sample == BI_SAMPLE_CENTER);
}

static bool
bi_can_preload_ld_var(bi_instr *I)
{
   return (I->op == BI_OPCODE_LD_VAR_IMM) && bi_can_interp_at_sample(I) &&
          bi_is_regfmt_float(I->register_format);
}

static bool
bi_is_var_tex(enum bi_opcode op)
{
   return (op == BI_OPCODE_VAR_TEX_F32) || (op == BI_OPCODE_VAR_TEX_F16);
}

void
bi_opt_message_preload(bi_context *ctx)
{
   unsigned nr_preload = 0;

   /* We only preload from the first block */
   bi_block *block = bi_start_block(&ctx->blocks);
   bi_builder b = bi_init_builder(ctx, bi_before_nonempty_block(block));

   bi_foreach_instr_in_block_safe(block, I) {
      if (I->nr_dests != 1)
         continue;

      struct bifrost_message_preload msg;

      if (bi_can_preload_ld_var(I)) {
         msg = (struct bifrost_message_preload){
            .enabled = true,
            .varying_index = I->varying_index,
            .fp16 = (I->register_format == BI_REGISTER_FORMAT_F16),
            .num_components = I->vecsize + 1,
         };
      } else if (bi_is_var_tex(I->op)) {
         msg = (struct bifrost_message_preload){
            .enabled = true,
            .texture = true,
            .varying_index = I->varying_index,
            .texture_index = I->texture_index,
            .fp16 = (I->op == BI_OPCODE_VAR_TEX_F16),
            .skip = I->skip,
            .zero_lod = I->lod_mode,
         };
      } else {
         continue;
      }

      /* Report the preloading */
      ctx->info.bifrost->messages[nr_preload] = msg;

      /* Replace with a collect of preloaded registers. The collect
       * kills the moves, so the collect is free (it is coalesced).
       */
      b.cursor = bi_before_instr(I);

      unsigned nr = bi_count_write_registers(I, 0);
      bi_instr *collect = bi_collect_i32_to(&b, I->dest[0], nr);

      /* The registers themselves must be preloaded at the start of
       * the program. Preloaded registers are coalesced, so these
       * moves are free.
       */
      b.cursor = bi_before_block(block);
      bi_foreach_src(collect, i) {
         unsigned reg = (nr_preload * 4) + i;

         collect->src[i] = bi_mov_i32(&b, bi_register(reg));
      }

      bi_remove_instruction(I);

      /* Maximum number of preloaded messages */
      if ((++nr_preload) == 2)
         break;
   }
}
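The (nr_preload * 4) + i indexing above assumes each preloaded message's results land in four consecutive registers starting at 4*m, so with at most two messages that is r0-r3 and r4-r7. As a one-line illustration (the helper name is invented):

/* Register holding component 'c' of preloaded message 'm', under the
 * assumed four-registers-per-message layout used by the pass above. */
static unsigned
preload_reg(unsigned m, unsigned c)
{
   return (m * 4) + c;
}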
@@ -22,8 +22,8 @@
 * SOFTWARE.
 */

#include "bi_builder.h"
#include "compiler.h"

/*
 * Due to a Bifrost encoding restriction, some instructions cannot have an abs

@@ -33,76 +33,76 @@
static bool
bi_would_impact_abs(unsigned arch, bi_instr *I, bi_index repl, unsigned s)
{
   return (arch <= 8) && I->src[1 - s].abs &&
          bi_is_word_equiv(I->src[1 - s], repl);
}

static bool
bi_takes_fabs(unsigned arch, bi_instr *I, bi_index repl, unsigned s)
{
   switch (I->op) {
   case BI_OPCODE_FCMP_V2F16:
   case BI_OPCODE_FMAX_V2F16:
   case BI_OPCODE_FMIN_V2F16:
      return !bi_would_impact_abs(arch, I, repl, s);
   case BI_OPCODE_FADD_V2F16:
      /*
       * For FADD.v2f16, the FMA pipe has the abs encoding hazard,
       * while the FADD pipe cannot encode a clamp. Either case in
       * isolation can be worked around in the scheduler, but both
       * together is impossible to encode. Avoid the hazard.
       */
      return !(I->clamp && bi_would_impact_abs(arch, I, repl, s));
   case BI_OPCODE_V2F32_TO_V2F16:
      /* TODO: Needs both match or lower */
      return false;
   case BI_OPCODE_FLOG_TABLE_F32:
      /* TODO: Need to check mode */
      return false;
   default:
      return bi_opcode_props[I->op].abs & BITFIELD_BIT(s);
   }
}

static bool
bi_takes_fneg(unsigned arch, bi_instr *I, unsigned s)
{
   switch (I->op) {
   case BI_OPCODE_CUBE_SSEL:
   case BI_OPCODE_CUBE_TSEL:
   case BI_OPCODE_CUBEFACE:
      /* TODO: Bifrost encoding restriction: need to match or lower */
      return arch >= 9;
   case BI_OPCODE_FREXPE_F32:
   case BI_OPCODE_FREXPE_V2F16:
   case BI_OPCODE_FLOG_TABLE_F32:
      /* TODO: Need to check mode */
      return false;
   default:
      return bi_opcode_props[I->op].neg & BITFIELD_BIT(s);
   }
}

static bool
bi_is_fabsneg(enum bi_opcode op, enum bi_size size)
{
   return (size == BI_SIZE_32 && op == BI_OPCODE_FABSNEG_F32) ||
          (size == BI_SIZE_16 && op == BI_OPCODE_FABSNEG_V2F16);
}

static enum bi_swizzle
bi_compose_swizzle_16(enum bi_swizzle a, enum bi_swizzle b)
{
   assert(a <= BI_SWIZZLE_H11);
   assert(b <= BI_SWIZZLE_H11);

   bool al = (a & BI_SWIZZLE_H10);
   bool ar = (a & BI_SWIZZLE_H01);
   bool bl = (b & BI_SWIZZLE_H10);
   bool br = (b & BI_SWIZZLE_H01);

   return ((al ? br : bl) ? BI_SWIZZLE_H10 : 0) |
          ((ar ? br : bl) ? BI_SWIZZLE_H01 : 0);
}

/* Like bi_replace_index, but composes instead of overwrites */

@@ -110,17 +110,17 @@ bi_compose_swizzle_16(enum bi_swizzle a, enum bi_swizzle b)
static inline bi_index
bi_compose_float_index(bi_index old, bi_index repl)
{
   /* abs(-x) = abs(+x) so ignore repl.neg if old.abs is set, otherwise
    * -(-x) = x but -(+x) = +(-x) so need to exclusive-or the negates */
   repl.neg = old.neg ^ (repl.neg && !old.abs);

   /* +/- abs(+/- abs(x)) = +/- abs(x), etc so just or the two */
   repl.abs |= old.abs;

   /* Use the old swizzle to select from the replacement swizzle */
   repl.swizzle = bi_compose_swizzle_16(old.swizzle, repl.swizzle);

   return repl;
}
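The composition rules above can be checked in isolation: negation composes by XOR (unless the outer index takes abs, which discards the inner sign) and abs composes by OR. A standalone model with toy fields:

#include <stdbool.h>

struct toy_mod {
   bool abs;
   bool neg;
};

/* Compose an inner modifier 'repl' under an outer modifier 'old',
 * mirroring bi_compose_float_index: -(-x) = x, abs(-x) = abs(x),
 * abs(abs(x)) = abs(x). */
static struct toy_mod
compose(struct toy_mod old, struct toy_mod repl)
{
   struct toy_mod out;
   out.neg = old.neg ^ (repl.neg && !old.abs);
   out.abs = old.abs || repl.abs;
   return out;
}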
|
||||
|
||||
/* DISCARD.b32(FCMP.f(x, y)) --> DISCARD.f(x, y) */
|
||||
|
|
@ -128,30 +128,35 @@ bi_compose_float_index(bi_index old, bi_index repl)
|
|||
static inline bool
|
||||
bi_fuse_discard_fcmp(bi_context *ctx, bi_instr *I, bi_instr *mod)
|
||||
{
|
||||
if (!mod) return false;
|
||||
if (I->op != BI_OPCODE_DISCARD_B32) return false;
|
||||
if (mod->op != BI_OPCODE_FCMP_F32 && mod->op != BI_OPCODE_FCMP_V2F16) return false;
|
||||
if (mod->cmpf >= BI_CMPF_GTLT) return false;
|
||||
if (!mod)
|
||||
return false;
|
||||
if (I->op != BI_OPCODE_DISCARD_B32)
|
||||
return false;
|
||||
if (mod->op != BI_OPCODE_FCMP_F32 && mod->op != BI_OPCODE_FCMP_V2F16)
|
||||
return false;
|
||||
if (mod->cmpf >= BI_CMPF_GTLT)
|
||||
return false;
|
||||
|
||||
/* result_type doesn't matter */
|
||||
/* result_type doesn't matter */
|
||||
|
||||
/* .abs and .neg modifiers allowed on Valhall DISCARD but not Bifrost */
|
||||
bool absneg = mod->src[0].neg || mod->src[0].abs;
|
||||
absneg |= mod->src[1].neg || mod->src[1].abs;
|
||||
/* .abs and .neg modifiers allowed on Valhall DISCARD but not Bifrost */
|
||||
bool absneg = mod->src[0].neg || mod->src[0].abs;
|
||||
absneg |= mod->src[1].neg || mod->src[1].abs;
|
||||
|
||||
if (ctx->arch <= 8 && absneg) return false;
|
||||
if (ctx->arch <= 8 && absneg)
|
||||
return false;
|
||||
|
||||
enum bi_swizzle r = I->src[0].swizzle;
|
||||
enum bi_swizzle r = I->src[0].swizzle;
|
||||
|
||||
bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
|
||||
I = bi_discard_f32(&b, mod->src[0], mod->src[1], mod->cmpf);
|
||||
bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
|
||||
I = bi_discard_f32(&b, mod->src[0], mod->src[1], mod->cmpf);
|
||||
|
||||
if (mod->op == BI_OPCODE_FCMP_V2F16) {
|
||||
I->src[0].swizzle = bi_compose_swizzle_16(r, I->src[0].swizzle);
|
||||
I->src[1].swizzle = bi_compose_swizzle_16(r, I->src[1].swizzle);
|
||||
}
|
||||
if (mod->op == BI_OPCODE_FCMP_V2F16) {
|
||||
I->src[0].swizzle = bi_compose_swizzle_16(r, I->src[0].swizzle);
|
||||
I->src[1].swizzle = bi_compose_swizzle_16(r, I->src[1].swizzle);
|
||||
}
|
||||
|
||||
return true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -159,80 +164,80 @@ bi_fuse_discard_fcmp(bi_context *ctx, bi_instr *I, bi_instr *mod)
|
|||
* because all 8-bit and 16-bit integers may be represented exactly as fp32.
|
||||
*/
|
||||
struct {
|
||||
enum bi_opcode inner;
|
||||
enum bi_opcode outer;
|
||||
enum bi_opcode replacement;
|
||||
enum bi_opcode inner;
|
||||
enum bi_opcode outer;
|
||||
enum bi_opcode replacement;
|
||||
} bi_small_int_patterns[] = {
|
||||
{ BI_OPCODE_S8_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S8_TO_F32 },
|
||||
{ BI_OPCODE_U8_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U8_TO_F32 },
|
||||
{ BI_OPCODE_U8_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U8_TO_F32 },
|
||||
{ BI_OPCODE_S16_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S16_TO_F32 },
|
||||
{ BI_OPCODE_U16_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U16_TO_F32 },
|
||||
{ BI_OPCODE_U16_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U16_TO_F32 },
|
||||
{BI_OPCODE_S8_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S8_TO_F32},
|
||||
{BI_OPCODE_U8_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U8_TO_F32},
|
||||
{BI_OPCODE_U8_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U8_TO_F32},
|
||||
{BI_OPCODE_S16_TO_S32, BI_OPCODE_S32_TO_F32, BI_OPCODE_S16_TO_F32},
|
||||
{BI_OPCODE_U16_TO_U32, BI_OPCODE_U32_TO_F32, BI_OPCODE_U16_TO_F32},
|
||||
{BI_OPCODE_U16_TO_U32, BI_OPCODE_S32_TO_F32, BI_OPCODE_U16_TO_F32},
|
||||
};
|
||||
|
||||
static inline void
|
||||
bi_fuse_small_int_to_f32(bi_instr *I, bi_instr *mod)
|
||||
{
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(bi_small_int_patterns); ++i) {
|
||||
if (I->op != bi_small_int_patterns[i].outer)
|
||||
continue;
|
||||
if (mod->op != bi_small_int_patterns[i].inner)
|
||||
continue;
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(bi_small_int_patterns); ++i) {
|
||||
if (I->op != bi_small_int_patterns[i].outer)
|
||||
continue;
|
||||
if (mod->op != bi_small_int_patterns[i].inner)
|
||||
continue;
|
||||
|
||||
assert(I->src[0].swizzle == BI_SWIZZLE_H01);
|
||||
I->src[0] = mod->src[0];
|
||||
I->round = BI_ROUND_NONE;
|
||||
I->op = bi_small_int_patterns[i].replacement;
|
||||
}
|
||||
assert(I->src[0].swizzle == BI_SWIZZLE_H01);
|
||||
I->src[0] = mod->src[0];
|
||||
I->round = BI_ROUND_NONE;
|
||||
I->op = bi_small_int_patterns[i].replacement;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
bi_opt_mod_prop_forward(bi_context *ctx)
|
||||
{
|
||||
bi_instr **lut = calloc(sizeof(bi_instr *), ctx->ssa_alloc);
|
||||
bi_instr **lut = calloc(sizeof(bi_instr *), ctx->ssa_alloc);
|
||||
|
||||
bi_foreach_instr_global_safe(ctx, I) {
|
||||
/* Try fusing FCMP into DISCARD.b32, building a new DISCARD.f32
|
||||
* instruction. As this is the only optimization DISCARD is
|
||||
* involved in, this shortcircuits other processing.
|
||||
*/
|
||||
if (I->op == BI_OPCODE_DISCARD_B32) {
|
||||
if (bi_is_ssa(I->src[0]) &&
|
||||
bi_fuse_discard_fcmp(ctx, I, lut[I->src[0].value])) {
|
||||
bi_remove_instruction(I);
|
||||
}
|
||||
bi_foreach_instr_global_safe(ctx, I) {
|
||||
/* Try fusing FCMP into DISCARD.b32, building a new DISCARD.f32
|
||||
* instruction. As this is the only optimization DISCARD is
|
||||
* involved in, this shortcircuits other processing.
|
||||
*/
|
||||
if (I->op == BI_OPCODE_DISCARD_B32) {
|
||||
if (bi_is_ssa(I->src[0]) &&
|
||||
bi_fuse_discard_fcmp(ctx, I, lut[I->src[0].value])) {
|
||||
bi_remove_instruction(I);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
bi_foreach_dest(I, d) {
|
||||
lut[I->dest[d].value] = I;
|
||||
}
|
||||
bi_foreach_dest(I, d) {
|
||||
lut[I->dest[d].value] = I;
|
||||
}
|
||||
|
||||
bi_foreach_ssa_src(I, s) {
|
||||
bi_instr *mod = lut[I->src[s].value];
|
||||
bi_foreach_ssa_src(I, s) {
|
||||
bi_instr *mod = lut[I->src[s].value];
|
||||
|
||||
if (!mod)
|
||||
continue;
|
||||
if (!mod)
|
||||
continue;
|
||||
|
||||
unsigned size = bi_opcode_props[I->op].size;
|
||||
unsigned size = bi_opcode_props[I->op].size;
|
||||
|
||||
bi_fuse_small_int_to_f32(I, mod);
|
||||
bi_fuse_small_int_to_f32(I, mod);
|
||||
|
||||
if (bi_is_fabsneg(mod->op, size)) {
|
||||
if (mod->src[0].abs && !bi_takes_fabs(ctx->arch, I, mod->src[0], s))
|
||||
continue;
|
||||
if (bi_is_fabsneg(mod->op, size)) {
|
||||
if (mod->src[0].abs && !bi_takes_fabs(ctx->arch, I, mod->src[0], s))
|
||||
continue;
|
||||
|
||||
if (mod->src[0].neg && !bi_takes_fneg(ctx->arch, I, s))
|
||||
continue;
|
||||
if (mod->src[0].neg && !bi_takes_fneg(ctx->arch, I, s))
|
||||
continue;
|
||||
|
||||
I->src[s] = bi_compose_float_index(I->src[s], mod->src[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
I->src[s] = bi_compose_float_index(I->src[s], mod->src[0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(lut);
|
||||
free(lut);
|
||||
}
|
||||
|
||||
/* RSCALE has restrictions on how the clamp may be used, only used for
|
||||
|
|
@ -241,199 +246,207 @@ bi_opt_mod_prop_forward(bi_context *ctx)
|
|||
static bool
|
||||
bi_takes_clamp(bi_instr *I)
|
||||
{
|
||||
switch (I->op) {
|
||||
case BI_OPCODE_FMA_RSCALE_F32:
|
||||
case BI_OPCODE_FMA_RSCALE_V2F16:
|
||||
case BI_OPCODE_FADD_RSCALE_F32:
|
||||
return false;
|
||||
case BI_OPCODE_FADD_V2F16:
|
||||
/* Encoding restriction */
|
||||
return !(I->src[0].abs && I->src[1].abs &&
|
||||
bi_is_word_equiv(I->src[0], I->src[1]));
|
||||
default:
|
||||
return bi_opcode_props[I->op].clamp;
|
||||
}
|
||||
switch (I->op) {
|
||||
case BI_OPCODE_FMA_RSCALE_F32:
|
||||
case BI_OPCODE_FMA_RSCALE_V2F16:
|
||||
case BI_OPCODE_FADD_RSCALE_F32:
|
||||
return false;
|
||||
case BI_OPCODE_FADD_V2F16:
|
||||
/* Encoding restriction */
|
||||
return !(I->src[0].abs && I->src[1].abs &&
|
||||
bi_is_word_equiv(I->src[0], I->src[1]));
|
||||
default:
|
||||
return bi_opcode_props[I->op].clamp;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
bi_is_fclamp(enum bi_opcode op, enum bi_size size)
|
||||
{
|
||||
return (size == BI_SIZE_32 && op == BI_OPCODE_FCLAMP_F32) ||
|
||||
(size == BI_SIZE_16 && op == BI_OPCODE_FCLAMP_V2F16);
|
||||
return (size == BI_SIZE_32 && op == BI_OPCODE_FCLAMP_F32) ||
|
||||
(size == BI_SIZE_16 && op == BI_OPCODE_FCLAMP_V2F16);
|
||||
}
|
||||
|
||||
static bool
|
||||
bi_optimizer_clamp(bi_instr *I, bi_instr *use)
|
||||
{
|
||||
if (!bi_is_fclamp(use->op, bi_opcode_props[I->op].size)) return false;
|
||||
if (!bi_takes_clamp(I)) return false;
|
||||
if (!bi_is_fclamp(use->op, bi_opcode_props[I->op].size))
|
||||
return false;
|
||||
if (!bi_takes_clamp(I))
|
||||
return false;
|
||||
|
||||
/* Clamps are bitfields (clamp_m1_1/clamp_0_inf) so composition is OR */
|
||||
I->clamp |= use->clamp;
|
||||
I->dest[0] = use->dest[0];
|
||||
return true;
|
||||
/* Clamps are bitfields (clamp_m1_1/clamp_0_inf) so composition is OR */
|
||||
I->clamp |= use->clamp;
|
||||
I->dest[0] = use->dest[0];
|
||||
return true;
|
||||
}
|
||||
|
||||
static enum bi_opcode
|
||||
bi_sized_mux_op(unsigned size)
|
||||
{
|
||||
switch (size) {
|
||||
case 8: return BI_OPCODE_MUX_V4I8;
|
||||
case 16: return BI_OPCODE_MUX_V2I16;
|
||||
case 32: return BI_OPCODE_MUX_I32;
|
||||
default: unreachable("invalid size");
|
||||
}
|
||||
switch (size) {
|
||||
case 8:
|
||||
return BI_OPCODE_MUX_V4I8;
|
||||
case 16:
|
||||
return BI_OPCODE_MUX_V2I16;
|
||||
case 32:
|
||||
return BI_OPCODE_MUX_I32;
|
||||
default:
|
||||
unreachable("invalid size");
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
bi_is_fixed_mux(bi_instr *I, unsigned size, bi_index v1)
|
||||
{
|
||||
return I->op == bi_sized_mux_op(size) &&
|
||||
bi_is_value_equiv(I->src[0], bi_zero()) &&
|
||||
bi_is_value_equiv(I->src[1], v1);
|
||||
return I->op == bi_sized_mux_op(size) &&
|
||||
bi_is_value_equiv(I->src[0], bi_zero()) &&
|
||||
bi_is_value_equiv(I->src[1], v1);
|
||||
}
|
||||
|
||||
static bool
bi_takes_int_result_type(enum bi_opcode op)
{
   switch (op) {
   case BI_OPCODE_ICMP_I32:
   case BI_OPCODE_ICMP_S32:
   case BI_OPCODE_ICMP_U32:
   case BI_OPCODE_ICMP_V2I16:
   case BI_OPCODE_ICMP_V2S16:
   case BI_OPCODE_ICMP_V2U16:
   case BI_OPCODE_ICMP_V4I8:
   case BI_OPCODE_ICMP_V4S8:
   case BI_OPCODE_ICMP_V4U8:
   case BI_OPCODE_FCMP_F32:
   case BI_OPCODE_FCMP_V2F16:
      return true;
   default:
      return false;
   }
}

static bool
bi_takes_float_result_type(enum bi_opcode op)
{
   return (op == BI_OPCODE_FCMP_F32) || (op == BI_OPCODE_FCMP_V2F16);
}

/* CMP+MUX -> CMP with result type */
static bool
bi_optimizer_result_type(bi_instr *I, bi_instr *mux)
{
   if (bi_opcode_props[I->op].size != bi_opcode_props[mux->op].size)
      return false;

   if (bi_is_fixed_mux(mux, 32, bi_imm_f32(1.0)) ||
       bi_is_fixed_mux(mux, 16, bi_imm_f16(1.0))) {

      if (!bi_takes_float_result_type(I->op))
         return false;

      I->result_type = BI_RESULT_TYPE_F1;
   } else if (bi_is_fixed_mux(mux, 32, bi_imm_u32(1)) ||
              bi_is_fixed_mux(mux, 16, bi_imm_u16(1)) ||
              bi_is_fixed_mux(mux, 8, bi_imm_u8(1))) {

      if (!bi_takes_int_result_type(I->op))
         return false;

      I->result_type = BI_RESULT_TYPE_I1;
   } else {
      return false;
   }

   I->dest[0] = mux->dest[0];
   return true;
}

static bool
bi_is_var_tex(bi_instr *var, bi_instr *tex)
{
   return (var->op == BI_OPCODE_LD_VAR_IMM) &&
          (tex->op == BI_OPCODE_TEXS_2D_F16 ||
           tex->op == BI_OPCODE_TEXS_2D_F32) &&
          (var->register_format == BI_REGISTER_FORMAT_F32) &&
          ((var->sample == BI_SAMPLE_CENTER &&
            var->update == BI_UPDATE_STORE) ||
           (var->sample == BI_SAMPLE_NONE &&
            var->update == BI_UPDATE_RETRIEVE)) &&
          (tex->texture_index == tex->sampler_index) &&
          (tex->texture_index < 4) && (var->index < 8);
}

static bool
bi_optimizer_var_tex(bi_context *ctx, bi_instr *var, bi_instr *tex)
{
   if (!bi_is_var_tex(var, tex))
      return false;

   /* Construct the corresponding VAR_TEX instruction */
   bi_builder b = bi_init_builder(ctx, bi_after_instr(var));

   bi_instr *I = bi_var_tex_f32_to(&b, tex->dest[0], tex->lod_mode, var->sample,
                                   var->update, tex->texture_index, var->index);
   I->skip = tex->skip;

   if (tex->op == BI_OPCODE_TEXS_2D_F16)
      I->op = BI_OPCODE_VAR_TEX_F16;

   /* Dead code elimination will clean up for us */
   return true;
}

void
bi_opt_mod_prop_backward(bi_context *ctx)
{
   unsigned count = ctx->ssa_alloc;
   bi_instr **uses = calloc(count, sizeof(*uses));
   BITSET_WORD *multiple = calloc(BITSET_WORDS(count), sizeof(*multiple));

   bi_foreach_instr_global_rev(ctx, I) {
      bi_foreach_ssa_src(I, s) {
         unsigned v = I->src[s].value;

         if (uses[v] && uses[v] != I)
            BITSET_SET(multiple, v);
         else
            uses[v] = I;
      }

      if (!I->nr_dests)
         continue;

      bi_instr *use = uses[I->dest[0].value];

      if (!use || BITSET_TEST(multiple, I->dest[0].value))
         continue;

      /* Destination has a single use, try to propagate */
      bool propagated =
         bi_optimizer_clamp(I, use) || bi_optimizer_result_type(I, use);

      if (!propagated && I->op == BI_OPCODE_LD_VAR_IMM &&
          use->op == BI_OPCODE_SPLIT_I32) {
         /* Need to see through the split in a
          * ld_var_imm/split/var_tex sequence
          */
         bi_instr *tex = uses[use->dest[0].value];

         if (!tex || BITSET_TEST(multiple, use->dest[0].value))
            continue;

         use = tex;
         propagated = bi_optimizer_var_tex(ctx, I, use);
      }

      if (propagated) {
         bi_remove_instruction(use);
         continue;
      }
   }

   free(uses);
   free(multiple);
}

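The uses[]/multiple bookkeeping above is the whole single-use analysis. A minimal standalone sketch of the same pattern on toy SSA indices; the values and user ids are made up for illustration:

#include <stdbool.h>
#include <stdio.h>

/* Each value records its first user; a second, distinct user marks it
 * "multiple" and disqualifies it from propagation. Note the same user
 * reading a value twice still counts as a single use, matching the
 * uses[v] != I check in the pass. */
#define N 4

int
main(void)
{
   int uses[N] = {-1, -1, -1, -1};
   bool multiple[N] = {false};

   /* (value, user) pairs, visited in reverse program order */
   int edges[][2] = {{0, 7}, {1, 8}, {1, 9}, {2, 9}, {2, 9}};

   for (unsigned i = 0; i < sizeof(edges) / sizeof(edges[0]); ++i) {
      int v = edges[i][0], user = edges[i][1];
      if (uses[v] >= 0 && uses[v] != user)
         multiple[v] = true;
      else
         uses[v] = user;
   }

   for (int v = 0; v < N; ++v)
      printf("value %d: %s\n", v,
             multiple[v] ? "multiple users"
                         : uses[v] >= 0 ? "single user" : "unused");
   return 0;
}
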
/*

@ -443,37 +456,37 @@ bi_opt_mod_prop_backward(bi_context *ctx)
static bool
bi_lower_opt_instruction_helper(bi_builder *b, bi_instr *I)
{
   bi_instr *repl;

   switch (I->op) {
   case BI_OPCODE_FABSNEG_F32:
   case BI_OPCODE_FCLAMP_F32:
      repl = bi_fadd_f32_to(b, I->dest[0], I->src[0], bi_negzero());
      repl->clamp = I->clamp;
      return true;

   case BI_OPCODE_FABSNEG_V2F16:
   case BI_OPCODE_FCLAMP_V2F16:
      repl = bi_fadd_v2f16_to(b, I->dest[0], I->src[0], bi_negzero());
      repl->clamp = I->clamp;
      return true;

   case BI_OPCODE_DISCARD_B32:
      bi_discard_f32(b, I->src[0], bi_zero(), BI_CMPF_NE);
      return true;

   default:
      return false;
   }
}

void
bi_lower_opt_instructions(bi_context *ctx)
{
   bi_foreach_instr_global_safe(ctx, I) {
      bi_builder b = bi_init_builder(ctx, bi_before_instr(I));

      if (bi_lower_opt_instruction_helper(&b, I))
         bi_remove_instruction(I);
   }
}

@ -21,8 +21,8 @@
 * SOFTWARE.
 */

#include "bi_builder.h"
#include "compiler.h"

/* This optimization pass, intended to run once after code emission but before
 * copy propagation, analyzes direct word-aligned UBO reads and promotes a

@ -32,17 +32,16 @@
static bool
bi_is_ubo(bi_instr *ins)
{
   return (bi_opcode_props[ins->op].message == BIFROST_MESSAGE_LOAD) &&
          (ins->seg == BI_SEG_UBO);
}

static bool
bi_is_direct_aligned_ubo(bi_instr *ins)
{
   return bi_is_ubo(ins) && (ins->src[0].type == BI_INDEX_CONSTANT) &&
          (ins->src[1].type == BI_INDEX_CONSTANT) &&
          ((ins->src[0].value & 0x3) == 0);
}

/* Represents use data for a single UBO */

@ -50,44 +49,46 @@ bi_is_direct_aligned_ubo(bi_instr *ins)
#define MAX_UBO_WORDS (65536 / 16)

struct bi_ubo_block {
   BITSET_DECLARE(pushed, MAX_UBO_WORDS);
   uint8_t range[MAX_UBO_WORDS];
};

struct bi_ubo_analysis {
   /* Per block analysis */
   unsigned nr_blocks;
   struct bi_ubo_block *blocks;
};

static struct bi_ubo_analysis
bi_analyze_ranges(bi_context *ctx)
{
   struct bi_ubo_analysis res = {
      .nr_blocks = ctx->nir->info.num_ubos + 1,
   };

   res.blocks = calloc(res.nr_blocks, sizeof(struct bi_ubo_block));

   bi_foreach_instr_global(ctx, ins) {
      if (!bi_is_direct_aligned_ubo(ins))
         continue;

      unsigned ubo = ins->src[1].value;
      unsigned word = ins->src[0].value / 4;
      unsigned channels = bi_opcode_props[ins->op].sr_count;

      assert(ubo < res.nr_blocks);
      assert(channels > 0 && channels <= 4);

      if (word >= MAX_UBO_WORDS)
         continue;

      /* Must use max if the same base is read with different channel
       * counts, which is possible with nir_opt_shrink_vectors */
      uint8_t *range = res.blocks[ubo].range;
      range[word] = MAX2(range[word], channels);
   }

   return res;
}

/* Select UBO words to push. A sophisticated implementation would consider the

@ -97,92 +98,93 @@ bi_analyze_ranges(bi_context *ctx)
static void
bi_pick_ubo(struct panfrost_ubo_push *push, struct bi_ubo_analysis *analysis)
{
   for (signed ubo = analysis->nr_blocks - 1; ubo >= 0; --ubo) {
      struct bi_ubo_block *block = &analysis->blocks[ubo];

      for (unsigned r = 0; r < MAX_UBO_WORDS; ++r) {
         unsigned range = block->range[r];

         /* Don't push something we don't access */
         if (range == 0)
            continue;

         /* Don't push more than possible */
         if (push->count > PAN_MAX_PUSH - range)
            return;

         for (unsigned offs = 0; offs < range; ++offs) {
            struct panfrost_ubo_word word = {
               .ubo = ubo,
               .offset = (r + offs) * 4,
            };

            push->words[push->count++] = word;
         }

         /* Mark it as pushed so we can rewrite */
         BITSET_SET(block->pushed, r);
      }
   }
}

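A minimal standalone sketch of the analyze-then-pick flow above, on a toy range table with a made-up push budget; MAX_WORDS, MAX_PUSH, and the load list are illustrative, not the driver's limits:

#include <stdio.h>

#define MAX_WORDS 16
#define MAX_PUSH 4 /* tiny budget for the example */
#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int
main(void)
{
   unsigned char range[MAX_WORDS] = {0};

   /* Word-aligned loads: (byte offset, channel count). The same base read
    * with different widths keeps the max, as in bi_analyze_ranges. */
   unsigned loads[][2] = {{0, 2}, {0, 4}, {32, 1}};
   for (unsigned i = 0; i < 3; ++i) {
      unsigned word = loads[i][0] / 4;
      range[word] = MAX2(range[word], (unsigned char)loads[i][1]);
   }

   /* Greedy selection stops once the budget can't fit a whole range,
    * mirroring bi_pick_ubo */
   unsigned count = 0;
   for (unsigned r = 0; r < MAX_WORDS; ++r) {
      if (range[r] == 0)
         continue;
      if (count > MAX_PUSH - range[r])
         break;
      for (unsigned offs = 0; offs < range[r]; ++offs)
         printf("push word %u (byte offset %u)\n", r + offs, (r + offs) * 4);
      count += range[r];
   }
   return 0;
}
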
void
bi_opt_push_ubo(bi_context *ctx)
{
   struct bi_ubo_analysis analysis = bi_analyze_ranges(ctx);
   bi_pick_ubo(ctx->info.push, &analysis);

   ctx->ubo_mask = 0;

   bi_foreach_instr_global_safe(ctx, ins) {
      if (!bi_is_ubo(ins))
         continue;

      unsigned ubo = ins->src[1].value;
      unsigned offset = ins->src[0].value;

      if (!bi_is_direct_aligned_ubo(ins)) {
         /* The load can't be pushed, so this UBO needs to be
          * uploaded conventionally */
         if (ins->src[1].type == BI_INDEX_CONSTANT)
            ctx->ubo_mask |= BITSET_BIT(ubo);
         else
            ctx->ubo_mask = ~0;

         continue;
      }

      /* Check if we decided to push this */
      assert(ubo < analysis.nr_blocks);
      if (!BITSET_TEST(analysis.blocks[ubo].pushed, offset / 4)) {
         ctx->ubo_mask |= BITSET_BIT(ubo);
         continue;
      }

      /* Replace the UBO load with moves from FAU */
      bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));

      unsigned nr = bi_opcode_props[ins->op].sr_count;
      bi_instr *vec = bi_collect_i32_to(&b, ins->dest[0], nr);

      bi_foreach_src(vec, w) {
         /* FAU is grouped in pairs (2 x 4-byte) */
         unsigned base =
            pan_lookup_pushed_ubo(ctx->info.push, ubo, (offset + 4 * w));

         unsigned fau_idx = (base >> 1);
         unsigned fau_hi = (base & 1);

         vec->src[w] = bi_fau(BIR_FAU_UNIFORM | fau_idx, fau_hi);
      }

      bi_remove_instruction(ins);
   }

   free(analysis.blocks);
}

typedef struct {
   BITSET_DECLARE(row, PAN_MAX_PUSH);
} adjacency_row;

/* Find the connected component containing `node` with depth-first search */

@ -190,33 +192,32 @@ static void
bi_find_component(adjacency_row *adjacency, BITSET_WORD *visited,
                  unsigned *component, unsigned *size, unsigned node)
{
   unsigned neighbour;

   BITSET_SET(visited, node);
   component[(*size)++] = node;

   BITSET_FOREACH_SET(neighbour, adjacency[node].row, PAN_MAX_PUSH) {
      if (!BITSET_TEST(visited, neighbour)) {
         bi_find_component(adjacency, visited, component, size, neighbour);
      }
   }
}

static bool
bi_is_uniform(bi_index idx)
{
   return (idx.type == BI_INDEX_FAU) && (idx.value & BIR_FAU_UNIFORM);
}

/* Get the index of a uniform in 32-bit words from the start of FAU-RAM */
static unsigned
bi_uniform_word(bi_index idx)
{
   assert(bi_is_uniform(idx));
   assert(idx.offset <= 1);

   return ((idx.value & ~BIR_FAU_UNIFORM) << 1) | idx.offset;
}

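A minimal standalone sketch of the FAU pairing arithmetic shared by bi_opt_push_ubo and bi_uniform_word; the FAU_UNIFORM flag bit here is a stand-in, only the shift-and-mask round trip is the point:

#include <assert.h>
#include <stdint.h>

/* Hypothetical stand-in for the BIR_FAU_UNIFORM flag. FAU slots hold
 * pairs of 32-bit words, so word <-> (slot, hi-half) conversion is one
 * shift plus one mask in each direction. */
#define FAU_UNIFORM (1u << 7)

int
main(void)
{
   for (uint32_t word = 0; word < 16; ++word) {
      uint32_t slot = word >> 1; /* fau_idx in bi_opt_push_ubo */
      uint32_t hi = word & 1;    /* fau_hi: upper half of the pair */
      uint32_t value = FAU_UNIFORM | slot;

      /* bi_uniform_word undoes the pairing */
      assert((((value & ~FAU_UNIFORM) << 1) | hi) == word);
   }
   return 0;
}
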
/*

@ -228,35 +229,35 @@ bi_uniform_word(bi_index idx)
static void
bi_create_fau_interference_graph(bi_context *ctx, adjacency_row *adjacency)
{
   bi_foreach_instr_global(ctx, I) {
      unsigned nodes[BI_MAX_SRCS] = {};
      unsigned node_count = 0;

      /* Set nodes[] to 32-bit uniforms accessed */
      bi_foreach_src(I, s) {
         if (bi_is_uniform(I->src[s])) {
            unsigned word = bi_uniform_word(I->src[s]);

            if (word >= ctx->info.push_offset)
               nodes[node_count++] = word;
         }
      }

      /* Create clique connecting nodes[] */
      for (unsigned i = 0; i < node_count; ++i) {
         for (unsigned j = 0; j < node_count; ++j) {
            if (i == j)
               continue;

            unsigned x = nodes[i], y = nodes[j];
            assert(MAX2(x, y) < ctx->info.push->count);

            /* Add undirected edge between the nodes */
            BITSET_SET(adjacency[x].row, y);
            BITSET_SET(adjacency[y].row, x);
         }
      }
   }
}

/*

@ -278,71 +279,72 @@ bi_create_fau_interference_graph(bi_context *ctx, adjacency_row *adjacency)
void
bi_opt_reorder_push(bi_context *ctx)
{
   adjacency_row adjacency[PAN_MAX_PUSH] = {0};
   BITSET_DECLARE(visited, PAN_MAX_PUSH) = {0};

   unsigned ordering[PAN_MAX_PUSH] = {0};
   unsigned unpaired[PAN_MAX_PUSH] = {0};
   unsigned pushed = 0, unpaired_count = 0;

   struct panfrost_ubo_push *push = ctx->info.push;
   unsigned push_offset = ctx->info.push_offset;

   bi_create_fau_interference_graph(ctx, adjacency);

   for (unsigned i = push_offset; i < push->count; ++i) {
      if (BITSET_TEST(visited, i))
         continue;

      unsigned component[PAN_MAX_PUSH] = {0};
      unsigned size = 0;
      bi_find_component(adjacency, visited, component, &size, i);

      /* If there is an odd number of uses, at least one use must be
       * unpaired. Arbitrarily take the last one.
       */
      if (size % 2)
         unpaired[unpaired_count++] = component[--size];

      /* The rest of uses are paired */
      assert((size % 2) == 0);

      /* Push the paired uses */
      memcpy(ordering + pushed, component, sizeof(unsigned) * size);
      pushed += size;
   }

   /* Push unpaired nodes at the end */
   memcpy(ordering + pushed, unpaired, sizeof(unsigned) * unpaired_count);
   pushed += unpaired_count;

   /* Ordering is a permutation. Invert it for O(1) lookup. */
   unsigned old_to_new[PAN_MAX_PUSH] = {0};

   for (unsigned i = 0; i < push_offset; ++i) {
      old_to_new[i] = i;
   }

   for (unsigned i = 0; i < pushed; ++i) {
      assert(ordering[i] >= push_offset);
      old_to_new[ordering[i]] = push_offset + i;
   }

   /* Use new ordering throughout the program */
   bi_foreach_instr_global(ctx, I) {
      bi_foreach_src(I, s) {
         if (bi_is_uniform(I->src[s])) {
            unsigned node = bi_uniform_word(I->src[s]);
            unsigned new_node = old_to_new[node];
            I->src[s].value = BIR_FAU_UNIFORM | (new_node >> 1);
            I->src[s].offset = new_node & 1;
         }
      }
   }

   /* Use new ordering for push */
   struct panfrost_ubo_push old = *push;
   for (unsigned i = 0; i < pushed; ++i)
      push->words[push_offset + i] = old.words[ordering[i]];

   push->count = push_offset + pushed;
}

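A minimal standalone sketch of the permutation inversion used above; the ordering is arbitrary example data:

#include <assert.h>

/* ordering[] lists old indices in their new positions; inverting it gives
 * old_to_new[] so each source rewrite is O(1), as in bi_opt_reorder_push. */
int
main(void)
{
   unsigned ordering[4] = {2, 0, 3, 1};
   unsigned old_to_new[4];

   for (unsigned i = 0; i < 4; ++i)
      old_to_new[ordering[i]] = i;

   /* Round trip: following the inverse lands back where we started */
   for (unsigned i = 0; i < 4; ++i)
      assert(ordering[old_to_new[i]] == i);
   return 0;
}
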
File diff suppressed because it is too large

@ -26,149 +26,148 @@
/* Bottom-up local scheduler to reduce register pressure */

#include "util/dag.h"
#include "compiler.h"

struct sched_ctx {
   /* Dependency graph */
   struct dag *dag;

   /* Live set */
   BITSET_WORD *live;
};

struct sched_node {
   struct dag_node dag;

   /* Instruction this node represents */
   bi_instr *instr;
};

static void
add_dep(struct sched_node *a, struct sched_node *b)
{
   if (a && b)
      dag_add_edge(&a->dag, &b->dag, 0);
}

static struct dag *
create_dag(bi_context *ctx, bi_block *block, void *memctx)
{
   struct dag *dag = dag_create(ctx);

   struct sched_node **last_write =
      calloc(ctx->ssa_alloc, sizeof(struct sched_node *));
   struct sched_node *coverage = NULL;
   struct sched_node *preload = NULL;

   /* Last memory load, to serialize stores against */
   struct sched_node *memory_load = NULL;

   /* Last memory store, to serialize loads and stores against */
   struct sched_node *memory_store = NULL;

   bi_foreach_instr_in_block(block, I) {
      /* Leave branches at the end */
      if (I->op == BI_OPCODE_JUMP || bi_opcode_props[I->op].branch)
         break;

      assert(I->branch_target == NULL);

      struct sched_node *node = rzalloc(memctx, struct sched_node);
      node->instr = I;
      dag_init_node(dag, &node->dag);

      /* Reads depend on writes, no other hazards in SSA */
      bi_foreach_ssa_src(I, s)
         add_dep(node, last_write[I->src[s].value]);

      bi_foreach_dest(I, d)
         last_write[I->dest[d].value] = node;

      switch (bi_opcode_props[I->op].message) {
      case BIFROST_MESSAGE_LOAD:
         /* Regular memory loads need to be serialized against
          * other memory access. However, UBO memory is read-only
          * so it can be moved around freely.
          */
         if (I->seg != BI_SEG_UBO) {
            add_dep(node, memory_store);
            memory_load = node;
         }

         break;

      case BIFROST_MESSAGE_ATTRIBUTE:
         /* Regular attribute loads can be reordered, but
          * writeable attributes can't be. Our one use of
          * writeable attributes is images.
          */
         if ((I->op == BI_OPCODE_LD_TEX) || (I->op == BI_OPCODE_LD_TEX_IMM) ||
             (I->op == BI_OPCODE_LD_ATTR_TEX)) {
            add_dep(node, memory_store);
            memory_load = node;
         }

         break;

      case BIFROST_MESSAGE_STORE:
         assert(I->seg != BI_SEG_UBO);
         add_dep(node, memory_load);
         add_dep(node, memory_store);
         memory_store = node;
         break;

      case BIFROST_MESSAGE_ATOMIC:
      case BIFROST_MESSAGE_BARRIER:
         add_dep(node, memory_load);
         add_dep(node, memory_store);
         memory_load = node;
         memory_store = node;
         break;

      case BIFROST_MESSAGE_BLEND:
      case BIFROST_MESSAGE_Z_STENCIL:
      case BIFROST_MESSAGE_TILE:
         add_dep(node, coverage);
         coverage = node;
         break;

      case BIFROST_MESSAGE_ATEST:
         /* ATEST signals the end of shader side effects */
         add_dep(node, memory_store);
         memory_store = node;

         /* ATEST also updates coverage */
         add_dep(node, coverage);
         coverage = node;
         break;
      default:
         break;
      }

      add_dep(node, preload);

      if (I->op == BI_OPCODE_DISCARD_F32) {
         /* Serialize against ATEST */
         add_dep(node, coverage);
         coverage = node;

         /* Also serialize against memory and barriers */
         add_dep(node, memory_load);
         add_dep(node, memory_store);
         memory_load = node;
         memory_store = node;
      } else if ((I->op == BI_OPCODE_PHI) ||
                 (I->op == BI_OPCODE_MOV_I32 &&
                  I->src[0].type == BI_INDEX_REGISTER)) {
         preload = node;
      }
   }

   free(last_write);

   return dag;
}

/*

@ -183,30 +182,30 @@ create_dag(bi_context *ctx, bi_block *block, void *memctx)
static signed
calculate_pressure_delta(bi_instr *I, BITSET_WORD *live)
{
   signed delta = 0;

   /* Destinations must be unique */
   bi_foreach_dest(I, d) {
      if (BITSET_TEST(live, I->dest[d].value))
         delta -= bi_count_write_registers(I, d);
   }

   bi_foreach_ssa_src(I, src) {
      /* Filter duplicates */
      bool dupe = false;

      for (unsigned i = 0; i < src; ++i) {
         if (bi_is_equiv(I->src[i], I->src[src])) {
            dupe = true;
            break;
         }
      }

      if (!dupe && !BITSET_TEST(live, I->src[src].value))
         delta += bi_count_read_registers(I, src);
   }

   return delta;
}

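A minimal standalone sketch of the bottom-up pressure-delta rule above, with one-register values standing in for bi_count_read_registers and bi_count_write_registers:

#include <stdbool.h>
#include <stdio.h>

/* Walking bottom-up: a destination that is live below frees a register
 * when we reach its definition (negative delta), while a source not yet
 * live starts a new live range (positive delta). */
int
main(void)
{
   bool live[8] = {false};
   live[3] = true; /* the result is consumed later in the block */

   int delta = 0;
   int dest = 3, srcs[2] = {4, 5};

   if (live[dest])
      delta -= 1; /* defining it here ends its live range */

   for (int i = 0; i < 2; ++i)
      if (!live[srcs[i]])
         delta += 1; /* first (bottom-up) use starts a live range */

   printf("pressure delta: %+d\n", delta); /* +1: two births, one death */
   return 0;
}
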
/*

@ -216,87 +215,88 @@ calculate_pressure_delta(bi_instr *I, BITSET_WORD *live)
static struct sched_node *
choose_instr(struct sched_ctx *s)
{
   int32_t min_delta = INT32_MAX;
   struct sched_node *best = NULL;

   list_for_each_entry(struct sched_node, n, &s->dag->heads, dag.link) {
      int32_t delta = calculate_pressure_delta(n->instr, s->live);

      if (delta < min_delta) {
         best = n;
         min_delta = delta;
      }
   }

   return best;
}

static void
pressure_schedule_block(bi_context *ctx, bi_block *block, struct sched_ctx *s)
{
   /* off by a constant, that's ok */
   signed pressure = 0;
   signed orig_max_pressure = 0;
   unsigned nr_ins = 0;

   memcpy(s->live, block->ssa_live_out,
          BITSET_WORDS(ctx->ssa_alloc) * sizeof(BITSET_WORD));

   bi_foreach_instr_in_block_rev(block, I) {
      pressure += calculate_pressure_delta(I, s->live);
      orig_max_pressure = MAX2(pressure, orig_max_pressure);
      bi_liveness_ins_update_ssa(s->live, I);
      nr_ins++;
   }

   memcpy(s->live, block->ssa_live_out,
          BITSET_WORDS(ctx->ssa_alloc) * sizeof(BITSET_WORD));

   /* off by a constant, that's ok */
   signed max_pressure = 0;
   pressure = 0;

   struct sched_node **schedule = calloc(nr_ins, sizeof(struct sched_node *));
   nr_ins = 0;

   while (!list_is_empty(&s->dag->heads)) {
      struct sched_node *node = choose_instr(s);
      pressure += calculate_pressure_delta(node->instr, s->live);
      max_pressure = MAX2(pressure, max_pressure);
      dag_prune_head(s->dag, &node->dag);

      schedule[nr_ins++] = node;
      bi_liveness_ins_update_ssa(s->live, node->instr);
   }

   /* Bail if it looks like it's worse */
   if (max_pressure >= orig_max_pressure) {
      free(schedule);
      return;
   }

   /* Apply the schedule */
   for (unsigned i = 0; i < nr_ins; ++i) {
      bi_remove_instruction(schedule[i]->instr);
      list_add(&schedule[i]->instr->link, &block->instructions);
   }

   free(schedule);
}

void
bi_pressure_schedule(bi_context *ctx)
{
   bi_compute_liveness_ssa(ctx);
   void *memctx = ralloc_context(ctx);
   BITSET_WORD *live =
      ralloc_array(memctx, BITSET_WORD, BITSET_WORDS(ctx->ssa_alloc));

   bi_foreach_block(ctx, block) {
      struct sched_ctx sctx = {.dag = create_dag(ctx, block, memctx),
                               .live = live};

      pressure_schedule_block(ctx, block, &sctx);
   }

   ralloc_free(memctx);
}

@ -24,177 +24,179 @@
 * SOFTWARE.
 */

#include "bi_print_common.h"
#include "compiler.h"

static const char *
bi_reg_op_name(enum bifrost_reg_op op)
{
   switch (op) {
   case BIFROST_OP_IDLE:
      return "idle";
   case BIFROST_OP_READ:
      return "read";
   case BIFROST_OP_WRITE:
      return "write";
   case BIFROST_OP_WRITE_LO:
      return "write lo";
   case BIFROST_OP_WRITE_HI:
      return "write hi";
   default:
      return "invalid";
   }
}

void
bi_print_slots(bi_registers *regs, FILE *fp)
{
   for (unsigned i = 0; i < 2; ++i) {
      if (regs->enabled[i])
         fprintf(fp, "slot %u: %u\n", i, regs->slot[i]);
   }

   if (regs->slot23.slot2) {
      fprintf(fp, "slot 2 (%s%s): %u\n", bi_reg_op_name(regs->slot23.slot2),
              regs->slot23.slot2 >= BIFROST_OP_WRITE ? " FMA" : "",
              regs->slot[2]);
   }

   if (regs->slot23.slot3) {
      fprintf(fp, "slot 3 (%s %s): %u\n", bi_reg_op_name(regs->slot23.slot3),
              regs->slot23.slot3_fma ? "FMA" : "ADD", regs->slot[3]);
   }
}

void
bi_print_tuple(bi_tuple *tuple, FILE *fp)
{
   bi_instr *ins[2] = {tuple->fma, tuple->add};

   for (unsigned i = 0; i < 2; ++i) {
      fprintf(fp, (i == 0) ? "\t* " : "\t+ ");

      if (ins[i])
         bi_print_instr(ins[i], fp);
      else
         fprintf(fp, "NOP\n");
   }
}

void
bi_print_clause(bi_clause *clause, FILE *fp)
{
   fprintf(fp, "id(%u)", clause->scoreboard_id);

   if (clause->dependencies) {
      fprintf(fp, " wait(");

      for (unsigned i = 0; i < 8; ++i) {
         if (clause->dependencies & (1 << i))
            fprintf(fp, "%u ", i);
      }

      fprintf(fp, ")");
   }

   fprintf(fp, " %s", bi_flow_control_name(clause->flow_control));

   if (!clause->next_clause_prefetch)
      fprintf(fp, " no_prefetch");

   if (clause->staging_barrier)
      fprintf(fp, " osrb");

   if (clause->td)
      fprintf(fp, " td");

   if (clause->pcrel_idx != ~0)
      fprintf(fp, " pcrel(%u)", clause->pcrel_idx);

   fprintf(fp, "\n");

   for (unsigned i = 0; i < clause->tuple_count; ++i)
      bi_print_tuple(&clause->tuples[i], fp);

   if (clause->constant_count) {
      for (unsigned i = 0; i < clause->constant_count; ++i)
         fprintf(fp, "%" PRIx64 " ", clause->constants[i]);

      if (clause->branch_constant)
         fprintf(fp, "*");

      fprintf(fp, "\n");
   }

   fprintf(fp, "\n");
}

static void
bi_print_scoreboard_line(unsigned slot, const char *name, uint64_t mask,
                         FILE *fp)
{
   if (!mask)
      return;

   fprintf(fp, "slot %u %s:", slot, name);

   u_foreach_bit64(reg, mask) fprintf(fp, " r%" PRId64, reg);

   fprintf(fp, "\n");
}

static void
bi_print_scoreboard(struct bi_scoreboard_state *state, FILE *fp)
{
   for (unsigned i = 0; i < BI_NUM_SLOTS; ++i) {
      bi_print_scoreboard_line(i, "reads", state->read[i], fp);
      bi_print_scoreboard_line(i, "writes", state->write[i], fp);
   }
}

void
bi_print_block(bi_block *block, FILE *fp)
{
   if (block->scheduled) {
      bi_print_scoreboard(&block->scoreboard_in, fp);
      fprintf(fp, "\n");
   }

   fprintf(fp, "block%u {\n", block->index);

   if (block->scheduled) {
      bi_foreach_clause_in_block(block, clause)
         bi_print_clause(clause, fp);
   } else {
      bi_foreach_instr_in_block(block, ins)
         bi_print_instr((bi_instr *)ins, fp);
   }

   fprintf(fp, "}");

   if (block->successors[0]) {
      fprintf(fp, " -> ");

      bi_foreach_successor((block), succ)
         fprintf(fp, "block%u ", succ->index);
   }

   if (bi_num_predecessors(block)) {
      fprintf(fp, " from");

      bi_foreach_predecessor(block, pred)
         fprintf(fp, " block%u", (*pred)->index);
   }

   if (block->scheduled) {
      fprintf(fp, "\n");
      bi_print_scoreboard(&block->scoreboard_out, fp);
   }

   fprintf(fp, "\n\n");
}

void
bi_print_shader(bi_context *ctx, FILE *fp)
{
   bi_foreach_block(ctx, block)
      bi_print_block(block, fp);
}

@ -31,38 +31,63 @@
const char *
bi_message_type_name(enum bifrost_message_type T)
{
   switch (T) {
   case BIFROST_MESSAGE_NONE:
      return "";
   case BIFROST_MESSAGE_VARYING:
      return "vary";
   case BIFROST_MESSAGE_ATTRIBUTE:
      return "attr";
   case BIFROST_MESSAGE_TEX:
      return "tex";
   case BIFROST_MESSAGE_VARTEX:
      return "vartex";
   case BIFROST_MESSAGE_LOAD:
      return "load";
   case BIFROST_MESSAGE_STORE:
      return "store";
   case BIFROST_MESSAGE_ATOMIC:
      return "atomic";
   case BIFROST_MESSAGE_BARRIER:
      return "barrier";
   case BIFROST_MESSAGE_BLEND:
      return "blend";
   case BIFROST_MESSAGE_TILE:
      return "tile";
   case BIFROST_MESSAGE_Z_STENCIL:
      return "z_stencil";
   case BIFROST_MESSAGE_ATEST:
      return "atest";
   case BIFROST_MESSAGE_JOB:
      return "job";
   case BIFROST_MESSAGE_64BIT:
      return "64";
   default:
      return "XXX reserved";
   }
}

const char *
bi_flow_control_name(enum bifrost_flow mode)
{
   switch (mode) {
   case BIFROST_FLOW_END:
      return "eos";
   case BIFROST_FLOW_NBTB_PC:
      return "nbb br_pc";
   case BIFROST_FLOW_NBTB_UNCONDITIONAL:
      return "nbb r_uncond";
   case BIFROST_FLOW_NBTB:
      return "nbb";
   case BIFROST_FLOW_BTB_UNCONDITIONAL:
      return "bb r_uncond";
   case BIFROST_FLOW_BTB_NONE:
      return "bb";
   case BIFROST_FLOW_WE_UNCONDITIONAL:
      return "we r_uncond";
   case BIFROST_FLOW_WE:
      return "we";
   default:
      return "XXX";
   }
}

@ -30,7 +30,7 @@
#include <stdio.h>
#include "bifrost.h"

const char *bi_message_type_name(enum bifrost_message_type T);
const char *bi_flow_control_name(enum bifrost_flow mode);

#endif

@ -44,15 +44,15 @@
static inline unsigned
bifrost_get_quirks(unsigned product_id)
{
   switch (product_id >> 8) {
   case 0x60: /* G71 */
      return BIFROST_NO_FP32_TRANSCENDENTALS | BIFROST_LIMITED_CLPER;
   case 0x62: /* G72 */
   case 0x70: /* G31 */
      return BIFROST_LIMITED_CLPER;
   default:
      return 0;
   }
}

#endif
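
A minimal standalone sketch of the product-id dispatch above; the quirk bit values are made up for the example, only the top-byte switch mirrors the header:

#include <stdio.h>

/* The product ID's top byte selects the GPU model; the cases mirror
 * bifrost_get_quirks, with illustrative quirk bits. */
#define QUIRK_NO_FP32_TRANSCENDENTALS (1 << 0)
#define QUIRK_LIMITED_CLPER (1 << 1)

static unsigned
quirks_for(unsigned product_id)
{
   switch (product_id >> 8) {
   case 0x60: /* G71 */
      return QUIRK_NO_FP32_TRANSCENDENTALS | QUIRK_LIMITED_CLPER;
   case 0x62: /* G72 */
   case 0x70: /* G31 */
      return QUIRK_LIMITED_CLPER;
   default:
      return 0;
   }
}

int
main(void)
{
   printf("G71 quirks: %#x\n", quirks_for(0x6000));     /* both bits */
   printf("unknown quirks: %#x\n", quirks_for(0x9900)); /* none */
   return 0;
}
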

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -54,9 +54,9 @@
 */

#define BI_NUM_GENERAL_SLOTS 6
#define BI_NUM_SLOTS 8
#define BI_NUM_REGISTERS 64
#define BI_SLOT_SERIAL 0 /* arbitrary */

/*
 * Due to the crude scoreboarding we do, we need to serialize varying loads and

@ -65,26 +65,26 @@
static bool
bi_should_serialize(bi_instr *I)
{
   /* For debug, serialize everything to disable scoreboard opts */
   if (bifrost_debug & BIFROST_DBG_NOSB)
      return true;

   /* Although nominally on the attribute unit, image loads have the same
    * coherency requirements as general memory loads. Serialize them for
    * now until we can do something more clever.
    */
   if (I->op == BI_OPCODE_LD_ATTR_TEX)
      return true;

   switch (bi_opcode_props[I->op].message) {
   case BIFROST_MESSAGE_VARYING:
   case BIFROST_MESSAGE_LOAD:
   case BIFROST_MESSAGE_STORE:
   case BIFROST_MESSAGE_ATOMIC:
      return true;
   default:
      return false;
   }
}

/* Given a scoreboard model, choose a slot for a clause wrapping a given

@ -93,76 +93,77 @@ bi_should_serialize(bi_instr *I)
static unsigned
bi_choose_scoreboard_slot(bi_instr *message)
{
   /* ATEST, ZS_EMIT must be issued with slot #0 */
   if (message->op == BI_OPCODE_ATEST || message->op == BI_OPCODE_ZS_EMIT)
      return 0;

   /* BARRIER must be issued with slot #7 */
   if (message->op == BI_OPCODE_BARRIER)
      return 7;

   /* For now, make serialization easy */
   if (bi_should_serialize(message))
      return BI_SLOT_SERIAL;

   return 0;
}

static uint64_t
bi_read_mask(bi_instr *I, bool staging_only)
{
   uint64_t mask = 0;

   if (staging_only && !bi_opcode_props[I->op].sr_read)
      return mask;

   bi_foreach_src(I, s) {
      if (I->src[s].type == BI_INDEX_REGISTER) {
         unsigned reg = I->src[s].value;
         unsigned count = bi_count_read_registers(I, s);

         mask |= (BITFIELD64_MASK(count) << reg);
      }

      if (staging_only)
         break;
   }

   return mask;
}

static uint64_t
bi_write_mask(bi_instr *I)
{
   uint64_t mask = 0;

   bi_foreach_dest(I, d) {
      if (bi_is_null(I->dest[d]))
         continue;

      assert(I->dest[d].type == BI_INDEX_REGISTER);

      unsigned reg = I->dest[d].value;
      unsigned count = bi_count_write_registers(I, d);

      mask |= (BITFIELD64_MASK(count) << reg);
   }

   /* Instructions like AXCHG.i32 unconditionally both read and write
    * staging registers. Even if we discard the result, the write still
    * happens logically and needs to be included in our calculations.
    * Obscurely, ATOM_CX is sr_write but can ignore the staging register in
    * certain circumstances; this does not require consideration.
    */
   if (bi_opcode_props[I->op].sr_write && I->nr_dests && I->nr_srcs &&
       bi_is_null(I->dest[0]) && !bi_is_null(I->src[0])) {

      unsigned reg = I->src[0].value;
      unsigned count = bi_count_write_registers(I, 0);

      mask |= (BITFIELD64_MASK(count) << reg);
   }

   return mask;
}

/* Update the scoreboard model to assign an instruction to a given slot */
|
||||
|
|
@ -170,140 +171,143 @@ bi_write_mask(bi_instr *I)
static void
bi_push_clause(struct bi_scoreboard_state *st, bi_clause *clause)
{
bi_instr *I = clause->message;
unsigned slot = clause->scoreboard_id;
bi_instr *I = clause->message;
unsigned slot = clause->scoreboard_id;

if (!I)
return;
if (!I)
return;

st->read[slot] |= bi_read_mask(I, true);
st->read[slot] |= bi_read_mask(I, true);

if (bi_opcode_props[I->op].sr_write)
st->write[slot] |= bi_write_mask(I);
if (bi_opcode_props[I->op].sr_write)
st->write[slot] |= bi_write_mask(I);
}

/* Adds a dependency on each slot writing any specified register */

static void
bi_depend_on_writers(bi_clause *clause, struct bi_scoreboard_state *st, uint64_t regmask)
bi_depend_on_writers(bi_clause *clause, struct bi_scoreboard_state *st,
uint64_t regmask)
{
for (unsigned slot = 0; slot < ARRAY_SIZE(st->write); ++slot) {
if (!(st->write[slot] & regmask))
continue;
for (unsigned slot = 0; slot < ARRAY_SIZE(st->write); ++slot) {
if (!(st->write[slot] & regmask))
continue;

st->write[slot] = 0;
st->read[slot] = 0;
st->write[slot] = 0;
st->read[slot] = 0;

clause->dependencies |= BITFIELD_BIT(slot);
}
clause->dependencies |= BITFIELD_BIT(slot);
}
}

static void
bi_set_staging_barrier(bi_clause *clause, struct bi_scoreboard_state *st, uint64_t regmask)
bi_set_staging_barrier(bi_clause *clause, struct bi_scoreboard_state *st,
uint64_t regmask)
{
for (unsigned slot = 0; slot < ARRAY_SIZE(st->read); ++slot) {
if (!(st->read[slot] & regmask))
continue;
for (unsigned slot = 0; slot < ARRAY_SIZE(st->read); ++slot) {
if (!(st->read[slot] & regmask))
continue;

st->read[slot] = 0;
clause->staging_barrier = true;
}
st->read[slot] = 0;
clause->staging_barrier = true;
}
}

/* Sets the dependencies for a given clause, updating the model */

static void
bi_set_dependencies(bi_block *block, bi_clause *clause, struct bi_scoreboard_state *st)
bi_set_dependencies(bi_block *block, bi_clause *clause,
struct bi_scoreboard_state *st)
{
bi_foreach_instr_in_clause(block, clause, I) {
uint64_t read = bi_read_mask(I, false);
uint64_t written = bi_write_mask(I);
bi_foreach_instr_in_clause(block, clause, I) {
uint64_t read = bi_read_mask(I, false);
uint64_t written = bi_write_mask(I);

/* Read-after-write; write-after-write */
bi_depend_on_writers(clause, st, read | written);
/* Read-after-write; write-after-write */
bi_depend_on_writers(clause, st, read | written);

/* Write-after-read */
bi_set_staging_barrier(clause, st, written);
}
/* Write-after-read */
bi_set_staging_barrier(clause, st, written);
}

/* LD_VAR instructions must be serialized per-quad. Just always depend
* on any LD_VAR instructions. This isn't optimal, but doing better
* requires divergence-aware data flow analysis.
*
* Similarly, memory loads/stores need to be synchronized. For now,
* force them to be serialized. This is not optimal.
*/
if (clause->message && bi_should_serialize(clause->message))
clause->dependencies |= BITFIELD_BIT(BI_SLOT_SERIAL);
/* LD_VAR instructions must be serialized per-quad. Just always depend
* on any LD_VAR instructions. This isn't optimal, but doing better
* requires divergence-aware data flow analysis.
*
* Similarly, memory loads/stores need to be synchronized. For now,
* force them to be serialized. This is not optimal.
*/
if (clause->message && bi_should_serialize(clause->message))
clause->dependencies |= BITFIELD_BIT(BI_SLOT_SERIAL);

/* Barriers must wait on all slots to flush existing work. It might be
* possible to skip this with more information about the barrier. For
* now, be conservative.
*/
if (clause->message && clause->message->op == BI_OPCODE_BARRIER)
clause->dependencies |= BITFIELD_MASK(BI_NUM_GENERAL_SLOTS);
/* Barriers must wait on all slots to flush existing work. It might be
* possible to skip this with more information about the barrier. For
* now, be conservative.
*/
if (clause->message && clause->message->op == BI_OPCODE_BARRIER)
clause->dependencies |= BITFIELD_MASK(BI_NUM_GENERAL_SLOTS);
}

static bool
scoreboard_block_update(bi_block *blk)
{
bool progress = false;
bool progress = false;

/* pending_in[s] = sum { p in pred[s] } ( pending_out[p] ) */
bi_foreach_predecessor(blk, pred) {
for (unsigned i = 0; i < BI_NUM_SLOTS; ++i) {
blk->scoreboard_in.read[i] |= (*pred)->scoreboard_out.read[i];
blk->scoreboard_in.write[i] |= (*pred)->scoreboard_out.write[i];
}
}
/* pending_in[s] = sum { p in pred[s] } ( pending_out[p] ) */
bi_foreach_predecessor(blk, pred) {
for (unsigned i = 0; i < BI_NUM_SLOTS; ++i) {
blk->scoreboard_in.read[i] |= (*pred)->scoreboard_out.read[i];
blk->scoreboard_in.write[i] |= (*pred)->scoreboard_out.write[i];
}
}

struct bi_scoreboard_state state = blk->scoreboard_in;
struct bi_scoreboard_state state = blk->scoreboard_in;

/* Assign locally */
/* Assign locally */

bi_foreach_clause_in_block(blk, clause) {
bi_set_dependencies(blk, clause, &state);
bi_push_clause(&state, clause);
}
bi_foreach_clause_in_block(blk, clause) {
bi_set_dependencies(blk, clause, &state);
bi_push_clause(&state, clause);
}

/* To figure out progress, diff scoreboard_out */
/* To figure out progress, diff scoreboard_out */

for (unsigned i = 0; i < BI_NUM_SLOTS; ++i)
progress |= !!memcmp(&state, &blk->scoreboard_out, sizeof(state));
for (unsigned i = 0; i < BI_NUM_SLOTS; ++i)
progress |= !!memcmp(&state, &blk->scoreboard_out, sizeof(state));

blk->scoreboard_out = state;
blk->scoreboard_out = state;

return progress;
return progress;
}

void
bi_assign_scoreboard(bi_context *ctx)
{
u_worklist worklist;
bi_worklist_init(ctx, &worklist);
u_worklist worklist;
bi_worklist_init(ctx, &worklist);

/* First, assign slots. */
bi_foreach_block(ctx, block) {
bi_foreach_clause_in_block(block, clause) {
if (clause->message) {
unsigned slot = bi_choose_scoreboard_slot(clause->message);
clause->scoreboard_id = slot;
}
}
/* First, assign slots. */
bi_foreach_block(ctx, block) {
bi_foreach_clause_in_block(block, clause) {
if (clause->message) {
unsigned slot = bi_choose_scoreboard_slot(clause->message);
clause->scoreboard_id = slot;
}
}

bi_worklist_push_tail(&worklist, block);
}
bi_worklist_push_tail(&worklist, block);
}

/* Next, perform forward data flow analysis to calculate dependencies */
while (!u_worklist_is_empty(&worklist)) {
/* Pop from the front for forward analysis */
bi_block *blk = bi_worklist_pop_head(&worklist);
/* Next, perform forward data flow analysis to calculate dependencies */
while (!u_worklist_is_empty(&worklist)) {
/* Pop from the front for forward analysis */
bi_block *blk = bi_worklist_pop_head(&worklist);

if (scoreboard_block_update(blk)) {
bi_foreach_successor(blk, succ)
bi_worklist_push_tail(&worklist, succ);
}
}
if (scoreboard_block_update(blk)) {
bi_foreach_successor(blk, succ)
bi_worklist_push_tail(&worklist, succ);
}
}

u_worklist_fini(&worklist);
u_worklist_fini(&worklist);
}

@ -27,38 +27,38 @@
#ifndef __BI_TEST_H
#define __BI_TEST_H

#include <stdio.h>
#include <inttypes.h>
#include <stdio.h>
#include "compiler.h"

/* Helper to generate a bi_builder suitable for creating test instructions */
static inline bi_block *
bit_block(bi_context *ctx)
{
bi_block *blk = rzalloc(ctx, bi_block);
bi_block *blk = rzalloc(ctx, bi_block);

util_dynarray_init(&blk->predecessors, blk);
list_addtail(&blk->link, &ctx->blocks);
list_inithead(&blk->instructions);
util_dynarray_init(&blk->predecessors, blk);
list_addtail(&blk->link, &ctx->blocks);
list_inithead(&blk->instructions);

blk->index = ctx->num_blocks++;
blk->index = ctx->num_blocks++;

return blk;
return blk;
}

static inline bi_builder *
bit_builder(void *memctx)
{
bi_context *ctx = rzalloc(memctx, bi_context);
list_inithead(&ctx->blocks);
ctx->inputs = rzalloc(memctx, struct panfrost_compile_inputs);
bi_context *ctx = rzalloc(memctx, bi_context);
list_inithead(&ctx->blocks);
ctx->inputs = rzalloc(memctx, struct panfrost_compile_inputs);

bi_block *blk = bit_block(ctx);
bi_block *blk = bit_block(ctx);

bi_builder *b = rzalloc(memctx, bi_builder);
b->shader = ctx;
b->cursor = bi_after_block(blk);
return b;
bi_builder *b = rzalloc(memctx, bi_builder);
b->shader = ctx;
b->cursor = bi_after_block(blk);
return b;
}

/* Helper to compare for logical equality of instructions. Need to skip over

@ -69,14 +69,15 @@ bit_instr_equal(bi_instr *A, bi_instr *B)
{
size_t skip = sizeof(struct list_head) + 2 * sizeof(bi_index *);

if (memcmp((uint8_t *) A + skip, (uint8_t *) B + skip, sizeof(bi_instr) - skip))
return false;
if (memcmp((uint8_t *)A + skip, (uint8_t *)B + skip,
sizeof(bi_instr) - skip))
return false;

if (memcmp(A->dest, B->dest, sizeof(bi_index) * A->nr_dests))
return false;
return false;

if (memcmp(A->src, B->src, sizeof(bi_index) * A->nr_srcs))
return false;
return false;

return true;
}

@ -87,8 +88,9 @@ bit_block_equal(bi_block *A, bi_block *B)
if (list_length(&A->instructions) != list_length(&B->instructions))
return false;

list_pair_for_each_entry(bi_instr, insA, insB,
&A->instructions, &B->instructions, link) {
list_pair_for_each_entry(bi_instr, insA, insB, &A->instructions,
&B->instructions, link)
{
if (!bit_instr_equal(insA, insB))
return false;
}

@ -102,8 +104,9 @@ bit_shader_equal(bi_context *A, bi_context *B)
if (list_length(&A->blocks) != list_length(&B->blocks))
return false;

list_pair_for_each_entry(bi_block, blockA, blockB,
&A->blocks, &B->blocks, link) {
list_pair_for_each_entry(bi_block, blockA, blockB, &A->blocks, &B->blocks,
link)
{
if (!bit_block_equal(blockA, blockB))
return false;
}

@ -111,30 +114,31 @@ bit_shader_equal(bi_context *A, bi_context *B)
return true;
}

#define ASSERT_SHADER_EQUAL(A, B) \
if (!bit_shader_equal(A, B)) { \
ADD_FAILURE(); \
fprintf(stderr, "Pass produced unexpected results"); \
fprintf(stderr, " Actual:\n"); \
bi_print_shader(A, stderr); \
fprintf(stderr, " Expected:\n"); \
bi_print_shader(B, stderr); \
fprintf(stderr, "\n"); \
} \
#define ASSERT_SHADER_EQUAL(A, B) \
if (!bit_shader_equal(A, B)) { \
ADD_FAILURE(); \
fprintf(stderr, "Pass produced unexpected results"); \
fprintf(stderr, " Actual:\n"); \
bi_print_shader(A, stderr); \
fprintf(stderr, " Expected:\n"); \
bi_print_shader(B, stderr); \
fprintf(stderr, "\n"); \
}

#define INSTRUCTION_CASE(instr, expected, pass) do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
bi_builder *b = A; \
instr; \
} \
{ \
bi_builder *b = B; \
expected; \
} \
pass(A->shader); \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while(0)
#define INSTRUCTION_CASE(instr, expected, pass) \
do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
bi_builder *b = A; \
instr; \
} \
{ \
bi_builder *b = B; \
expected; \
} \
pass(A->shader); \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while (0)

#endif

@ -21,8 +21,8 @@
* SOFTWARE.
*/

#include "compiler.h"
#include "util/u_memory.h"
#include "compiler.h"

/* Validation doesn't make sense in release builds */
#ifndef NDEBUG

@ -35,21 +35,21 @@
bool
bi_validate_initialization(bi_context *ctx)
{
bool success = true;
bool success = true;

/* Calculate the live set */
bi_block *entry = bi_entry_block(ctx);
bi_compute_liveness_ssa(ctx);
/* Calculate the live set */
bi_block *entry = bi_entry_block(ctx);
bi_compute_liveness_ssa(ctx);

/* Validate that the live set is indeed empty */
for (unsigned i = 0; i < ctx->ssa_alloc; ++i) {
if (BITSET_TEST(entry->ssa_live_in, i)) {
fprintf(stderr, "%u\n", i);
success = false;
}
}
/* Validate that the live set is indeed empty */
for (unsigned i = 0; i < ctx->ssa_alloc; ++i) {
if (BITSET_TEST(entry->ssa_live_in, i)) {
fprintf(stderr, "%u\n", i);
success = false;
}
}

return success;
return success;
}

/*

@ -60,47 +60,46 @@ bi_validate_initialization(bi_context *ctx)
static bool
bi_validate_preload(bi_context *ctx)
{
bool start = true;
uint64_t preloaded = 0;
bool start = true;
uint64_t preloaded = 0;

bi_foreach_block(ctx, block) {
bi_foreach_instr_in_block(block, I) {
/* No instruction should have a register destination */
bi_foreach_dest(I, d) {
if (I->dest[d].type == BI_INDEX_REGISTER)
return false;
}
bi_foreach_block(ctx, block) {
bi_foreach_instr_in_block(block, I) {
/* No instruction should have a register destination */
bi_foreach_dest(I, d) {
if (I->dest[d].type == BI_INDEX_REGISTER)
return false;
}

/* Preloads are register moves at the start */
bool is_preload =
start && I->op == BI_OPCODE_MOV_I32 &&
I->src[0].type == BI_INDEX_REGISTER;
/* Preloads are register moves at the start */
bool is_preload = start && I->op == BI_OPCODE_MOV_I32 &&
I->src[0].type == BI_INDEX_REGISTER;

/* After the first nonpreload, we're done preloading */
start &= is_preload;
/* After the first nonpreload, we're done preloading */
start &= is_preload;

/* Only preloads may have a register source */
bi_foreach_src(I, s) {
if (I->src[s].type == BI_INDEX_REGISTER && !is_preload)
return false;
}
/* Only preloads may have a register source */
bi_foreach_src(I, s) {
if (I->src[s].type == BI_INDEX_REGISTER && !is_preload)
return false;
}

/* Check uniqueness */
if (is_preload) {
unsigned r = I->src[0].value;
/* Check uniqueness */
if (is_preload) {
unsigned r = I->src[0].value;

if (preloaded & BITFIELD64_BIT(r))
return false;
if (preloaded & BITFIELD64_BIT(r))
return false;

preloaded |= BITFIELD64_BIT(r);
}
}
preloaded |= BITFIELD64_BIT(r);
}
}

/* Only the first block may preload */
start = false;
}
/* Only the first block may preload */
start = false;
}

return true;
return true;
}

/*

@ -111,38 +110,37 @@ bi_validate_preload(bi_context *ctx)
static bool
bi_validate_width(bi_context *ctx)
{
bool succ = true;
uint8_t *width = calloc(ctx->ssa_alloc, sizeof(uint8_t));
bool succ = true;
uint8_t *width = calloc(ctx->ssa_alloc, sizeof(uint8_t));

bi_foreach_instr_global(ctx, I) {
bi_foreach_dest(I, d) {
assert(bi_is_ssa(I->dest[d]));
bi_foreach_instr_global(ctx, I) {
bi_foreach_dest(I, d) {
assert(bi_is_ssa(I->dest[d]));

unsigned v = I->dest[d].value;
assert(width[v] == 0 && "broken SSA");
unsigned v = I->dest[d].value;
assert(width[v] == 0 && "broken SSA");

width[v] = bi_count_write_registers(I, d);
}
}
width[v] = bi_count_write_registers(I, d);
}
}

bi_foreach_instr_global(ctx, I) {
bi_foreach_ssa_src(I, s) {
unsigned v = I->src[s].value;
unsigned n = bi_count_read_registers(I, s);
bi_foreach_instr_global(ctx, I) {
bi_foreach_ssa_src(I, s) {
unsigned v = I->src[s].value;
unsigned n = bi_count_read_registers(I, s);

if (width[v] != n) {
succ = false;
fprintf(stderr,
"source %u, expected width %u, got width %u\n",
s, n, width[v]);
bi_print_instr(I, stderr);
fprintf(stderr, "\n");
}
}
}
if (width[v] != n) {
succ = false;
fprintf(stderr, "source %u, expected width %u, got width %u\n", s,
n, width[v]);
bi_print_instr(I, stderr);
fprintf(stderr, "\n");
}
}
}

free(width);
return succ;
free(width);
return succ;
}

/*

@ -151,20 +149,20 @@ bi_validate_width(bi_context *ctx)
static bool
bi_validate_dest(bi_context *ctx)
{
bool succ = true;
bool succ = true;

bi_foreach_instr_global(ctx, I) {
bi_foreach_dest(I, d) {
if (bi_is_null(I->dest[d])) {
succ = false;
fprintf(stderr, "expected dest %u", d);
bi_print_instr(I, stderr);
fprintf(stderr, "\n");
}
}
}
bi_foreach_instr_global(ctx, I) {
bi_foreach_dest(I, d) {
if (bi_is_null(I->dest[d])) {
succ = false;
fprintf(stderr, "expected dest %u", d);
bi_print_instr(I, stderr);
fprintf(stderr, "\n");
}
}
}

return succ;
return succ;
}

/*

@ -173,57 +171,57 @@ bi_validate_dest(bi_context *ctx)
static bool
bi_validate_phi_ordering(bi_context *ctx)
{
bi_foreach_block(ctx, block) {
bool start = true;
bi_foreach_block(ctx, block) {
bool start = true;

bi_foreach_instr_in_block(block, I) {
if (start)
start = I->op == BI_OPCODE_PHI;
else if (I->op == BI_OPCODE_PHI)
return false;
}
}
bi_foreach_instr_in_block(block, I) {
if (start)
start = I->op == BI_OPCODE_PHI;
else if (I->op == BI_OPCODE_PHI)
return false;
}
}

return true;
return true;
}

void
bi_validate(bi_context *ctx, const char *after)
{
bool fail = false;
bool fail = false;

if (bifrost_debug & BIFROST_DBG_NOVALIDATE)
return;
if (bifrost_debug & BIFROST_DBG_NOVALIDATE)
return;

if (!bi_validate_initialization(ctx)) {
fprintf(stderr, "Uninitialized data read after %s\n", after);
fail = true;
}
if (!bi_validate_initialization(ctx)) {
fprintf(stderr, "Uninitialized data read after %s\n", after);
fail = true;
}

if (!bi_validate_preload(ctx)) {
fprintf(stderr, "Unexpected preload after %s\n", after);
fail = true;
}
if (!bi_validate_preload(ctx)) {
fprintf(stderr, "Unexpected preload after %s\n", after);
fail = true;
}

if (!bi_validate_width(ctx)) {
fprintf(stderr, "Unexpected vector width after %s\n", after);
fail = true;
}
if (!bi_validate_width(ctx)) {
fprintf(stderr, "Unexpected vector width after %s\n", after);
fail = true;
}

if (!bi_validate_dest(ctx)) {
fprintf(stderr, "Unexpected source/dest after %s\n", after);
fail = true;
}
if (!bi_validate_dest(ctx)) {
fprintf(stderr, "Unexpected source/dest after %s\n", after);
fail = true;
}

if (!bi_validate_phi_ordering(ctx)) {
fprintf(stderr, "Unexpected phi ordering after %s\n", after);
fail = true;
}
if (!bi_validate_phi_ordering(ctx)) {
fprintf(stderr, "Unexpected phi ordering after %s\n", after);
fail = true;
}

if (fail) {
bi_print_shader(ctx, stderr);
exit(1);
}
if (fail) {
bi_print_shader(ctx, stderr);
exit(1);
}
}

#endif /* NDEBUG */

@ -26,63 +26,63 @@
#ifndef __bifrost_h__
#define __bifrost_h__

#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#ifdef __cplusplus
extern "C" {
#endif

#define BIFROST_DBG_MSGS 0x0001
#define BIFROST_DBG_SHADERS 0x0002
#define BIFROST_DBG_SHADERDB 0x0004
#define BIFROST_DBG_VERBOSE 0x0008
#define BIFROST_DBG_INTERNAL 0x0010
#define BIFROST_DBG_NOSCHED 0x0020
#define BIFROST_DBG_INORDER 0x0040
#define BIFROST_DBG_NOVALIDATE 0x0080
#define BIFROST_DBG_NOOPT 0x0100
#define BIFROST_DBG_NOIDVS 0x0200
#define BIFROST_DBG_NOSB 0x0400
#define BIFROST_DBG_NOPRELOAD 0x0800
#define BIFROST_DBG_SPILL 0x1000
#define BIFROST_DBG_NOPSCHED 0x2000
#define BIFROST_DBG_MSGS 0x0001
#define BIFROST_DBG_SHADERS 0x0002
#define BIFROST_DBG_SHADERDB 0x0004
#define BIFROST_DBG_VERBOSE 0x0008
#define BIFROST_DBG_INTERNAL 0x0010
#define BIFROST_DBG_NOSCHED 0x0020
#define BIFROST_DBG_INORDER 0x0040
#define BIFROST_DBG_NOVALIDATE 0x0080
#define BIFROST_DBG_NOOPT 0x0100
#define BIFROST_DBG_NOIDVS 0x0200
#define BIFROST_DBG_NOSB 0x0400
#define BIFROST_DBG_NOPRELOAD 0x0800
#define BIFROST_DBG_SPILL 0x1000
#define BIFROST_DBG_NOPSCHED 0x2000

extern int bifrost_debug;

enum bifrost_message_type {
BIFROST_MESSAGE_NONE = 0,
BIFROST_MESSAGE_VARYING = 1,
BIFROST_MESSAGE_ATTRIBUTE = 2,
BIFROST_MESSAGE_TEX = 3,
BIFROST_MESSAGE_VARTEX = 4,
BIFROST_MESSAGE_LOAD = 5,
BIFROST_MESSAGE_STORE = 6,
BIFROST_MESSAGE_ATOMIC = 7,
BIFROST_MESSAGE_BARRIER = 8,
BIFROST_MESSAGE_BLEND = 9,
BIFROST_MESSAGE_TILE = 10,
/* type 11 reserved */
BIFROST_MESSAGE_Z_STENCIL = 12,
BIFROST_MESSAGE_ATEST = 13,
BIFROST_MESSAGE_JOB = 14,
BIFROST_MESSAGE_64BIT = 15
BIFROST_MESSAGE_NONE = 0,
BIFROST_MESSAGE_VARYING = 1,
BIFROST_MESSAGE_ATTRIBUTE = 2,
BIFROST_MESSAGE_TEX = 3,
BIFROST_MESSAGE_VARTEX = 4,
BIFROST_MESSAGE_LOAD = 5,
BIFROST_MESSAGE_STORE = 6,
BIFROST_MESSAGE_ATOMIC = 7,
BIFROST_MESSAGE_BARRIER = 8,
BIFROST_MESSAGE_BLEND = 9,
BIFROST_MESSAGE_TILE = 10,
/* type 11 reserved */
BIFROST_MESSAGE_Z_STENCIL = 12,
BIFROST_MESSAGE_ATEST = 13,
BIFROST_MESSAGE_JOB = 14,
BIFROST_MESSAGE_64BIT = 15
};

enum bifrost_ftz {
BIFROST_FTZ_DISABLE = 0,
BIFROST_FTZ_DX11 = 1,
BIFROST_FTZ_ALWAYS = 2,
BIFROST_FTZ_ABRUPT = 3
BIFROST_FTZ_DISABLE = 0,
BIFROST_FTZ_DX11 = 1,
BIFROST_FTZ_ALWAYS = 2,
BIFROST_FTZ_ABRUPT = 3
};

enum bifrost_exceptions {
BIFROST_EXCEPTIONS_ENABLED = 0,
BIFROST_EXCEPTIONS_DISABLED = 1,
BIFROST_EXCEPTIONS_PRECISE_DIVISION = 2,
BIFROST_EXCEPTIONS_PRECISE_SQRT = 3,
BIFROST_EXCEPTIONS_ENABLED = 0,
BIFROST_EXCEPTIONS_DISABLED = 1,
BIFROST_EXCEPTIONS_PRECISE_DIVISION = 2,
BIFROST_EXCEPTIONS_PRECISE_SQRT = 3,
};

/* Describes clause flow control, with respect to control flow and branch

@ -102,182 +102,182 @@ enum bifrost_exceptions {
*/

enum bifrost_flow {
/* End-of-shader */
BIFROST_FLOW_END = 0,
/* End-of-shader */
BIFROST_FLOW_END = 0,

/* Non back-to-back, PC-encoded reconvergence */
BIFROST_FLOW_NBTB_PC = 1,
/* Non back-to-back, PC-encoded reconvergence */
BIFROST_FLOW_NBTB_PC = 1,

/* Non back-to-back, unconditional reconvergence */
BIFROST_FLOW_NBTB_UNCONDITIONAL = 2,
/* Non back-to-back, unconditional reconvergence */
BIFROST_FLOW_NBTB_UNCONDITIONAL = 2,

/* Non back-to-back, no reconvergence */
BIFROST_FLOW_NBTB = 3,
/* Non back-to-back, no reconvergence */
BIFROST_FLOW_NBTB = 3,

/* Back-to-back, unconditional reconvergence */
BIFROST_FLOW_BTB_UNCONDITIONAL = 4,
/* Back-to-back, unconditional reconvergence */
BIFROST_FLOW_BTB_UNCONDITIONAL = 4,

/* Back-to-back, no reconvergence */
BIFROST_FLOW_BTB_NONE = 5,
/* Back-to-back, no reconvergence */
BIFROST_FLOW_BTB_NONE = 5,

/* Write elision, unconditional reconvergence */
BIFROST_FLOW_WE_UNCONDITIONAL = 6,
/* Write elision, unconditional reconvergence */
BIFROST_FLOW_WE_UNCONDITIONAL = 6,

/* Write elision, no reconvergence */
BIFROST_FLOW_WE = 7,
/* Write elision, no reconvergence */
BIFROST_FLOW_WE = 7,
};

enum bifrost_slot {
/* 0-5 are general purpose */
BIFROST_SLOT_ELDEST_DEPTH = 6,
BIFROST_SLOT_ELDEST_COLOUR = 7,
/* 0-5 are general purpose */
BIFROST_SLOT_ELDEST_DEPTH = 6,
BIFROST_SLOT_ELDEST_COLOUR = 7,
};

struct bifrost_header {
/* Reserved */
unsigned zero1 : 5;
/* Reserved */
unsigned zero1 : 5;

/* Flush-to-zero mode, leave zero for GL */
enum bifrost_ftz flush_to_zero : 2;
/* Flush-to-zero mode, leave zero for GL */
enum bifrost_ftz flush_to_zero : 2;

/* Convert any infinite result of any floating-point operation to the
* biggest representable number */
unsigned suppress_inf: 1;
/* Convert any infinite result of any floating-point operation to the
* biggest representable number */
unsigned suppress_inf : 1;

/* Convert NaN to +0.0 */
unsigned suppress_nan : 1;
/* Convert NaN to +0.0 */
unsigned suppress_nan : 1;

/* Floating-point exception handling mode */
enum bifrost_exceptions float_exceptions : 2;
/* Floating-point exception handling mode */
enum bifrost_exceptions float_exceptions : 2;

/* Enum describing the flow control, which matters for handling
* divergence and reconvergence efficiently */
enum bifrost_flow flow_control : 3;
/* Enum describing the flow control, which matters for handling
* divergence and reconvergence efficiently */
enum bifrost_flow flow_control : 3;

/* Reserved */
unsigned zero2 : 1;
/* Reserved */
unsigned zero2 : 1;

/* Terminate discarded threads, rather than continuing execution. Set
* for fragment shaders for standard GL behaviour of DISCARD. Also in a
* fragment shader, this disables helper invocations, so cannot be used
* in a shader that requires derivatives or texture LOD computation */
unsigned terminate_discarded_threads : 1;
/* Terminate discarded threads, rather than continuing execution. Set
* for fragment shaders for standard GL behaviour of DISCARD. Also in a
* fragment shader, this disables helper invocations, so cannot be used
* in a shader that requires derivatives or texture LOD computation */
unsigned terminate_discarded_threads : 1;

/* If set, the hardware may prefetch the next clause. If false, the
* hardware may not. Clear for unconditional branches. */
unsigned next_clause_prefetch : 1;
/* If set, the hardware may prefetch the next clause. If false, the
* hardware may not. Clear for unconditional branches. */
unsigned next_clause_prefetch : 1;

/* If set, a barrier will be inserted after the clause waiting for all
* message passing instructions to read their staging registers, such
* that it is safe for the next clause to write them. */
unsigned staging_barrier: 1;
unsigned staging_register : 6;
/* If set, a barrier will be inserted after the clause waiting for all
* message passing instructions to read their staging registers, such
* that it is safe for the next clause to write them. */
unsigned staging_barrier : 1;
unsigned staging_register : 6;

/* Slots to wait on and slot to be used for message passing
* instructions respectively */
unsigned dependency_wait : 8;
unsigned dependency_slot : 3;
/* Slots to wait on and slot to be used for message passing
* instructions respectively */
unsigned dependency_wait : 8;
unsigned dependency_slot : 3;

enum bifrost_message_type message_type : 5;
enum bifrost_message_type next_message_type : 5;
enum bifrost_message_type message_type : 5;
enum bifrost_message_type next_message_type : 5;
} __attribute__((packed));

enum bifrost_packed_src {
BIFROST_SRC_PORT0 = 0,
BIFROST_SRC_PORT1 = 1,
BIFROST_SRC_PORT2 = 2,
BIFROST_SRC_STAGE = 3,
BIFROST_SRC_FAU_LO = 4,
BIFROST_SRC_FAU_HI = 5,
BIFROST_SRC_PASS_FMA = 6,
BIFROST_SRC_PASS_ADD = 7,
BIFROST_SRC_PORT0 = 0,
BIFROST_SRC_PORT1 = 1,
BIFROST_SRC_PORT2 = 2,
BIFROST_SRC_STAGE = 3,
BIFROST_SRC_FAU_LO = 4,
BIFROST_SRC_FAU_HI = 5,
BIFROST_SRC_PASS_FMA = 6,
BIFROST_SRC_PASS_ADD = 7,
};

struct bifrost_fma_inst {
unsigned src0 : 3;
unsigned op : 20;
unsigned src0 : 3;
unsigned op : 20;
} __attribute__((packed));

struct bifrost_add_inst {
unsigned src0 : 3;
unsigned op : 17;
unsigned src0 : 3;
unsigned op : 17;
} __attribute__((packed));

enum branch_bit_size {
BR_SIZE_32 = 0,
BR_SIZE_16XX = 1,
BR_SIZE_16YY = 2,
// For the above combinations of bitsize and location, an extra bit is
// encoded via comparing the sources. The only possible source of ambiguity
// would be if the sources were the same, but then the branch condition
// would be always true or always false anyways, so we can ignore it. But
// this no longer works when comparing the y component to the x component,
// since it's valid to compare the y component of a source against its own
// x component. Instead, the extra bit is encoded via an extra bitsize.
BR_SIZE_16YX0 = 3,
BR_SIZE_16YX1 = 4,
BR_SIZE_32_AND_16X = 5,
BR_SIZE_32_AND_16Y = 6,
// Used for comparisons with zero and always-true, see below. I think this
// only works for integer comparisons.
BR_SIZE_ZERO = 7,
BR_SIZE_32 = 0,
BR_SIZE_16XX = 1,
BR_SIZE_16YY = 2,
// For the above combinations of bitsize and location, an extra bit is
// encoded via comparing the sources. The only possible source of ambiguity
// would be if the sources were the same, but then the branch condition
// would be always true or always false anyways, so we can ignore it. But
// this no longer works when comparing the y component to the x component,
// since it's valid to compare the y component of a source against its own
// x component. Instead, the extra bit is encoded via an extra bitsize.
BR_SIZE_16YX0 = 3,
BR_SIZE_16YX1 = 4,
BR_SIZE_32_AND_16X = 5,
BR_SIZE_32_AND_16Y = 6,
// Used for comparisons with zero and always-true, see below. I think this
// only works for integer comparisons.
BR_SIZE_ZERO = 7,
};

struct bifrost_regs {
unsigned fau_idx : 8;
unsigned reg3 : 6;
unsigned reg2 : 6;
unsigned reg0 : 5;
unsigned reg1 : 6;
unsigned ctrl : 4;
unsigned fau_idx : 8;
unsigned reg3 : 6;
unsigned reg2 : 6;
unsigned reg0 : 5;
unsigned reg1 : 6;
unsigned ctrl : 4;
} __attribute__((packed));

#define BIFROST_FMTC_CONSTANTS 0b0011
#define BIFROST_FMTC_FINAL 0b0111
#define BIFROST_FMTC_CONSTANTS 0b0011
#define BIFROST_FMTC_FINAL 0b0111

struct bifrost_fmt_constant {
unsigned pos : 4;
unsigned tag : 4;
uint64_t imm_1 : 60;
uint64_t imm_2 : 60;
unsigned pos : 4;
unsigned tag : 4;
uint64_t imm_1 : 60;
uint64_t imm_2 : 60;
} __attribute__((packed));

/* Clause formats, encoded in a table */

enum bi_clause_subword {
/* Literal 3-bit values */
BI_CLAUSE_SUBWORD_LITERAL_0 = 0,
/* etc */
BI_CLAUSE_SUBWORD_LITERAL_7 = 7,
/* Literal 3-bit values */
BI_CLAUSE_SUBWORD_LITERAL_0 = 0,
/* etc */
BI_CLAUSE_SUBWORD_LITERAL_7 = 7,

/* The value of the corresponding tuple in the corresponding bits */
BI_CLAUSE_SUBWORD_TUPLE_0 = 8,
/* etc */
BI_CLAUSE_SUBWORD_TUPLE_7 = 15,
/* The value of the corresponding tuple in the corresponding bits */
BI_CLAUSE_SUBWORD_TUPLE_0 = 8,
/* etc */
BI_CLAUSE_SUBWORD_TUPLE_7 = 15,

/* Clause header */
BI_CLAUSE_SUBWORD_HEADER = 16,
/* Clause header */
BI_CLAUSE_SUBWORD_HEADER = 16,

/* Leave zero, but semantically distinct from literal 0 */
BI_CLAUSE_SUBWORD_RESERVED = 17,
/* Leave zero, but semantically distinct from literal 0 */
BI_CLAUSE_SUBWORD_RESERVED = 17,

/* Embedded constant 0 */
BI_CLAUSE_SUBWORD_CONSTANT = 18,
/* Embedded constant 0 */
BI_CLAUSE_SUBWORD_CONSTANT = 18,

/* M bits controlling modifier for the constant */
BI_CLAUSE_SUBWORD_M = 19,
/* M bits controlling modifier for the constant */
BI_CLAUSE_SUBWORD_M = 19,

/* Z bit: 1 to begin encoding constants, 0 to terminate the clause */
BI_CLAUSE_SUBWORD_Z = 20,
/* Z bit: 1 to begin encoding constants, 0 to terminate the clause */
BI_CLAUSE_SUBWORD_Z = 20,

/* Upper 3-bits of a given tuple and zero extended */
BI_CLAUSE_SUBWORD_UPPER_0 = 32,
/* etc */
BI_CLAUSE_SUBWORD_UPPER_7 = BI_CLAUSE_SUBWORD_UPPER_0 + 7,
/* Upper 3-bits of a given tuple and zero extended */
BI_CLAUSE_SUBWORD_UPPER_0 = 32,
/* etc */
BI_CLAUSE_SUBWORD_UPPER_7 = BI_CLAUSE_SUBWORD_UPPER_0 + 7,

/* Upper 3-bits of two tuples, concatenated and zero-extended */
BI_CLAUSE_SUBWORD_UPPER_23 = BI_CLAUSE_SUBWORD_UPPER_0 + 23,
BI_CLAUSE_SUBWORD_UPPER_56 = BI_CLAUSE_SUBWORD_UPPER_0 + 56,
/* Upper 3-bits of two tuples, concatenated and zero-extended */
BI_CLAUSE_SUBWORD_UPPER_23 = BI_CLAUSE_SUBWORD_UPPER_0 + 23,
BI_CLAUSE_SUBWORD_UPPER_56 = BI_CLAUSE_SUBWORD_UPPER_0 + 56,
};

#define L(x) ((enum bi_clause_subword)(BI_CLAUSE_SUBWORD_LITERAL_0 + x))

@ -290,15 +290,15 @@ enum bi_clause_subword {
#define R BI_CLAUSE_SUBWORD_RESERVED

struct bi_clause_format {
unsigned format; /* format number */
unsigned pos; /* index in the clause */
enum bi_clause_subword tag_1; /* 2-bits */
enum bi_clause_subword tag_2; /* 3-bits */
enum bi_clause_subword tag_3; /* 3-bits */
enum bi_clause_subword s0_s3; /* 60 bits */
enum bi_clause_subword s4; /* 15 bits */
enum bi_clause_subword s5_s6; /* 30 bits */
enum bi_clause_subword s7; /* 15 bits */
unsigned format; /* format number */
unsigned pos; /* index in the clause */
enum bi_clause_subword tag_1; /* 2-bits */
enum bi_clause_subword tag_2; /* 3-bits */
enum bi_clause_subword tag_3; /* 3-bits */
enum bi_clause_subword s0_s3; /* 60 bits */
enum bi_clause_subword s4; /* 15 bits */
enum bi_clause_subword s5_s6; /* 30 bits */
enum bi_clause_subword s7; /* 15 bits */
};

/* clang-format off */

@ -341,46 +341,46 @@ static const struct bi_clause_format bi_clause_formats[] = {
* set (and ignored) as a placeholder to differentiate from reserved.
*/
enum bifrost_reg_mode {
BIFROST_R_WL_FMA = 1,
BIFROST_R_WH_FMA = 2,
BIFROST_R_W_FMA = 3,
BIFROST_R_WL_ADD = 4,
BIFROST_R_WH_ADD = 5,
BIFROST_R_W_ADD = 6,
BIFROST_WL_WL_ADD = 7,
BIFROST_WL_WH_ADD = 8,
BIFROST_WL_W_ADD = 9,
BIFROST_WH_WL_ADD = 10,
BIFROST_WH_WH_ADD = 11,
BIFROST_WH_W_ADD = 12,
BIFROST_W_WL_ADD = 13,
BIFROST_W_WH_ADD = 14,
BIFROST_W_W_ADD = 15,
BIFROST_IDLE_1 = 16,
BIFROST_I_W_FMA = 17,
BIFROST_I_WL_FMA = 18,
BIFROST_I_WH_FMA = 19,
BIFROST_R_I = 20,
BIFROST_I_W_ADD = 21,
BIFROST_I_WL_ADD = 22,
BIFROST_I_WH_ADD = 23,
BIFROST_WL_WH_MIX = 24,
BIFROST_WH_WL_MIX = 26,
BIFROST_IDLE = 27,
BIFROST_R_WL_FMA = 1,
BIFROST_R_WH_FMA = 2,
BIFROST_R_W_FMA = 3,
BIFROST_R_WL_ADD = 4,
BIFROST_R_WH_ADD = 5,
BIFROST_R_W_ADD = 6,
BIFROST_WL_WL_ADD = 7,
BIFROST_WL_WH_ADD = 8,
BIFROST_WL_W_ADD = 9,
BIFROST_WH_WL_ADD = 10,
BIFROST_WH_WH_ADD = 11,
BIFROST_WH_W_ADD = 12,
BIFROST_W_WL_ADD = 13,
BIFROST_W_WH_ADD = 14,
BIFROST_W_W_ADD = 15,
BIFROST_IDLE_1 = 16,
BIFROST_I_W_FMA = 17,
BIFROST_I_WL_FMA = 18,
BIFROST_I_WH_FMA = 19,
BIFROST_R_I = 20,
BIFROST_I_W_ADD = 21,
BIFROST_I_WL_ADD = 22,
BIFROST_I_WH_ADD = 23,
BIFROST_WL_WH_MIX = 24,
BIFROST_WH_WL_MIX = 26,
BIFROST_IDLE = 27,
};

enum bifrost_reg_op {
BIFROST_OP_IDLE = 0,
BIFROST_OP_READ = 1,
BIFROST_OP_WRITE = 2,
BIFROST_OP_WRITE_LO = 3,
BIFROST_OP_WRITE_HI = 4,
BIFROST_OP_IDLE = 0,
BIFROST_OP_READ = 1,
BIFROST_OP_WRITE = 2,
BIFROST_OP_WRITE_LO = 3,
BIFROST_OP_WRITE_HI = 4,
};

struct bifrost_reg_ctrl_23 {
enum bifrost_reg_op slot2;
enum bifrost_reg_op slot3;
bool slot3_fma;
enum bifrost_reg_op slot2;
enum bifrost_reg_op slot3;
bool slot3_fma;
};

/* clang-format off */

@ -420,201 +420,201 @@ static const struct bifrost_reg_ctrl_23 bifrost_reg_ctrl_lut[32] = {
* compiler and stored as a constant */

enum bifrost_texture_operation_mode {
/* Dual texturing */
BIFROST_TEXTURE_OPERATION_DUAL = 1,
/* Dual texturing */
BIFROST_TEXTURE_OPERATION_DUAL = 1,

/* Single texturing */
BIFROST_TEXTURE_OPERATION_SINGLE = 3,
/* Single texturing */
BIFROST_TEXTURE_OPERATION_SINGLE = 3,
};

enum bifrost_index {
/* Both texture/sampler index immediate */
BIFROST_INDEX_IMMEDIATE_SHARED = 0,
/* Both texture/sampler index immediate */
BIFROST_INDEX_IMMEDIATE_SHARED = 0,

/* Sampler index immediate, texture index from staging */
BIFROST_INDEX_IMMEDIATE_SAMPLER = 1,
/* Sampler index immediate, texture index from staging */
BIFROST_INDEX_IMMEDIATE_SAMPLER = 1,

/* Texture index immediate, sampler index from staging */
BIFROST_INDEX_IMMEDIATE_TEXTURE = 2,
/* Texture index immediate, sampler index from staging */
BIFROST_INDEX_IMMEDIATE_TEXTURE = 2,

/* Both indices from (separate) staging registers */
BIFROST_INDEX_REGISTER = 3,
/* Both indices from (separate) staging registers */
BIFROST_INDEX_REGISTER = 3,
};

enum bifrost_tex_op {
/* Given explicit derivatives, compute a gradient descriptor */
BIFROST_TEX_OP_GRDESC_DER = 4,
/* Given explicit derivatives, compute a gradient descriptor */
BIFROST_TEX_OP_GRDESC_DER = 4,

/* Given implicit derivatives (texture coordinates in a fragment
* shader), compute a gradient descriptor */
BIFROST_TEX_OP_GRDESC = 5,
/* Given implicit derivatives (texture coordinates in a fragment
* shader), compute a gradient descriptor */
BIFROST_TEX_OP_GRDESC = 5,

/* Fetch a texel. Takes a staging register with LOD level / face index
* packed 16:16 */
BIFROST_TEX_OP_FETCH = 6,
/* Fetch a texel. Takes a staging register with LOD level / face index
* packed 16:16 */
BIFROST_TEX_OP_FETCH = 6,

/* Filtered texture */
BIFROST_TEX_OP_TEX = 7,
/* Filtered texture */
BIFROST_TEX_OP_TEX = 7,
};

enum bifrost_lod_mode {
/* Takes two staging registers forming a 64-bit gradient descriptor
* (computed by a previous GRDESC or GRDESC_DER operation) */
BIFROST_LOD_MODE_GRDESC = 3,
/* Takes two staging registers forming a 64-bit gradient descriptor
* (computed by a previous GRDESC or GRDESC_DER operation) */
BIFROST_LOD_MODE_GRDESC = 3,

/* Take a staging register with 8:8 fixed-point in bottom 16-bits
* specifying an explicit LOD */
BIFROST_LOD_MODE_EXPLICIT = 4,
/* Take a staging register with 8:8 fixed-point in bottom 16-bits
* specifying an explicit LOD */
BIFROST_LOD_MODE_EXPLICIT = 4,

/* Takes a staging register with bottom 16-bits as 8:8 fixed-point LOD
* bias and top 16-bit as 8:8 fixed-point lower bound (generally left
* zero), added and clamped to a computed LOD */
BIFROST_LOD_MODE_BIAS = 5,
/* Takes a staging register with bottom 16-bits as 8:8 fixed-point LOD
* bias and top 16-bit as 8:8 fixed-point lower bound (generally left
* zero), added and clamped to a computed LOD */
BIFROST_LOD_MODE_BIAS = 5,

/* Set LOD to zero */
BIFROST_LOD_MODE_ZERO = 6,
/* Set LOD to zero */
BIFROST_LOD_MODE_ZERO = 6,

/* Compute LOD */
BIFROST_LOD_MODE_COMPUTE = 7,
/* Compute LOD */
BIFROST_LOD_MODE_COMPUTE = 7,
};

enum bifrost_texture_format {
/* 16-bit floating point, with optional clamping */
BIFROST_TEXTURE_FORMAT_F16 = 0,
BIFROST_TEXTURE_FORMAT_F16_POS = 1,
BIFROST_TEXTURE_FORMAT_F16_PM1 = 2,
BIFROST_TEXTURE_FORMAT_F16_1 = 3,
/* 16-bit floating point, with optional clamping */
BIFROST_TEXTURE_FORMAT_F16 = 0,
BIFROST_TEXTURE_FORMAT_F16_POS = 1,
BIFROST_TEXTURE_FORMAT_F16_PM1 = 2,
BIFROST_TEXTURE_FORMAT_F16_1 = 3,

/* 32-bit floating point, with optional clamping */
BIFROST_TEXTURE_FORMAT_F32 = 4,
BIFROST_TEXTURE_FORMAT_F32_POS = 5,
BIFROST_TEXTURE_FORMAT_F32_PM1 = 6,
BIFROST_TEXTURE_FORMAT_F32_1 = 7,
/* 32-bit floating point, with optional clamping */
BIFROST_TEXTURE_FORMAT_F32 = 4,
BIFROST_TEXTURE_FORMAT_F32_POS = 5,
BIFROST_TEXTURE_FORMAT_F32_PM1 = 6,
BIFROST_TEXTURE_FORMAT_F32_1 = 7,
};

enum bifrost_texture_format_full {
/* Transclude bifrost_texture_format from above */
/* Transclude bifrost_texture_format from above */

/* Integers, unclamped */
BIFROST_TEXTURE_FORMAT_U16 = 12,
BIFROST_TEXTURE_FORMAT_S16 = 13,
BIFROST_TEXTURE_FORMAT_U32 = 14,
BIFROST_TEXTURE_FORMAT_S32 = 15,
/* Integers, unclamped */
BIFROST_TEXTURE_FORMAT_U16 = 12,
BIFROST_TEXTURE_FORMAT_S16 = 13,
BIFROST_TEXTURE_FORMAT_U32 = 14,
BIFROST_TEXTURE_FORMAT_S32 = 15,
};

enum bifrost_texture_fetch {
/* Default texelFetch */
BIFROST_TEXTURE_FETCH_TEXEL = 1,
/* Default texelFetch */
BIFROST_TEXTURE_FETCH_TEXEL = 1,

/* Deprecated, fetches 4x U32 of a U8 x 4 texture. Do not use. */
BIFROST_TEXTURE_FETCH_GATHER4_RGBA = 3,
/* Deprecated, fetches 4x U32 of a U8 x 4 texture. Do not use. */
BIFROST_TEXTURE_FETCH_GATHER4_RGBA = 3,

/* Gathers */
BIFROST_TEXTURE_FETCH_GATHER4_R = 4,
BIFROST_TEXTURE_FETCH_GATHER4_G = 5,
BIFROST_TEXTURE_FETCH_GATHER4_B = 6,
BIFROST_TEXTURE_FETCH_GATHER4_A = 7
/* Gathers */
BIFROST_TEXTURE_FETCH_GATHER4_R = 4,
BIFROST_TEXTURE_FETCH_GATHER4_G = 5,
BIFROST_TEXTURE_FETCH_GATHER4_B = 6,
BIFROST_TEXTURE_FETCH_GATHER4_A = 7
};

struct bifrost_texture_operation {
/* If immediate_indices is set:
* - immediate sampler index
* - index used as texture index
* Otherwise:
* - bifrost_single_index in lower 2 bits
* - 0x3 in upper 2 bits (single-texturing)
*/
unsigned sampler_index_or_mode : 4;
unsigned index : 7;
bool immediate_indices : 1;
enum bifrost_tex_op op : 3;
/* If immediate_indices is set:
* - immediate sampler index
* - index used as texture index
* Otherwise:
* - bifrost_single_index in lower 2 bits
* - 0x3 in upper 2 bits (single-texturing)
*/
unsigned sampler_index_or_mode : 4;
unsigned index : 7;
bool immediate_indices : 1;
enum bifrost_tex_op op : 3;

/* If set for TEX/FETCH, loads texel offsets and multisample index from
* a staging register containing offset_x:offset_y:offset_z:ms_index
* packed 8:8:8:8. Offsets must be in [-31, +31]. If set for
* GRDESC(_DER), disable LOD bias. */
bool offset_or_bias_disable : 1;
/* If set for TEX/FETCH, loads texel offsets and multisample index from
* a staging register containing offset_x:offset_y:offset_z:ms_index
* packed 8:8:8:8. Offsets must be in [-31, +31]. If set for
* GRDESC(_DER), disable LOD bias. */
bool offset_or_bias_disable : 1;

/* If set for TEX/FETCH, loads fp32 shadow comparison value from a
* staging register. Implies fetch_component = gather4_r. If set for
* GRDESC(_DER), disables LOD clamping. */
bool shadow_or_clamp_disable : 1;
/* If set for TEX/FETCH, loads fp32 shadow comparison value from a
* staging register. Implies fetch_component = gather4_r. If set for
* GRDESC(_DER), disables LOD clamping. */
bool shadow_or_clamp_disable : 1;

/* If set, loads an uint32 array index from a staging register. */
bool array : 1;
/* If set, loads an uint32 array index from a staging register. */
bool array : 1;

/* Texture dimension, or 0 for a cubemap */
unsigned dimension : 2;
/* Texture dimension, or 0 for a cubemap */
unsigned dimension : 2;

/* Method to compute LOD value or for a FETCH, the
* bifrost_texture_fetch component specification */
enum bifrost_lod_mode lod_or_fetch : 3;
/* Method to compute LOD value or for a FETCH, the
* bifrost_texture_fetch component specification */
enum bifrost_lod_mode lod_or_fetch : 3;

/* Reserved */
unsigned zero : 1;
/* Reserved */
unsigned zero : 1;

/* Register format for the result */
enum bifrost_texture_format_full format : 4;
/* Register format for the result */
enum bifrost_texture_format_full format : 4;

/* Write mask for the result */
unsigned mask : 4;
/* Write mask for the result */
unsigned mask : 4;
} __attribute__((packed));

struct bifrost_dual_texture_operation {
unsigned primary_sampler_index : 2;
unsigned mode : 2; /* 0x1 for dual */
unsigned primary_texture_index : 2;
unsigned secondary_sampler_index : 2;
unsigned secondary_texture_index : 2;
unsigned primary_sampler_index : 2;
unsigned mode : 2; /* 0x1 for dual */
unsigned primary_texture_index : 2;
unsigned secondary_sampler_index : 2;
unsigned secondary_texture_index : 2;

/* Leave zero for dual texturing */
unsigned reserved : 1;
unsigned index_mode_zero : 1;
/* Leave zero for dual texturing */
unsigned reserved : 1;
unsigned index_mode_zero : 1;

/* Base staging register to write the secondary results to */
unsigned secondary_register : 6;
/* Base staging register to write the secondary results to */
unsigned secondary_register : 6;

/* Format/mask for each texture */
enum bifrost_texture_format secondary_format : 3;
unsigned secondary_mask : 4;
/* Format/mask for each texture */
enum bifrost_texture_format secondary_format : 3;
unsigned secondary_mask : 4;

enum bifrost_texture_format primary_format : 3;
unsigned primary_mask : 4;
enum bifrost_texture_format primary_format : 3;
unsigned primary_mask : 4;
} __attribute__((packed));

static inline uint32_t
bi_dual_tex_as_u32(struct bifrost_dual_texture_operation desc)
{
uint32_t desc_u;
memcpy(&desc_u, &desc, sizeof(desc));
uint32_t desc_u;
memcpy(&desc_u, &desc, sizeof(desc));

return desc_u;
return desc_u;
}

#define BIFROST_MEGA_SAMPLE 128
#define BIFROST_ALL_SAMPLES 255
#define BIFROST_MEGA_SAMPLE 128
#define BIFROST_ALL_SAMPLES 255
#define BIFROST_CURRENT_PIXEL 255

struct bifrost_pixel_indices {
unsigned sample : 8;
unsigned rt : 8;
unsigned x : 8;
unsigned y : 8;
unsigned sample : 8;
unsigned rt : 8;
unsigned x : 8;
unsigned y : 8;
} __attribute__((packed));

enum bi_constmod {
BI_CONSTMOD_NONE,
BI_CONSTMOD_PC_LO,
BI_CONSTMOD_PC_HI,
BI_CONSTMOD_PC_LO_HI
BI_CONSTMOD_NONE,
BI_CONSTMOD_PC_LO,
BI_CONSTMOD_PC_HI,
BI_CONSTMOD_PC_LO_HI
};

struct bi_constants {
/* Raw constant values */
uint64_t raw[6];
/* Raw constant values */
uint64_t raw[6];

/* Associated modifier derived from M values */
enum bi_constmod mods[6];
/* Associated modifier derived from M values */
enum bi_constmod mods[6];
};

/* FAU selectors for constants are out-of-order, construct the top bits

@ -623,12 +623,10 @@ struct bi_constants {
static inline unsigned
bi_constant_field(unsigned idx)
{
const unsigned values[] = {
4, 5, 6, 7, 2, 3
};
const unsigned values[] = {4, 5, 6, 7, 2, 3};

assert(idx <= 5);
return values[idx] << 4;
assert(idx <= 5);
return values[idx] << 4;
}

#ifdef __cplusplus

File diff suppressed because it is too large
@ -25,73 +25,73 @@
#define __BIFROST_PUBLIC_H_

#include "compiler/nir/nir.h"
#include "util/u_dynarray.h"
#include "panfrost/util/pan_ir.h"
#include "util/u_dynarray.h"

void
bifrost_compile_shader_nir(nir_shader *nir,
const struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info);
void bifrost_compile_shader_nir(nir_shader *nir,
const struct panfrost_compile_inputs *inputs,
struct util_dynarray *binary,
struct pan_shader_info *info);

static const nir_shader_compiler_options bifrost_nir_options = {
.lower_scmp = true,
.lower_flrp16 = true,
.lower_flrp32 = true,
.lower_flrp64 = true,
.lower_ffract = true,
.lower_fmod = true,
.lower_fdiv = true,
.lower_isign = true,
.lower_find_lsb = true,
.lower_ifind_msb = true,
.lower_fdph = true,
.lower_fsqrt = true,
.lower_scmp = true,
.lower_flrp16 = true,
.lower_flrp32 = true,
.lower_flrp64 = true,
.lower_ffract = true,
.lower_fmod = true,
.lower_fdiv = true,
.lower_isign = true,
.lower_find_lsb = true,
.lower_ifind_msb = true,
.lower_fdph = true,
.lower_fsqrt = true,

.lower_fsign = true,
.lower_fsign = true,

.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_extract_to_shifts = true,
.lower_insert_byte = true,
.lower_rotate = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_extract_to_shifts = true,
.lower_insert_byte = true,
.lower_rotate = true,

.lower_pack_half_2x16 = true,
.lower_pack_unorm_2x16 = true,
.lower_pack_snorm_2x16 = true,
.lower_pack_unorm_4x8 = true,
.lower_pack_snorm_4x8 = true,
.lower_unpack_half_2x16 = true,
.lower_unpack_unorm_2x16 = true,
.lower_unpack_snorm_2x16 = true,
.lower_unpack_unorm_4x8 = true,
.lower_unpack_snorm_4x8 = true,
.lower_pack_split = true,
.lower_pack_half_2x16 = true,
.lower_pack_unorm_2x16 = true,
.lower_pack_snorm_2x16 = true,
.lower_pack_unorm_4x8 = true,
.lower_pack_snorm_4x8 = true,
.lower_unpack_half_2x16 = true,
.lower_unpack_unorm_2x16 = true,
.lower_unpack_snorm_2x16 = true,
.lower_unpack_unorm_4x8 = true,
.lower_unpack_snorm_4x8 = true,
.lower_pack_split = true,

.lower_doubles_options = nir_lower_dmod,
/* TODO: Don't lower supported 64-bit operations */
.lower_int64_options = ~0,
/* TODO: Use IMULD on v7 */
.lower_mul_high = true,
.lower_fisnormal = true,
.lower_uadd_carry = true,
.lower_usub_borrow = true,
.lower_doubles_options = nir_lower_dmod,
/* TODO: Don't lower supported 64-bit operations */
.lower_int64_options = ~0,
/* TODO: Use IMULD on v7 */
.lower_mul_high = true,
.lower_fisnormal = true,
.lower_uadd_carry = true,
.lower_usub_borrow = true,

.has_fsub = true,
.has_isub = true,
.vectorize_io = true,
.vectorize_vec2_16bit = true,
.fuse_ffma16 = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.use_interpolated_input_intrinsics = true,
.has_fsub = true,
.has_isub = true,
.vectorize_io = true,
.vectorize_vec2_16bit = true,
.fuse_ffma16 = true,
.fuse_ffma32 = true,
.fuse_ffma64 = true,
.use_interpolated_input_intrinsics = true,

.lower_uniforms_to_ubo = true,
.lower_uniforms_to_ubo = true,

.has_cs_global_id = true,
.lower_cs_local_index_to_id = true,
.max_unroll_iterations = 32,
.force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
.force_indirect_unrolling_sampler = true,
.has_cs_global_id = true,
.lower_cs_local_index_to_id = true,
.max_unroll_iterations = 32,
.force_indirect_unrolling =
(nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
.force_indirect_unrolling_sampler = true,
};

#endif

@@ -24,21 +24,21 @@
* Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
*/

#include "compiler.h"
#include "bi_builder.h"
#include "compiler.h"

bool
bi_has_arg(const bi_instr *ins, bi_index arg)
{
if (!ins)
return false;
if (!ins)
return false;

bi_foreach_src(ins, s) {
if (bi_is_equiv(ins->src[s], arg))
return true;
}
bi_foreach_src(ins, s) {
if (bi_is_equiv(ins->src[s], arg))
return true;
}

return false;
return false;
}

/* Precondition: valid 16-bit or 32-bit register format. Returns whether it is
@@ -48,131 +48,131 @@ bi_has_arg(const bi_instr *ins, bi_index arg)
bool
bi_is_regfmt_16(enum bi_register_format fmt)
{
switch (fmt) {
case BI_REGISTER_FORMAT_F16:
case BI_REGISTER_FORMAT_S16:
case BI_REGISTER_FORMAT_U16:
return true;
case BI_REGISTER_FORMAT_F32:
case BI_REGISTER_FORMAT_S32:
case BI_REGISTER_FORMAT_U32:
case BI_REGISTER_FORMAT_AUTO:
return false;
default:
unreachable("Invalid register format");
}
switch (fmt) {
case BI_REGISTER_FORMAT_F16:
case BI_REGISTER_FORMAT_S16:
case BI_REGISTER_FORMAT_U16:
return true;
case BI_REGISTER_FORMAT_F32:
case BI_REGISTER_FORMAT_S32:
case BI_REGISTER_FORMAT_U32:
case BI_REGISTER_FORMAT_AUTO:
return false;
default:
unreachable("Invalid register format");
}
}

static unsigned
bi_count_staging_registers(const bi_instr *ins)
{
enum bi_sr_count count = bi_opcode_props[ins->op].sr_count;
unsigned vecsize = ins->vecsize + 1; /* XXX: off-by-one */
enum bi_sr_count count = bi_opcode_props[ins->op].sr_count;
unsigned vecsize = ins->vecsize + 1; /* XXX: off-by-one */

switch (count) {
case BI_SR_COUNT_0 ... BI_SR_COUNT_4:
return count;
case BI_SR_COUNT_FORMAT:
return bi_is_regfmt_16(ins->register_format) ?
DIV_ROUND_UP(vecsize, 2) : vecsize;
case BI_SR_COUNT_VECSIZE:
return vecsize;
case BI_SR_COUNT_SR_COUNT:
return ins->sr_count;
}
switch (count) {
case BI_SR_COUNT_0 ... BI_SR_COUNT_4:
return count;
case BI_SR_COUNT_FORMAT:
return bi_is_regfmt_16(ins->register_format) ? DIV_ROUND_UP(vecsize, 2)
: vecsize;
case BI_SR_COUNT_VECSIZE:
return vecsize;
case BI_SR_COUNT_SR_COUNT:
return ins->sr_count;
}

unreachable("Invalid sr_count");
unreachable("Invalid sr_count");
}

unsigned
bi_count_read_registers(const bi_instr *ins, unsigned s)
{
/* ATOM reads 1 but writes 2. Exception for ACMPXCHG */
if (s == 0 && ins->op == BI_OPCODE_ATOM_RETURN_I32)
return (ins->atom_opc == BI_ATOM_OPC_ACMPXCHG) ? 2 : 1;
else if (s == 0 && bi_opcode_props[ins->op].sr_read)
return bi_count_staging_registers(ins);
else if (s == 4 && ins->op == BI_OPCODE_BLEND)
return ins->sr_count_2; /* Dual source blending */
else if (s == 0 && ins->op == BI_OPCODE_SPLIT_I32)
return ins->nr_dests;
else
return 1;
/* ATOM reads 1 but writes 2. Exception for ACMPXCHG */
if (s == 0 && ins->op == BI_OPCODE_ATOM_RETURN_I32)
return (ins->atom_opc == BI_ATOM_OPC_ACMPXCHG) ? 2 : 1;
else if (s == 0 && bi_opcode_props[ins->op].sr_read)
return bi_count_staging_registers(ins);
else if (s == 4 && ins->op == BI_OPCODE_BLEND)
return ins->sr_count_2; /* Dual source blending */
else if (s == 0 && ins->op == BI_OPCODE_SPLIT_I32)
return ins->nr_dests;
else
return 1;
}

unsigned
bi_count_write_registers(const bi_instr *ins, unsigned d)
{
if (d == 0 && bi_opcode_props[ins->op].sr_write) {
switch (ins->op) {
case BI_OPCODE_TEXC:
case BI_OPCODE_TEXC_DUAL:
if (ins->sr_count_2)
return ins->sr_count;
else
return bi_is_regfmt_16(ins->register_format) ? 2 : 4;
if (d == 0 && bi_opcode_props[ins->op].sr_write) {
switch (ins->op) {
case BI_OPCODE_TEXC:
case BI_OPCODE_TEXC_DUAL:
if (ins->sr_count_2)
return ins->sr_count;
else
return bi_is_regfmt_16(ins->register_format) ? 2 : 4;

case BI_OPCODE_TEX_SINGLE:
case BI_OPCODE_TEX_FETCH:
case BI_OPCODE_TEX_GATHER: {
unsigned chans = util_bitcount(ins->write_mask);
case BI_OPCODE_TEX_SINGLE:
case BI_OPCODE_TEX_FETCH:
case BI_OPCODE_TEX_GATHER: {
unsigned chans = util_bitcount(ins->write_mask);

return bi_is_regfmt_16(ins->register_format) ?
DIV_ROUND_UP(chans, 2) : chans;
}
return bi_is_regfmt_16(ins->register_format) ? DIV_ROUND_UP(chans, 2)
: chans;
}

case BI_OPCODE_ACMPXCHG_I32:
/* Reads 2 but writes 1 */
return 1;
case BI_OPCODE_ACMPXCHG_I32:
/* Reads 2 but writes 1 */
return 1;

case BI_OPCODE_ATOM1_RETURN_I32:
/* Allow omitting the destination for plain ATOM1 */
return bi_is_null(ins->dest[0]) ? 0 : ins->sr_count;
default:
return bi_count_staging_registers(ins);
}
} else if (ins->op == BI_OPCODE_SEG_ADD_I64) {
return 2;
} else if (ins->op == BI_OPCODE_TEXC_DUAL && d == 1) {
return ins->sr_count_2;
} else if (ins->op == BI_OPCODE_COLLECT_I32 && d == 0) {
return ins->nr_srcs;
}
case BI_OPCODE_ATOM1_RETURN_I32:
/* Allow omitting the destination for plain ATOM1 */
return bi_is_null(ins->dest[0]) ? 0 : ins->sr_count;
default:
return bi_count_staging_registers(ins);
}
} else if (ins->op == BI_OPCODE_SEG_ADD_I64) {
return 2;
} else if (ins->op == BI_OPCODE_TEXC_DUAL && d == 1) {
return ins->sr_count_2;
} else if (ins->op == BI_OPCODE_COLLECT_I32 && d == 0) {
return ins->nr_srcs;
}

return 1;
return 1;
}

unsigned
bi_writemask(const bi_instr *ins, unsigned d)
{
unsigned mask = BITFIELD_MASK(bi_count_write_registers(ins, d));
unsigned shift = ins->dest[d].offset;
return (mask << shift);
unsigned mask = BITFIELD_MASK(bi_count_write_registers(ins, d));
unsigned shift = ins->dest[d].offset;
return (mask << shift);
}

bi_clause *
bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause)
{
if (!block && !clause)
return NULL;
if (!block && !clause)
return NULL;

/* Try the first clause in this block if we're starting from scratch */
if (!clause && !list_is_empty(&block->clauses))
return list_first_entry(&block->clauses, bi_clause, link);
/* Try the first clause in this block if we're starting from scratch */
if (!clause && !list_is_empty(&block->clauses))
return list_first_entry(&block->clauses, bi_clause, link);

/* Try the next clause in this block */
if (clause && clause->link.next != &block->clauses)
return list_first_entry(&(clause->link), bi_clause, link);
/* Try the next clause in this block */
if (clause && clause->link.next != &block->clauses)
return list_first_entry(&(clause->link), bi_clause, link);

/* Try the next block, or the one after that if it's empty, etc .*/
bi_block *next_block = bi_next_block(block);
/* Try the next block, or the one after that if it's empty, etc .*/
bi_block *next_block = bi_next_block(block);

bi_foreach_block_from(ctx, next_block, block) {
if (!list_is_empty(&block->clauses))
return list_first_entry(&block->clauses, bi_clause, link);
}
bi_foreach_block_from(ctx, next_block, block) {
if (!list_is_empty(&block->clauses))
return list_first_entry(&block->clauses, bi_clause, link);
}

return NULL;
return NULL;
}

/* Does an instruction have a side effect not captured by its register
@@ -184,41 +184,41 @@ bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause)
bool
bi_side_effects(const bi_instr *I)
{
if (bi_opcode_props[I->op].last)
return true;
if (bi_opcode_props[I->op].last)
return true;

switch (I->op) {
case BI_OPCODE_DISCARD_F32:
case BI_OPCODE_DISCARD_B32:
return true;
default:
break;
}
switch (I->op) {
case BI_OPCODE_DISCARD_F32:
case BI_OPCODE_DISCARD_B32:
return true;
default:
break;
}

switch (bi_opcode_props[I->op].message) {
case BIFROST_MESSAGE_NONE:
case BIFROST_MESSAGE_VARYING:
case BIFROST_MESSAGE_ATTRIBUTE:
case BIFROST_MESSAGE_TEX:
case BIFROST_MESSAGE_VARTEX:
case BIFROST_MESSAGE_LOAD:
case BIFROST_MESSAGE_64BIT:
return false;
switch (bi_opcode_props[I->op].message) {
case BIFROST_MESSAGE_NONE:
case BIFROST_MESSAGE_VARYING:
case BIFROST_MESSAGE_ATTRIBUTE:
case BIFROST_MESSAGE_TEX:
case BIFROST_MESSAGE_VARTEX:
case BIFROST_MESSAGE_LOAD:
case BIFROST_MESSAGE_64BIT:
return false;

case BIFROST_MESSAGE_STORE:
case BIFROST_MESSAGE_ATOMIC:
case BIFROST_MESSAGE_BARRIER:
case BIFROST_MESSAGE_BLEND:
case BIFROST_MESSAGE_Z_STENCIL:
case BIFROST_MESSAGE_ATEST:
case BIFROST_MESSAGE_JOB:
return true;
case BIFROST_MESSAGE_STORE:
case BIFROST_MESSAGE_ATOMIC:
case BIFROST_MESSAGE_BARRIER:
case BIFROST_MESSAGE_BLEND:
case BIFROST_MESSAGE_Z_STENCIL:
case BIFROST_MESSAGE_ATEST:
case BIFROST_MESSAGE_JOB:
return true;

case BIFROST_MESSAGE_TILE:
return (I->op != BI_OPCODE_LD_TILE);
}
case BIFROST_MESSAGE_TILE:
return (I->op != BI_OPCODE_LD_TILE);
}

unreachable("Invalid message type");
unreachable("Invalid message type");
}

/* Branch reconvergence is required when the execution mask may change
@@ -230,10 +230,10 @@ bi_side_effects(const bi_instr *I)
bool
bi_reconverge_branches(bi_block *block)
{
if (bi_num_successors(block) == 1)
return bi_num_predecessors(block->successors[0]) > 1;
else
return true;
if (bi_num_successors(block) == 1)
return bi_num_predecessors(block->successors[0]) > 1;
else
return true;
}

/*
@@ -252,42 +252,41 @@ bi_reconverge_branches(bi_block *block)
bool
bi_can_replace_with_csel(bi_instr *I)
{
return ((I->op == BI_OPCODE_MUX_I32) || (I->op == BI_OPCODE_MUX_V2I16)) &&
(I->mux != BI_MUX_BIT) &&
(I->src[0].swizzle == BI_SWIZZLE_H01) &&
(I->src[1].swizzle == BI_SWIZZLE_H01) &&
(I->src[2].swizzle == BI_SWIZZLE_H01);
return ((I->op == BI_OPCODE_MUX_I32) || (I->op == BI_OPCODE_MUX_V2I16)) &&
(I->mux != BI_MUX_BIT) && (I->src[0].swizzle == BI_SWIZZLE_H01) &&
(I->src[1].swizzle == BI_SWIZZLE_H01) &&
(I->src[2].swizzle == BI_SWIZZLE_H01);
}

static enum bi_opcode
bi_csel_for_mux(bool must_sign, bool b32, enum bi_mux mux)
{
switch (mux) {
case BI_MUX_INT_ZERO:
if (must_sign)
return b32 ? BI_OPCODE_CSEL_U32 : BI_OPCODE_CSEL_V2U16;
else
return b32 ? BI_OPCODE_CSEL_I32 : BI_OPCODE_CSEL_V2I16;
case BI_MUX_NEG:
return b32 ? BI_OPCODE_CSEL_S32 : BI_OPCODE_CSEL_V2S16;
case BI_MUX_FP_ZERO:
return b32 ? BI_OPCODE_CSEL_F32 : BI_OPCODE_CSEL_V2F16;
default:
unreachable("No CSEL for MUX.bit");
}
switch (mux) {
case BI_MUX_INT_ZERO:
if (must_sign)
return b32 ? BI_OPCODE_CSEL_U32 : BI_OPCODE_CSEL_V2U16;
else
return b32 ? BI_OPCODE_CSEL_I32 : BI_OPCODE_CSEL_V2I16;
case BI_MUX_NEG:
return b32 ? BI_OPCODE_CSEL_S32 : BI_OPCODE_CSEL_V2S16;
case BI_MUX_FP_ZERO:
return b32 ? BI_OPCODE_CSEL_F32 : BI_OPCODE_CSEL_V2F16;
default:
unreachable("No CSEL for MUX.bit");
}
}

bi_instr *
bi_csel_from_mux(bi_builder *b, const bi_instr *I, bool must_sign)
{
assert(I->op == BI_OPCODE_MUX_I32 || I->op == BI_OPCODE_MUX_V2I16);
assert(I->op == BI_OPCODE_MUX_I32 || I->op == BI_OPCODE_MUX_V2I16);

/* Build a new CSEL */
enum bi_cmpf cmpf = (I->mux == BI_MUX_NEG) ? BI_CMPF_LT : BI_CMPF_EQ;
bi_instr *csel = bi_csel_u32_to(b, I->dest[0], I->src[2], bi_zero(),
I->src[0], I->src[1], cmpf);
/* Build a new CSEL */
enum bi_cmpf cmpf = (I->mux == BI_MUX_NEG) ? BI_CMPF_LT : BI_CMPF_EQ;
bi_instr *csel = bi_csel_u32_to(b, I->dest[0], I->src[2], bi_zero(),
I->src[0], I->src[1], cmpf);

/* Fixup the opcode and use it */
csel->op = bi_csel_for_mux(must_sign, I->op == BI_OPCODE_MUX_I32, I->mux);
return csel;
/* Fixup the opcode and use it */
csel->op = bi_csel_for_mux(must_sign, I->op == BI_OPCODE_MUX_I32, I->mux);
return csel;
}

@@ -26,15 +26,15 @@

#include <getopt.h>
#include <string.h>
#include "disassemble.h"
#include "valhall/disassemble.h"
#include "compiler.h"
#include "disassemble.h"

#include "main/mtypes.h"
#include "compiler/glsl/standalone.h"
#include "compiler/glsl/glsl_to_nir.h"
#include "compiler/glsl/gl_nir.h"
#include "compiler/glsl/glsl_to_nir.h"
#include "compiler/glsl/standalone.h"
#include "compiler/nir_types.h"
#include "main/mtypes.h"
#include "util/u_dynarray.h"
#include "bifrost_compile.h"
@@ -44,25 +44,25 @@ int verbose = 0;
static gl_shader_stage
filename_to_stage(const char *stage)
{
const char *ext = strrchr(stage, '.');
const char *ext = strrchr(stage, '.');

if (ext == NULL) {
fprintf(stderr, "No extension found in %s\n", stage);
exit(1);
}
if (ext == NULL) {
fprintf(stderr, "No extension found in %s\n", stage);
exit(1);
}

if (!strcmp(ext, ".cs") || !strcmp(ext, ".comp"))
return MESA_SHADER_COMPUTE;
else if (!strcmp(ext, ".vs") || !strcmp(ext, ".vert"))
return MESA_SHADER_VERTEX;
else if (!strcmp(ext, ".fs") || !strcmp(ext, ".frag"))
return MESA_SHADER_FRAGMENT;
else {
fprintf(stderr, "Invalid extension %s\n", ext);
exit(1);
}
if (!strcmp(ext, ".cs") || !strcmp(ext, ".comp"))
return MESA_SHADER_COMPUTE;
else if (!strcmp(ext, ".vs") || !strcmp(ext, ".vert"))
return MESA_SHADER_VERTEX;
else if (!strcmp(ext, ".fs") || !strcmp(ext, ".frag"))
return MESA_SHADER_FRAGMENT;
else {
fprintf(stderr, "Invalid extension %s\n", ext);
exit(1);
}

unreachable("Should've returned or bailed");
unreachable("Should've returned or bailed");
}

static int
@@ -80,7 +80,7 @@ glsl_type_size(const struct glsl_type *type, bool bindless)
static void
insert_sorted(struct exec_list *var_list, nir_variable *new_var)
{
nir_foreach_variable_in_list (var, var_list) {
nir_foreach_variable_in_list(var, var_list) {
if (var->data.location > new_var->data.location) {
exec_node_insert_node_before(&var->node, &new_var->node);
return;
@@ -94,7 +94,7 @@ sort_varyings(nir_shader *nir, nir_variable_mode mode)
{
struct exec_list new_list;
exec_list_make_empty(&new_list);
nir_foreach_variable_with_modes_safe (var, nir, mode) {
nir_foreach_variable_with_modes_safe(var, nir, mode) {
exec_node_remove(&var->node);
insert_sorted(&new_list, var);
}
@@ -104,7 +104,7 @@ sort_varyings(nir_shader *nir, nir_variable_mode mode)
static void
fixup_varying_slots(nir_shader *nir, nir_variable_mode mode)
{
nir_foreach_variable_with_modes (var, nir, mode) {
nir_foreach_variable_with_modes(var, nir, mode) {
if (var->data.location >= VARYING_SLOT_VAR0) {
var->data.location += 9;
} else if ((var->data.location >= VARYING_SLOT_TEX0) &&
@@ -117,228 +117,219 @@ fixup_varying_slots(nir_shader *nir, nir_variable_mode mode)
static void
compile_shader(int stages, char **files)
{
struct gl_shader_program *prog;
nir_shader *nir[MESA_SHADER_COMPUTE + 1];
unsigned shader_types[MESA_SHADER_COMPUTE + 1];
struct gl_shader_program *prog;
nir_shader *nir[MESA_SHADER_COMPUTE + 1];
unsigned shader_types[MESA_SHADER_COMPUTE + 1];

if (stages > MESA_SHADER_COMPUTE) {
fprintf(stderr, "Too many stages");
exit(1);
}
if (stages > MESA_SHADER_COMPUTE) {
fprintf(stderr, "Too many stages");
exit(1);
}

for (unsigned i = 0; i < stages; ++i)
shader_types[i] = filename_to_stage(files[i]);
for (unsigned i = 0; i < stages; ++i)
shader_types[i] = filename_to_stage(files[i]);

struct standalone_options options = {
.glsl_version = 300, /* ES - needed for precision */
.do_link = true,
.lower_precision = true
};
struct standalone_options options = {
.glsl_version = 300, /* ES - needed for precision */
.do_link = true,
.lower_precision = true};

static struct gl_context local_ctx;
static struct gl_context local_ctx;

prog = standalone_compile_shader(&options, stages, files, &local_ctx);
prog = standalone_compile_shader(&options, stages, files, &local_ctx);

for (unsigned i = 0; i < stages; ++i) {
gl_shader_stage stage = shader_types[i];
prog->_LinkedShaders[stage]->Program->info.stage = stage;
}
for (unsigned i = 0; i < stages; ++i) {
gl_shader_stage stage = shader_types[i];
prog->_LinkedShaders[stage]->Program->info.stage = stage;
}

struct util_dynarray binary;
struct util_dynarray binary;

util_dynarray_init(&binary, NULL);
util_dynarray_init(&binary, NULL);

for (unsigned i = 0; i < stages; ++i) {
nir[i] = glsl_to_nir(&local_ctx.Const, prog, shader_types[i], &bifrost_nir_options);
for (unsigned i = 0; i < stages; ++i) {
nir[i] = glsl_to_nir(&local_ctx.Const, prog, shader_types[i],
&bifrost_nir_options);

if (shader_types[i] == MESA_SHADER_VERTEX) {
nir_assign_var_locations(nir[i], nir_var_shader_in, &nir[i]->num_inputs,
glsl_type_size);
sort_varyings(nir[i], nir_var_shader_out);
nir_assign_var_locations(nir[i], nir_var_shader_out, &nir[i]->num_outputs,
glsl_type_size);
fixup_varying_slots(nir[i], nir_var_shader_out);
} else if (shader_types[i] == MESA_SHADER_FRAGMENT) {
sort_varyings(nir[i], nir_var_shader_in);
nir_assign_var_locations(nir[i], nir_var_shader_in, &nir[i]->num_inputs,
glsl_type_size);
fixup_varying_slots(nir[i], nir_var_shader_in);
nir_assign_var_locations(nir[i], nir_var_shader_out, &nir[i]->num_outputs,
glsl_type_size);
}
if (shader_types[i] == MESA_SHADER_VERTEX) {
nir_assign_var_locations(nir[i], nir_var_shader_in,
&nir[i]->num_inputs, glsl_type_size);
sort_varyings(nir[i], nir_var_shader_out);
nir_assign_var_locations(nir[i], nir_var_shader_out,
&nir[i]->num_outputs, glsl_type_size);
fixup_varying_slots(nir[i], nir_var_shader_out);
} else if (shader_types[i] == MESA_SHADER_FRAGMENT) {
sort_varyings(nir[i], nir_var_shader_in);
nir_assign_var_locations(nir[i], nir_var_shader_in,
&nir[i]->num_inputs, glsl_type_size);
fixup_varying_slots(nir[i], nir_var_shader_in);
nir_assign_var_locations(nir[i], nir_var_shader_out,
&nir[i]->num_outputs, glsl_type_size);
}

nir_assign_var_locations(nir[i], nir_var_uniform, &nir[i]->num_uniforms,
glsl_type_size);
nir_assign_var_locations(nir[i], nir_var_uniform, &nir[i]->num_uniforms,
glsl_type_size);

NIR_PASS_V(nir[i], nir_lower_global_vars_to_local);
NIR_PASS_V(nir[i], nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir[i]), true, i == 0);
NIR_PASS_V(nir[i], nir_opt_copy_prop_vars);
NIR_PASS_V(nir[i], nir_opt_combine_stores, nir_var_all);
NIR_PASS_V(nir[i], nir_lower_global_vars_to_local);
NIR_PASS_V(nir[i], nir_lower_io_to_temporaries,
nir_shader_get_entrypoint(nir[i]), true, i == 0);
NIR_PASS_V(nir[i], nir_opt_copy_prop_vars);
NIR_PASS_V(nir[i], nir_opt_combine_stores, nir_var_all);

NIR_PASS_V(nir[i], nir_lower_system_values);
NIR_PASS_V(nir[i], gl_nir_lower_samplers, prog);
NIR_PASS_V(nir[i], nir_split_var_copies);
NIR_PASS_V(nir[i], nir_lower_var_copies);
NIR_PASS_V(nir[i], nir_lower_system_values);
NIR_PASS_V(nir[i], gl_nir_lower_samplers, prog);
NIR_PASS_V(nir[i], nir_split_var_copies);
NIR_PASS_V(nir[i], nir_lower_var_copies);

NIR_PASS_V(nir[i], nir_lower_io, nir_var_uniform,
st_packed_uniforms_type_size,
(nir_lower_io_options)0);
NIR_PASS_V(nir[i], nir_lower_uniforms_to_ubo, true, false);
NIR_PASS_V(nir[i], nir_lower_io, nir_var_uniform,
st_packed_uniforms_type_size, (nir_lower_io_options)0);
NIR_PASS_V(nir[i], nir_lower_uniforms_to_ubo, true, false);

/* before buffers and vars_to_ssa */
NIR_PASS_V(nir[i], gl_nir_lower_images, true);
/* before buffers and vars_to_ssa */
NIR_PASS_V(nir[i], gl_nir_lower_images, true);

NIR_PASS_V(nir[i], gl_nir_lower_buffers, prog);
NIR_PASS_V(nir[i], nir_opt_constant_folding);
NIR_PASS_V(nir[i], gl_nir_lower_buffers, prog);
NIR_PASS_V(nir[i], nir_opt_constant_folding);

struct panfrost_compile_inputs inputs = {
.gpu_id = gpu_id,
.fixed_sysval_ubo = -1,
};
struct pan_shader_info info = { 0 };
struct panfrost_compile_inputs inputs = {
.gpu_id = gpu_id,
.fixed_sysval_ubo = -1,
};
struct pan_shader_info info = {0};

util_dynarray_clear(&binary);
bifrost_compile_shader_nir(nir[i], &inputs, &binary, &info);
util_dynarray_clear(&binary);
bifrost_compile_shader_nir(nir[i], &inputs, &binary, &info);

char *fn = NULL;
asprintf(&fn, "shader_%u.bin", i);
assert(fn != NULL);
FILE *fp = fopen(fn, "wb");
fwrite(binary.data, 1, binary.size, fp);
fclose(fp);
free(fn);
}
char *fn = NULL;
asprintf(&fn, "shader_%u.bin", i);
assert(fn != NULL);
FILE *fp = fopen(fn, "wb");
fwrite(binary.data, 1, binary.size, fp);
fclose(fp);
free(fn);
}

util_dynarray_fini(&binary);
util_dynarray_fini(&binary);
}

#define BI_FOURCC(ch0, ch1, ch2, ch3) ( \
(uint32_t)(ch0) | (uint32_t)(ch1) << 8 | \
(uint32_t)(ch2) << 16 | (uint32_t)(ch3) << 24)
#define BI_FOURCC(ch0, ch1, ch2, ch3) \
((uint32_t)(ch0) | (uint32_t)(ch1) << 8 | (uint32_t)(ch2) << 16 | \
(uint32_t)(ch3) << 24)

static void
disassemble(const char *filename)
{
FILE *fp = fopen(filename, "rb");
assert(fp);
FILE *fp = fopen(filename, "rb");
assert(fp);

fseek(fp, 0, SEEK_END);
unsigned filesize = ftell(fp);
rewind(fp);
fseek(fp, 0, SEEK_END);
unsigned filesize = ftell(fp);
rewind(fp);

uint32_t *code = malloc(filesize);
unsigned res = fread(code, 1, filesize, fp);
if (res != filesize) {
printf("Couldn't read full file\n");
}
uint32_t *code = malloc(filesize);
unsigned res = fread(code, 1, filesize, fp);
if (res != filesize) {
printf("Couldn't read full file\n");
}

fclose(fp);
fclose(fp);

void *entrypoint = code;
void *entrypoint = code;

if (filesize && code[0] == BI_FOURCC('M', 'B', 'S', '2')) {
for (int i = 0; i < filesize / 4; ++i) {
if (code[i] != BI_FOURCC('O', 'B', 'J', 'C'))
continue;
if (filesize && code[0] == BI_FOURCC('M', 'B', 'S', '2')) {
for (int i = 0; i < filesize / 4; ++i) {
if (code[i] != BI_FOURCC('O', 'B', 'J', 'C'))
continue;

unsigned size = code[i + 1];
unsigned offset = i + 2;
unsigned size = code[i + 1];
unsigned offset = i + 2;

entrypoint = code + offset;
filesize = size;
}
}
entrypoint = code + offset;
filesize = size;
}
}

if ((gpu_id >> 12) >= 9)
disassemble_valhall(stdout, entrypoint, filesize, verbose);
else
disassemble_bifrost(stdout, entrypoint, filesize, verbose);
if ((gpu_id >> 12) >= 9)
disassemble_valhall(stdout, entrypoint, filesize, verbose);
else
disassemble_bifrost(stdout, entrypoint, filesize, verbose);

free(code);
free(code);
}

int
main(int argc, char **argv)
{
int c;
int c;

if (argc < 2) {
printf("Pass a command\n");
exit(1);
}
if (argc < 2) {
printf("Pass a command\n");
exit(1);
}

static struct option longopts[] = {
{ "id", optional_argument, NULL, 'i' },
{ "gpu", optional_argument, NULL, 'g' },
{ "verbose", no_argument, &verbose, 'v' },
{ NULL, 0, NULL, 0 }
};
static struct option longopts[] = {{"id", optional_argument, NULL, 'i'},
{"gpu", optional_argument, NULL, 'g'},
{"verbose", no_argument, &verbose, 'v'},
{NULL, 0, NULL, 0}};

static struct {
const char *name;
unsigned major, minor;
} gpus[] = {
{ "G71", 6, 0 },
{ "G72", 6, 2 },
{ "G51", 7, 0 },
{ "G76", 7, 1 },
{ "G52", 7, 2 },
{ "G31", 7, 3 },
{ "G77", 9, 0 },
{ "G57", 9, 1 },
{ "G78", 9, 2 },
{ "G57", 9, 3 },
{ "G68", 9, 4 },
{ "G78AE", 9, 5 },
};
static struct {
const char *name;
unsigned major, minor;
} gpus[] = {
{"G71", 6, 0}, {"G72", 6, 2}, {"G51", 7, 0}, {"G76", 7, 1},
{"G52", 7, 2}, {"G31", 7, 3}, {"G77", 9, 0}, {"G57", 9, 1},
{"G78", 9, 2}, {"G57", 9, 3}, {"G68", 9, 4}, {"G78AE", 9, 5},
};

while ((c = getopt_long(argc, argv, "v:", longopts, NULL)) != -1) {
while ((c = getopt_long(argc, argv, "v:", longopts, NULL)) != -1) {

switch (c) {
case 'i':
gpu_id = atoi(optarg);
switch (c) {
case 'i':
gpu_id = atoi(optarg);

if (!gpu_id) {
fprintf(stderr, "Expected GPU ID, got %s\n", optarg);
return 1;
}
if (!gpu_id) {
fprintf(stderr, "Expected GPU ID, got %s\n", optarg);
return 1;
}

break;
case 'g':
gpu_id = 0;
break;
case 'g':
gpu_id = 0;

/* Compatibility with the Arm compiler */
if (strncmp(optarg, "Mali-", 5) == 0) optarg += 5;
/* Compatibility with the Arm compiler */
if (strncmp(optarg, "Mali-", 5) == 0)
optarg += 5;

for (unsigned i = 0; i < ARRAY_SIZE(gpus); ++i) {
if (strcmp(gpus[i].name, optarg)) continue;
for (unsigned i = 0; i < ARRAY_SIZE(gpus); ++i) {
if (strcmp(gpus[i].name, optarg))
continue;

unsigned major = gpus[i].major;
unsigned minor = gpus[i].minor;
unsigned major = gpus[i].major;
unsigned minor = gpus[i].minor;

gpu_id = (major << 12) | (minor << 8);
break;
}
gpu_id = (major << 12) | (minor << 8);
break;
}

if (!gpu_id) {
fprintf(stderr, "Unknown GPU %s\n", optarg);
return 1;
}
if (!gpu_id) {
fprintf(stderr, "Unknown GPU %s\n", optarg);
return 1;
}

break;
default:
break;
}
}
break;
default:
break;
}
}

if (strcmp(argv[optind], "compile") == 0)
compile_shader(argc - optind - 1, &argv[optind + 1]);
else if (strcmp(argv[optind], "disasm") == 0)
disassemble(argv[optind + 1]);
else {
fprintf(stderr, "Unknown command. Valid: compile/disasm\n");
return 1;
}
if (strcmp(argv[optind], "compile") == 0)
compile_shader(argc - optind - 1, &argv[optind + 1]);
else if (strcmp(argv[optind], "disasm") == 0)
disassemble(argv[optind + 1]);
else {
fprintf(stderr, "Unknown command. Valid: compile/disasm\n");
return 1;
}

return 0;
return 0;
}

File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -34,14 +34,20 @@

void disassemble_bifrost(FILE *fp, uint8_t *code, size_t size, bool verbose);

void
bi_disasm_fma(FILE *fp, unsigned bits, struct bifrost_regs *srcs, struct bifrost_regs *next_regs, unsigned staging_register, unsigned branch_offset, struct bi_constants *consts, bool first);
void bi_disasm_fma(FILE *fp, unsigned bits, struct bifrost_regs *srcs,
struct bifrost_regs *next_regs, unsigned staging_register,
unsigned branch_offset, struct bi_constants *consts,
bool first);

void bi_disasm_add(FILE *fp, unsigned bits, struct bifrost_regs *srcs, struct bifrost_regs *next_regs, unsigned staging_register, unsigned branch_offset, struct bi_constants *consts, bool first);
void bi_disasm_add(FILE *fp, unsigned bits, struct bifrost_regs *srcs,
struct bifrost_regs *next_regs, unsigned staging_register,
unsigned branch_offset, struct bi_constants *consts,
bool first);

void bi_disasm_dest_fma(FILE *fp, struct bifrost_regs *next_regs, bool first);
void bi_disasm_dest_add(FILE *fp, struct bifrost_regs *next_regs, bool first);

void dump_src(FILE *fp, unsigned src, struct bifrost_regs srcs, unsigned branch_offset, struct bi_constants *consts, bool isFMA);
void dump_src(FILE *fp, unsigned src, struct bifrost_regs srcs,
unsigned branch_offset, struct bi_constants *consts, bool isFMA);

#endif
@@ -62,182 +62,187 @@ typedef uint16_t nodearray_value;
typedef uint64_t nodearray_sparse;

typedef struct {
union {
nodearray_sparse *sparse;
nodearray_value *dense;
};
unsigned size;
unsigned sparse_capacity;
union {
nodearray_sparse *sparse;
nodearray_value *dense;
};
unsigned size;
unsigned sparse_capacity;
} nodearray;

/* Align sizes to 16-bytes for SIMD purposes */
#define NODEARRAY_DENSE_ALIGN(x) ALIGN_POT(x, 16)

#define nodearray_sparse_foreach(buf, elem) \
for (nodearray_sparse *elem = (buf)->sparse; \
#define nodearray_sparse_foreach(buf, elem) \
for (nodearray_sparse *elem = (buf)->sparse; \
elem < (buf)->sparse + (buf)->size; elem++)

#define nodearray_dense_foreach(buf, elem) \
for (nodearray_value *elem = (buf)->dense; \
#define nodearray_dense_foreach(buf, elem) \
for (nodearray_value *elem = (buf)->dense; \
elem < (buf)->dense + (buf)->size; elem++)

#define nodearray_dense_foreach_64(buf, elem) \
for (uint64_t *elem = (uint64_t *)(buf)->dense; \
#define nodearray_dense_foreach_64(buf, elem) \
for (uint64_t *elem = (uint64_t *)(buf)->dense; \
(nodearray_value *)elem < (buf)->dense + (buf)->size; elem++)

static inline bool
nodearray_is_sparse(const nodearray *a)
{
return a->sparse_capacity != ~0U;
return a->sparse_capacity != ~0U;
}

static inline void
nodearray_init(nodearray *a)
{
memset(a, 0, sizeof(nodearray));
memset(a, 0, sizeof(nodearray));
}

static inline void
nodearray_reset(nodearray *a)
{
free(a->sparse);
nodearray_init(a);
free(a->sparse);
nodearray_init(a);
}

static inline nodearray_sparse
nodearray_encode(unsigned key, nodearray_value value)
{
static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
return ((nodearray_sparse) key << 16) | value;
static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
return ((nodearray_sparse)key << 16) | value;
}

static inline unsigned
nodearray_sparse_key(const nodearray_sparse *elem)
{
static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
return *elem >> 16;
static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
return *elem >> 16;
}

static inline nodearray_value
nodearray_sparse_value(const nodearray_sparse *elem)
{
return *elem & NODEARRAY_MAX_VALUE;
return *elem & NODEARRAY_MAX_VALUE;
}

static inline unsigned
nodearray_sparse_search(const nodearray *a, nodearray_sparse key, nodearray_sparse **elem)
nodearray_sparse_search(const nodearray *a, nodearray_sparse key,
nodearray_sparse **elem)
{
assert(nodearray_is_sparse(a) && a->size);
assert(nodearray_is_sparse(a) && a->size);

nodearray_sparse *data = a->sparse;
nodearray_sparse *data = a->sparse;

/* Encode the key using the highest possible value, so that the
* matching node must be encoded lower than this
*/
nodearray_sparse skey = nodearray_encode(key, NODEARRAY_MAX_VALUE);
/* Encode the key using the highest possible value, so that the
* matching node must be encoded lower than this
*/
nodearray_sparse skey = nodearray_encode(key, NODEARRAY_MAX_VALUE);

unsigned left = 0;
unsigned right = a->size - 1;
unsigned left = 0;
unsigned right = a->size - 1;

if (data[right] <= skey)
left = right;
if (data[right] <= skey)
left = right;

while (left != right) {
/* No need to worry about overflow, we couldn't have more than
* 2^24 elements */
unsigned probe = (left + right + 1) / 2;
while (left != right) {
/* No need to worry about overflow, we couldn't have more than
* 2^24 elements */
unsigned probe = (left + right + 1) / 2;

if (data[probe] > skey)
right = probe - 1;
else
left = probe;
}
if (data[probe] > skey)
right = probe - 1;
else
left = probe;
}

*elem = data + left;
return left;
*elem = data + left;
return left;
}

static inline void
nodearray_orr(nodearray *a, unsigned key, nodearray_value value,
unsigned max_sparse, unsigned max)
{
assert(key < (1 << 24));
assert(key < max);
assert(key < (1 << 24));
assert(key < max);

if (!value)
return;
if (!value)
return;

if (nodearray_is_sparse(a)) {
unsigned size = a->size;
unsigned left = 0;
if (nodearray_is_sparse(a)) {
unsigned size = a->size;
unsigned left = 0;

if (size) {
/* First, binary search for key */
nodearray_sparse *elem;
left = nodearray_sparse_search(a, key, &elem);
if (size) {
/* First, binary search for key */
nodearray_sparse *elem;
left = nodearray_sparse_search(a, key, &elem);

if (nodearray_sparse_key(elem) == key) {
*elem |= value;
return;
}
if (nodearray_sparse_key(elem) == key) {
*elem |= value;
return;
}

/* We insert before `left`, so increment it if it's
* out of order */
if (nodearray_sparse_key(elem) < key)
++left;
}
/* We insert before `left`, so increment it if it's
* out of order */
if (nodearray_sparse_key(elem) < key)
++left;
}

if (size < max_sparse && (size + 1) < max / 4) {
/* We didn't find it, but we know where to insert it. */
if (size < max_sparse && (size + 1) < max / 4) {
/* We didn't find it, but we know where to insert it. */

nodearray_sparse *data = a->sparse;
nodearray_sparse *data_move = data + left;
nodearray_sparse *data = a->sparse;
nodearray_sparse *data_move = data + left;

bool realloc = (++a->size) > a->sparse_capacity;
bool realloc = (++a->size) > a->sparse_capacity;

if (realloc) {
a->sparse_capacity = MIN2(MAX2(a->sparse_capacity * 2, 64), max / 4);
if (realloc) {
a->sparse_capacity =
MIN2(MAX2(a->sparse_capacity * 2, 64), max / 4);

a->sparse = (nodearray_sparse *)malloc(a->sparse_capacity * sizeof(nodearray_sparse));
a->sparse = (nodearray_sparse *)malloc(a->sparse_capacity *
sizeof(nodearray_sparse));

if (left)
memcpy(a->sparse, data, left * sizeof(nodearray_sparse));
}
if (left)
memcpy(a->sparse, data, left * sizeof(nodearray_sparse));
}

nodearray_sparse *elem = a->sparse + left;
nodearray_sparse *elem = a->sparse + left;

if (left != size)
memmove(elem + 1, data_move, (size - left) * sizeof(nodearray_sparse));
if (left != size)
memmove(elem + 1, data_move,
(size - left) * sizeof(nodearray_sparse));

*elem = nodearray_encode(key, value);
*elem = nodearray_encode(key, value);

if (realloc)
free(data);
if (realloc)
free(data);

return;
}
return;
}

/* There are too many elements, so convert to a dense array */
nodearray old = *a;
/* There are too many elements, so convert to a dense array */
nodearray old = *a;

a->dense = (nodearray_value *)calloc(NODEARRAY_DENSE_ALIGN(max), sizeof(nodearray_value));
a->size = max;
a->sparse_capacity = ~0U;
a->dense = (nodearray_value *)calloc(NODEARRAY_DENSE_ALIGN(max),
sizeof(nodearray_value));
a->size = max;
a->sparse_capacity = ~0U;

nodearray_value *data = a->dense;
nodearray_value *data = a->dense;

nodearray_sparse_foreach(&old, x) {
unsigned key = nodearray_sparse_key(x);
nodearray_value value = nodearray_sparse_value(x);
nodearray_sparse_foreach(&old, x) {
unsigned key = nodearray_sparse_key(x);
nodearray_value value = nodearray_sparse_value(x);

assert(key < max);
data[key] = value;
}
assert(key < max);
data[key] = value;
}

free(old.sparse);
}
free(old.sparse);
}

a->dense[key] |= value;
a->dense[key] |= value;
}

#ifdef __cplusplus
@@ -21,14 +21,15 @@
* SOFTWARE.
*/

#include "compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "compiler.h"

#include <gtest/gtest.h>

static std::string
to_string(const bi_instr *I) {
to_string(const bi_instr *I)
{
char *cstr = NULL;
size_t size = 0;
FILE *f = open_memstream(&cstr, &size);
@@ -40,23 +41,21 @@ to_string(const bi_instr *I) {
}

static testing::AssertionResult
constant_fold_pred(const char *I_expr,
const char *expected_expr,
bi_instr *I,
constant_fold_pred(const char *I_expr, const char *expected_expr, bi_instr *I,
uint32_t expected)
{
bool unsupported = false;
uint32_t v = bi_fold_constant(I, &unsupported);
if (unsupported) {
return testing::AssertionFailure()
<< "Constant fold unsupported for instruction \n\n"
<< " " << to_string(I);
<< "Constant fold unsupported for instruction \n\n"
<< " " << to_string(I);
} else if (v != expected) {
return testing::AssertionFailure()
<< "Unexpected result when constant folding instruction\n\n"
<< " " << to_string(I) << "\n"
<< " Actual: " << v << "\n"
<< "Expected: " << expected << "\n";
<< "Unexpected result when constant folding instruction\n\n"
<< " " << to_string(I) << "\n"
<< " Actual: " << v << "\n"
<< "Expected: " << expected << "\n";
} else {
return testing::AssertionSuccess();
}
@@ -64,7 +63,6 @@ constant_fold_pred(

#define EXPECT_FOLD(i, e) EXPECT_PRED_FORMAT2(constant_fold_pred, i, e)

static testing::AssertionResult
not_constant_fold_pred(const char *I_expr, bi_instr *I)
{

@@ -74,22 +72,23 @@ not_constant_fold_pred(const char *I_expr, bi_instr *I)
return testing::AssertionSuccess();
} else {
return testing::AssertionFailure()
<< "Instruction\n\n"
<< " " << to_string(I) << "\n"
<< "shouldn't have constant folded, but folded to: " << v;
<< "Instruction\n\n"
<< " " << to_string(I) << "\n"
<< "shouldn't have constant folded, but folded to: " << v;
}
}

#define EXPECT_NOT_FOLD(i) EXPECT_PRED_FORMAT1(not_constant_fold_pred, i)

class ConstantFold : public testing::Test {
protected:
ConstantFold() {
protected:
ConstantFold()
{
mem_ctx = ralloc_context(NULL);
b = bit_builder(mem_ctx);
}
~ConstantFold() {
~ConstantFold()
{
ralloc_free(mem_ctx);
}
@@ -101,9 +100,7 @@ TEST_F(ConstantFold, Swizzles)
{
bi_index reg = bi_register(0);

EXPECT_FOLD(
bi_swz_v2i16_to(b, reg, bi_imm_u32(0xCAFEBABE)),
0xCAFEBABE);
EXPECT_FOLD(bi_swz_v2i16_to(b, reg, bi_imm_u32(0xCAFEBABE)), 0xCAFEBABE);

EXPECT_FOLD(
bi_swz_v2i16_to(b, reg, bi_swz_16(bi_imm_u32(0xCAFEBABE), false, false)),
@@ -123,18 +120,17 @@ TEST_F(ConstantFold, VectorConstructions2i16)
bi_index reg = bi_register(0);

EXPECT_FOLD(
bi_mkvec_v2i16_to(b, reg, bi_imm_u16(0xCAFE),
bi_imm_u16(0xBABE)),
bi_mkvec_v2i16_to(b, reg, bi_imm_u16(0xCAFE), bi_imm_u16(0xBABE)),
0xBABECAFE);

EXPECT_FOLD(
bi_mkvec_v2i16_to(b, reg, bi_swz_16(bi_imm_u32(0xCAFEBABE), true, true),
bi_imm_u16(0xBABE)),
bi_imm_u16(0xBABE)),
0xBABECAFE);

EXPECT_FOLD(
bi_mkvec_v2i16_to(b, reg, bi_swz_16(bi_imm_u32(0xCAFEBABE), true, true),
bi_swz_16(bi_imm_u32(0xCAFEBABE), false, false)),
bi_swz_16(bi_imm_u32(0xCAFEBABE), false, false)),
0xBABECAFE);
}
@@ -173,17 +169,18 @@ TEST_F(ConstantFold, LimitedShiftsForTexturing)
{
bi_index reg = bi_register(0);

EXPECT_FOLD(
bi_lshift_or_i32_to(b, reg, bi_imm_u32(0xCAFE), bi_imm_u32(0xA0000), bi_imm_u8(4)),
(0xCAFE << 4) | 0xA0000);
EXPECT_FOLD(bi_lshift_or_i32_to(b, reg, bi_imm_u32(0xCAFE),
bi_imm_u32(0xA0000), bi_imm_u8(4)),
(0xCAFE << 4) | 0xA0000);

EXPECT_NOT_FOLD(
bi_lshift_or_i32_to(b, reg, bi_imm_u32(0xCAFE), bi_not(bi_imm_u32(0xA0000)), bi_imm_u8(4)));
EXPECT_NOT_FOLD(bi_lshift_or_i32_to(
b, reg, bi_imm_u32(0xCAFE), bi_not(bi_imm_u32(0xA0000)), bi_imm_u8(4)));

EXPECT_NOT_FOLD(
bi_lshift_or_i32_to(b, reg, bi_not(bi_imm_u32(0xCAFE)), bi_imm_u32(0xA0000), bi_imm_u8(4)));
EXPECT_NOT_FOLD(bi_lshift_or_i32_to(b, reg, bi_not(bi_imm_u32(0xCAFE)),
bi_imm_u32(0xA0000), bi_imm_u8(4)));

bi_instr *I = bi_lshift_or_i32_to(b, reg, bi_imm_u32(0xCAFE), bi_imm_u32(0xA0000), bi_imm_u8(4));
bi_instr *I = bi_lshift_or_i32_to(b, reg, bi_imm_u32(0xCAFE),
bi_imm_u32(0xA0000), bi_imm_u8(4));
I->not_result = true;
EXPECT_NOT_FOLD(I);
}
@@ -193,9 +190,12 @@ TEST_F(ConstantFold, NonConstantSourcesCannotBeFolded)
bi_index reg = bi_register(0);

EXPECT_NOT_FOLD(bi_swz_v2i16_to(b, reg, bi_temp(b->shader)));
EXPECT_NOT_FOLD(bi_mkvec_v2i16_to(b, reg, bi_temp(b->shader), bi_temp(b->shader)));
EXPECT_NOT_FOLD(bi_mkvec_v2i16_to(b, reg, bi_temp(b->shader), bi_imm_u32(0xDEADBEEF)));
EXPECT_NOT_FOLD(bi_mkvec_v2i16_to(b, reg, bi_imm_u32(0xDEADBEEF), bi_temp(b->shader)));
EXPECT_NOT_FOLD(
bi_mkvec_v2i16_to(b, reg, bi_temp(b->shader), bi_temp(b->shader)));
EXPECT_NOT_FOLD(
bi_mkvec_v2i16_to(b, reg, bi_temp(b->shader), bi_imm_u32(0xDEADBEEF)));
EXPECT_NOT_FOLD(
bi_mkvec_v2i16_to(b, reg, bi_imm_u32(0xDEADBEEF), bi_temp(b->shader)));
}

TEST_F(ConstantFold, OtherOperationsShouldNotFold)
@@ -21,55 +21,57 @@
* SOFTWARE.
*/

#include "compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "compiler.h"

#include <gtest/gtest.h>

#define CASE(shader_stage, instr, expected) do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
bi_builder *b = A; \
bi_index u = bi_temp(b->shader); \
bi_index v = bi_temp(b->shader); \
A->shader->stage = MESA_SHADER_ ## shader_stage; \
instr; \
} \
{ \
bi_builder *b = B; \
bi_index u = bi_temp(b->shader); \
bi_index v = bi_temp(b->shader); \
B->shader->stage = MESA_SHADER_ ## shader_stage; \
expected; \
} \
bi_opt_fuse_dual_texture(A->shader); \
if (!bit_shader_equal(A->shader, B->shader)) { \
ADD_FAILURE(); \
fprintf(stderr, "Optimization produce unexpected result"); \
fprintf(stderr, " Actual:\n"); \
bi_print_shader(A->shader, stderr); \
fprintf(stderr, "Expected:\n"); \
bi_print_shader(B->shader, stderr); \
fprintf(stderr, "\n"); \
} \
} while(0)
#define CASE(shader_stage, instr, expected) \
do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
bi_builder *b = A; \
bi_index u = bi_temp(b->shader); \
bi_index v = bi_temp(b->shader); \
A->shader->stage = MESA_SHADER_##shader_stage; \
instr; \
} \
{ \
bi_builder *b = B; \
bi_index u = bi_temp(b->shader); \
bi_index v = bi_temp(b->shader); \
B->shader->stage = MESA_SHADER_##shader_stage; \
expected; \
} \
bi_opt_fuse_dual_texture(A->shader); \
if (!bit_shader_equal(A->shader, B->shader)) { \
ADD_FAILURE(); \
fprintf(stderr, "Optimization produce unexpected result"); \
fprintf(stderr, " Actual:\n"); \
bi_print_shader(A->shader, stderr); \
fprintf(stderr, "Expected:\n"); \
bi_print_shader(B->shader, stderr); \
fprintf(stderr, "\n"); \
} \
} while (0)

#define NEGCASE(stage, instr) CASE(stage, instr, instr)

class DualTexture : public testing::Test {
protected:
DualTexture() {
protected:
DualTexture()
{
mem_ctx = ralloc_context(NULL);

reg = bi_register(0);
x = bi_register(4);
y = bi_register(8);

reg = bi_register(0);
x = bi_register(4);
y = bi_register(8);
}

~DualTexture() {
~DualTexture()
{
ralloc_free(mem_ctx);
}
@ -78,134 +80,165 @@ protected:
|
|||
bi_index reg, x, y;
|
||||
};
|
||||
|
||||
|
||||
TEST_F(DualTexture, FuseDualTexFragment)
|
||||
{
|
||||
CASE(FRAGMENT, {
|
||||
CASE(
|
||||
FRAGMENT,
|
||||
{
|
||||
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
|
||||
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
|
||||
}, {
|
||||
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), false, 4, 4);
|
||||
});
|
||||
},
|
||||
{
|
||||
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144),
|
||||
false, 4, 4);
|
||||
});
|
||||
}
|
||||
|
||||
TEST_F(DualTexture, FuseDualTexKernel)
|
||||
{
|
||||
CASE(KERNEL, {
|
||||
CASE(
|
||||
KERNEL,
|
||||
{
|
||||
bi_texs_2d_f32_to(b, x, u, v, true, 0, 0);
|
||||
bi_texs_2d_f32_to(b, y, u, v, true, 1, 1);
|
||||
}, {
|
||||
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), true, 4, 4);
|
||||
});
|
||||
},
|
||||
{
|
||||
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), true,
|
||||
4, 4);
|
||||
});
|
||||
}
|
||||
|
||||
TEST_F(DualTexture, FuseDualTexVertex)
|
||||
{
|
||||
CASE(VERTEX, {
|
||||
CASE(
|
||||
VERTEX,
|
||||
{
|
||||
bi_texs_2d_f32_to(b, x, u, v, true, 0, 0);
|
||||
bi_texs_2d_f32_to(b, y, u, v, true, 1, 1);
|
||||
}, {
|
||||
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), true, 4, 4);
|
||||
});
|
||||
},
|
||||
{
|
||||
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F00144), true,
|
||||
4, 4);
|
||||
});
|
||||
}
|
||||
|
||||
TEST_F(DualTexture, DontFuseDualTexWrongStage)
|
||||
{
|
||||
NEGCASE(FRAGMENT, {
|
||||
bi_texs_2d_f32_to(b, x, u, v, true, 0, 0);
|
||||
bi_texs_2d_f32_to(b, y, u, v, true, 1, 1);
bi_texs_2d_f32_to(b, x, u, v, true, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, true, 1, 1);
});

NEGCASE(KERNEL, {
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
});

NEGCASE(VERTEX, {
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
});
}

TEST_F(DualTexture, FuseDualTexMaximumIndex)
{
CASE(FRAGMENT, {
CASE(
FRAGMENT,
{
bi_texs_2d_f32_to(b, x, u, v, false, 2, 2);
bi_texs_2d_f32_to(b, y, u, v, false, 3, 3);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F003E6), false, 4, 4);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F003E6),
false, 4, 4);
});
}

TEST_F(DualTexture, FuseDualTexMixedIndex)
{
CASE(FRAGMENT, {
CASE(
FRAGMENT,
{
bi_texs_2d_f32_to(b, x, u, v, false, 3, 2);
bi_texs_2d_f32_to(b, y, u, v, false, 2, 3);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F003A7), false, 4, 4);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF9F003A7),
false, 4, 4);
});
}

TEST_F(DualTexture, DontFuseDualTexOutOfBounds)
{
NEGCASE(FRAGMENT, {
bi_texs_2d_f32_to(b, x, u, v, false, 4, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
bi_texs_2d_f32_to(b, x, u, v, false, 4, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
});

NEGCASE(FRAGMENT, {
bi_texs_2d_f32_to(b, x, u, v, false, 0, 4);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
bi_texs_2d_f32_to(b, x, u, v, false, 0, 4);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
});

NEGCASE(FRAGMENT, {
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 4, 1);
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 4, 1);
});

NEGCASE(FRAGMENT, {
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 4);
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 4);
});
}

TEST_F(DualTexture, FuseDualTexFP16)
{
CASE(FRAGMENT, {
CASE(
FRAGMENT,
{
bi_texs_2d_f16_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f16_to(b, y, u, v, false, 1, 1);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF1E00144), false, 2, 2);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF1E00144),
false, 2, 2);
});
}

TEST_F(DualTexture, FuseDualTexMixedSize)
{
CASE(FRAGMENT, {
CASE(
FRAGMENT,
{
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f16_to(b, y, u, v, false, 1, 1);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0XF9E00144), false, 4, 2);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0XF9E00144),
false, 4, 2);
});

CASE(FRAGMENT, {
CASE(
FRAGMENT,
{
bi_texs_2d_f16_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
}, {
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF1F00144), false, 2, 4);
});
},
{
bi_texc_dual_to(b, x, y, bi_null(), u, v, bi_imm_u32(0xF1F00144),
false, 2, 4);
});
}

TEST_F(DualTexture, DontFuseMixedCoordinates)
{
NEGCASE(FRAGMENT, {
bi_texs_2d_f32_to(b, x, bi_neg(u), v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
bi_texs_2d_f32_to(b, x, bi_neg(u), v, false, 0, 0);
bi_texs_2d_f32_to(b, y, u, v, false, 1, 1);
});

NEGCASE(FRAGMENT, {
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, v, u, false, 1, 1);
bi_texs_2d_f32_to(b, x, u, v, false, 0, 0);
bi_texs_2d_f32_to(b, y, v, u, false, 1, 1);
});
}

@@ -21,31 +21,34 @@
* SOFTWARE.
*/

#include "compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "compiler.h"

#include <gtest/gtest.h>

#define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, bi_lower_swizzle)
#define CASE(instr, expected) \
INSTRUCTION_CASE(instr, expected, bi_lower_swizzle)
#define NEGCASE(instr) CASE(instr, instr)

class LowerSwizzle : public testing::Test {
protected:
LowerSwizzle() {
protected:
LowerSwizzle()
{
mem_ctx = ralloc_context(NULL);

reg = bi_register(0);
x = bi_register(1);
y = bi_register(2);
z = bi_register(3);
w = bi_register(4);
reg = bi_register(0);
x = bi_register(1);
y = bi_register(2);
z = bi_register(3);
w = bi_register(4);

x3210 = x;
x3210 = x;
x3210.swizzle = BI_SWIZZLE_B3210;
}

~LowerSwizzle() {
~LowerSwizzle()
{
ralloc_free(mem_ctx);
}

@@ -58,7 +61,8 @@ protected:
TEST_F(LowerSwizzle, Csel16)
{
CASE(bi_csel_v2f16_to(b, reg, bi_half(x, 0), y, z, w, BI_CMPF_NE),
bi_csel_v2f16_to(b, reg, bi_swz_v2i16(b, bi_half(x, 0)), y, z, w, BI_CMPF_NE));
bi_csel_v2f16_to(b, reg, bi_swz_v2i16(b, bi_half(x, 0)), y, z, w,
BI_CMPF_NE));
}

TEST_F(LowerSwizzle, Fma16)

@@ -79,23 +83,22 @@ TEST_F(LowerSwizzle, ClzHadd8)
TEST_F(LowerSwizzle, FirstShift8)
{
enum bi_opcode ops[] = {
BI_OPCODE_LSHIFT_AND_V4I8,
BI_OPCODE_LSHIFT_OR_V4I8,
BI_OPCODE_LSHIFT_XOR_V4I8,
BI_OPCODE_RSHIFT_AND_V4I8,
BI_OPCODE_RSHIFT_OR_V4I8,
BI_OPCODE_RSHIFT_XOR_V4I8,
BI_OPCODE_LSHIFT_AND_V4I8, BI_OPCODE_LSHIFT_OR_V4I8,
BI_OPCODE_LSHIFT_XOR_V4I8, BI_OPCODE_RSHIFT_AND_V4I8,
BI_OPCODE_RSHIFT_OR_V4I8, BI_OPCODE_RSHIFT_XOR_V4I8,
};

for (unsigned i = 0; i < ARRAY_SIZE(ops); ++i) {
CASE({
CASE(
{
bi_instr *I = bi_lshift_and_v4i8_to(b, reg, x3210, y, z);
I->op = ops[i];
},
{
bi_instr *I = bi_lshift_and_v4i8_to(b, reg, bi_swz_v4i8(b, x3210), y, z);
},
{
bi_instr *I =
bi_lshift_and_v4i8_to(b, reg, bi_swz_v4i8(b, x3210), y, z);
I->op = ops[i];
});
});
}
}

@@ -21,56 +21,58 @@
* SOFTWARE.
*/

#include "compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "compiler.h"

#include <gtest/gtest.h>

#define CASE(instr, expected) do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
A->shader->info.bifrost = rzalloc(mem_ctx, struct bifrost_shader_info); \
B->shader->info.bifrost = rzalloc(mem_ctx, struct bifrost_shader_info); \
{ \
bi_builder *b = A; \
bi_index u = bi_temp(b->shader); \
UNUSED bi_index v = bi_temp(b->shader); \
UNUSED bi_index w = bi_temp(b->shader); \
instr; \
} \
{ \
bi_builder *b = B; \
bi_index u = bi_temp(b->shader); \
UNUSED bi_index v = bi_temp(b->shader); \
UNUSED bi_index w = bi_temp(b->shader); \
expected; \
} \
bi_opt_message_preload(A->shader); \
if (!bit_shader_equal(A->shader, B->shader)) { \
ADD_FAILURE(); \
fprintf(stderr, "Optimization produce unexpected result"); \
fprintf(stderr, " Actual:\n"); \
bi_print_shader(A->shader, stderr); \
fprintf(stderr, "Expected:\n"); \
bi_print_shader(B->shader, stderr); \
fprintf(stderr, "\n"); \
} \
} while(0)
#define CASE(instr, expected) \
do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
A->shader->info.bifrost = rzalloc(mem_ctx, struct bifrost_shader_info); \
B->shader->info.bifrost = rzalloc(mem_ctx, struct bifrost_shader_info); \
{ \
bi_builder *b = A; \
bi_index u = bi_temp(b->shader); \
UNUSED bi_index v = bi_temp(b->shader); \
UNUSED bi_index w = bi_temp(b->shader); \
instr; \
} \
{ \
bi_builder *b = B; \
bi_index u = bi_temp(b->shader); \
UNUSED bi_index v = bi_temp(b->shader); \
UNUSED bi_index w = bi_temp(b->shader); \
expected; \
} \
bi_opt_message_preload(A->shader); \
if (!bit_shader_equal(A->shader, B->shader)) { \
ADD_FAILURE(); \
fprintf(stderr, "Optimization produce unexpected result"); \
fprintf(stderr, " Actual:\n"); \
bi_print_shader(A->shader, stderr); \
fprintf(stderr, "Expected:\n"); \
bi_print_shader(B->shader, stderr); \
fprintf(stderr, "\n"); \
} \
} while (0)

#define NEGCASE(instr) CASE(instr, instr)

class MessagePreload : public testing::Test {
protected:
MessagePreload() {
protected:
MessagePreload()
{
mem_ctx = ralloc_context(NULL);

x = bi_register(16);
y = bi_register(32);

x = bi_register(16);
y = bi_register(32);
}

~MessagePreload() {
~MessagePreload()
{
ralloc_free(mem_ctx);
}

@@ -84,100 +86,117 @@ protected:

b->cursor = bi_before_block(bi_start_block(&b->shader->blocks));
bi_foreach_src(I, i)
I->src[i] = bi_mov_i32(b, bi_register(idx*4 + i));
I->src[i] = bi_mov_i32(b, bi_register(idx * 4 + i));

b->cursor = bi_after_instr(I);
}
};


TEST_F(MessagePreload, PreloadLdVarSample)
{
CASE({
CASE(
{
bi_ld_var_imm_to(b, u, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 0);
}, {
preload_moves(b, u, 4, 0);
});
},
{ preload_moves(b, u, 4, 0); });
}

TEST_F(MessagePreload, PreloadLdVarLdVar)
{
CASE({
CASE(
{
bi_ld_var_imm_to(b, u, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 2);
bi_ld_var_imm_to(b, v, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 1);
}, {
},
{
preload_moves(b, u, 4, 0);
preload_moves(b, v, 4, 1);
});
});
}

TEST_F(MessagePreload, MaxTwoMessages)
{
CASE({
CASE(
{
bi_ld_var_imm_to(b, u, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 2);
bi_ld_var_imm_to(b, v, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 1);
bi_ld_var_imm_to(b, w, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 0);
},
{
},
{
preload_moves(b, u, 4, 0);
preload_moves(b, v, 4, 1);
bi_ld_var_imm_to(b, w, bi_register(61), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_SAMPLE, BI_UPDATE_STORE, BI_VECSIZE_V4, 0);
});
});

CASE({
bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
bi_var_tex_f16_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 1, 2);
bi_var_tex_f16_to(b, w, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 3, 3);
}, {
CASE(
{
bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0,
0);
bi_var_tex_f16_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 1,
2);
bi_var_tex_f16_to(b, w, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 3,
3);
},
{
preload_moves(b, u, 4, 0);
preload_moves(b, v, 2, 1);
bi_var_tex_f16_to(b, w, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 3, 3);
});
bi_var_tex_f16_to(b, w, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 3,
3);
});
}

TEST_F(MessagePreload, PreloadVartexF16)
{
CASE({
bi_var_tex_f16_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
}, {
preload_moves(b, u, 2, 0);
});
CASE(
{
bi_var_tex_f16_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0,
0);
},
{ preload_moves(b, u, 2, 0); });
}

TEST_F(MessagePreload, PreloadVartexF32)
{
CASE({
bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
}, {
preload_moves(b, u, 4, 0);
});
CASE(
{
bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0,
0);
},
{ preload_moves(b, u, 4, 0); });
}

TEST_F(MessagePreload, PreloadVartexF32VartexF16)
{
CASE({
bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
bi_var_tex_f16_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 1, 2);
}, {
CASE(
{
bi_var_tex_f32_to(b, u, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0,
0);
bi_var_tex_f16_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 1,
2);
},
{
preload_moves(b, u, 4, 0);
preload_moves(b, v, 2, 1);
});
});
}

TEST_F(MessagePreload, PreloadVartexLodModes)
{
CASE({
CASE(
{
bi_var_tex_f32_to(b, u, true, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
bi_var_tex_f32_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
}, {
bi_var_tex_f32_to(b, v, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0,
0);
},
{
preload_moves(b, u, 4, 0);
preload_moves(b, v, 4, 1);
});
});
}

@@ -21,9 +21,9 @@
* SOFTWARE.
*/

#include "compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "compiler.h"

#include <gtest/gtest.h>

@@ -38,24 +38,35 @@ bi_optimizer(bi_context *ctx)
/* Define reg first so it has a consistent variable index, and pass it to an
* instruction that cannot be dead code eliminated so the program is nontrivial.
*/
#define CASE(instr, expected) INSTRUCTION_CASE(\
{ UNUSED bi_index reg = bi_temp(b->shader); instr; bi_kaboom(b, reg); }, \
{ UNUSED bi_index reg = bi_temp(b->shader); expected; bi_kaboom(b, reg); }, \
#define CASE(instr, expected) \
INSTRUCTION_CASE( \
{ \
UNUSED bi_index reg = bi_temp(b->shader); \
instr; \
bi_kaboom(b, reg); \
}, \
{ \
UNUSED bi_index reg = bi_temp(b->shader); \
expected; \
bi_kaboom(b, reg); \
}, \
bi_optimizer);

#define NEGCASE(instr) CASE(instr, instr)

class Optimizer : public testing::Test {
protected:
Optimizer() {
protected:
Optimizer()
{
mem_ctx = ralloc_context(NULL);

x = bi_register(1);
y = bi_register(2);
x = bi_register(1);
y = bi_register(2);
negabsx = bi_neg(bi_abs(x));
}

~Optimizer() {
~Optimizer()
{
ralloc_free(mem_ctx);
}

@@ -95,91 +106,124 @@ TEST_F(Optimizer, FusedFABSNEGForFP16)

TEST_F(Optimizer, FuseFADD_F32WithEqualSourcesAbsAbsAndClamp)
{
CASE({
bi_instr *I = bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), bi_abs(x));
CASE(
{
bi_instr *I =
bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_1;
}, {
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
});

CASE({
bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_fabsneg_f32(b, bi_abs(x)));
CASE(
{
bi_instr *I =
bi_fadd_f32_to(b, reg, bi_abs(x), bi_fabsneg_f32(b, bi_abs(x)));
I->clamp = BI_CLAMP_CLAMP_0_1;
}, {
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
});

CASE({
bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), bi_abs(x)));
CASE(
{
bi_instr *I =
bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), bi_abs(x)));
I->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
});
}

TEST_F(Optimizer, FuseFADD_V2F16WithDifferentSourcesAbsAbsAndClamp)
{
CASE({
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(y));
CASE(
{
bi_instr *I =
bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(y));
I->clamp = BI_CLAMP_CLAMP_0_1;
}, {
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
});

CASE({
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(y)));
CASE(
{
bi_instr *I =
bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(y)));
I->clamp = BI_CLAMP_CLAMP_0_1;
}, {
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y));
I->clamp = BI_CLAMP_CLAMP_0_1;
});
});

CASE({
bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(y)));
CASE(
{
bi_instr *I =
bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(y)));
I->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y));
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
});
}

TEST_F(Optimizer, AvoidFADD_V2F16WithEqualSourcesAbsAbsAndClamp)
{
NEGCASE({
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_1;
bi_instr *I =
bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(x));
I->clamp = BI_CLAMP_CLAMP_0_1;
});

NEGCASE({
bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(x)));
I->clamp = BI_CLAMP_CLAMP_0_1;
bi_instr *I =
bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(x)));
I->clamp = BI_CLAMP_CLAMP_0_1;
});

NEGCASE({
bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(x)));
bi_instr *I =
bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(x)));
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
}

TEST_F(Optimizer, SwizzlesComposedForFP16)
{
CASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), y),
CASE(bi_fadd_v2f16_to(
b, reg, bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), y),
bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y));

CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, negabsx), true, false), y),
CASE(bi_fadd_v2f16_to(
b, reg, bi_swz_16(bi_fabsneg_v2f16(b, negabsx), true, false), y),
bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y));

CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), true, false), y),
CASE(bi_fadd_v2f16_to(
b, reg,
bi_swz_16(bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), true,
false),
y),
bi_fadd_v2f16_to(b, reg, negabsx, y));

CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, false)), true, false), y),
CASE(bi_fadd_v2f16_to(
b, reg,
bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, false)), true, false),
y),
bi_fadd_v2f16_to(b, reg, bi_half(negabsx, false), y));

CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, true)), true, false), y),
CASE(bi_fadd_v2f16_to(
b, reg,
bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, true)), true, false),
y),
bi_fadd_v2f16_to(b, reg, bi_half(negabsx, true), y));
}

@@ -192,7 +236,8 @@ TEST_F(Optimizer, PreserveWidens)
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(negabsx, true)), y),
bi_fadd_f32_to(b, reg, bi_half(negabsx, true), y));

CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(x, true)), bi_fabsneg_f32(b, bi_half(x, false))),
CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(x, true)),
bi_fabsneg_f32(b, bi_half(x, false))),
bi_fadd_f32_to(b, reg, bi_half(x, true), bi_half(x, false)));
}

@@ -219,85 +264,100 @@ TEST_F(Optimizer, AvoidZeroAndFABSNEGFootguns)

TEST_F(Optimizer, ClampsPropagated)
{
CASE({
bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, x, y));
I->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
CASE(
{
bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, x, y));
I->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});

CASE({
bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y));
I->clamp = BI_CLAMP_CLAMP_0_1;
}, {
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE(
{
bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y));
I->clamp = BI_CLAMP_CLAMP_0_1;
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
}


TEST_F(Optimizer, ClampsComposed)
{
CASE({
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_M1_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE(
{
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_M1_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});

CASE({
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE(
{
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});

CASE({
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_INF;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
CASE(
{
bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_INF;
J->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_f32_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});

CASE({
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_M1_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE(
{
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_M1_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});

CASE({
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});
CASE(
{
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_1;
J->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_1;
});

CASE({
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_INF;
J->clamp = BI_CLAMP_CLAMP_0_INF;
}, {
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
CASE(
{
bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y);
bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]);
I->clamp = BI_CLAMP_CLAMP_0_INF;
J->clamp = BI_CLAMP_CLAMP_0_INF;
},
{
bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y);
I->clamp = BI_CLAMP_CLAMP_0_INF;
});
}

TEST_F(Optimizer, DoNotMixSizesWhenClamping)

@@ -341,21 +401,29 @@ TEST_F(Optimizer, FuseComparisonsWithDISCARD)
bi_discard_f32(b, x, y, BI_CMPF_EQ));

for (unsigned h = 0; h < 2; ++h) {
CASE(bi_discard_b32(b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1), h)),
CASE(bi_discard_b32(
b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1),
h)),
bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_LE));

CASE(bi_discard_b32(b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_NE, BI_RESULT_TYPE_I1), h)),
CASE(bi_discard_b32(
b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_NE, BI_RESULT_TYPE_I1),
h)),
bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_NE));

CASE(bi_discard_b32(b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_EQ, BI_RESULT_TYPE_M1), h)),
CASE(bi_discard_b32(
b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_EQ, BI_RESULT_TYPE_M1),
h)),
bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_EQ));
}
}

TEST_F(Optimizer, DoNotFuseSpecialComparisons)
{
NEGCASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_GTLT, BI_RESULT_TYPE_F1)));
NEGCASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_TOTAL, BI_RESULT_TYPE_F1)));
NEGCASE(
bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_GTLT, BI_RESULT_TYPE_F1)));
NEGCASE(bi_discard_b32(
b, bi_fcmp_f32(b, x, y, BI_CMPF_TOTAL, BI_RESULT_TYPE_F1)));
}

TEST_F(Optimizer, FuseResultType)

@@ -365,25 +433,33 @@ TEST_F(Optimizer, FuseResultType)
BI_MUX_INT_ZERO),
bi_fcmp_f32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1));

CASE(bi_mux_i32_to(b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0),
bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_F1));
CASE(bi_mux_i32_to(
b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0),
bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE,
BI_RESULT_TYPE_F1));

CASE(bi_mux_i32_to(b, reg, bi_imm_u32(0), bi_imm_u32(1),
bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_I1));
CASE(bi_mux_i32_to(
b, reg, bi_imm_u32(0), bi_imm_u32(1),
bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE,
BI_RESULT_TYPE_I1));

CASE(bi_mux_v2i16_to(b, reg, bi_imm_f16(0.0), bi_imm_f16(1.0),
bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_F1));
bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE,
BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE,
BI_RESULT_TYPE_F1));

CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1),
bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_I1));
bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE,
BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE,
BI_RESULT_TYPE_I1));

CASE(bi_mux_i32_to(b, reg, bi_imm_u32(0), bi_imm_u32(1),
bi_icmp_u32(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),

@@ -391,13 +467,13 @@ TEST_F(Optimizer, FuseResultType)
bi_icmp_u32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));

CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1),
bi_icmp_v2u16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v2u16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v2u16_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));

CASE(bi_mux_v4i8_to(b, reg, bi_imm_u8(0), bi_imm_u8(1),
bi_icmp_v4u8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v4u8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v4u8_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));

CASE(bi_mux_i32_to(b, reg, bi_imm_u32(0), bi_imm_u32(1),

@@ -406,31 +482,36 @@ TEST_F(Optimizer, FuseResultType)
bi_icmp_s32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));

CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1),
bi_icmp_v2s16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v2s16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v2s16_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));

CASE(bi_mux_v4i8_to(b, reg, bi_imm_u8(0), bi_imm_u8(1),
bi_icmp_v4s8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v4s8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO),
bi_icmp_v4s8_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1));
}

TEST_F(Optimizer, DoNotFuseMixedSizeResultType)
{
NEGCASE(bi_mux_i32_to(b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0),
bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO));
NEGCASE(bi_mux_i32_to(
b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0),
bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO));

NEGCASE(bi_mux_v2i16_to(b, reg, bi_imm_f16(0.0), bi_imm_f16(1.0),
bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO));
NEGCASE(bi_mux_v2i16_to(
b, reg, bi_imm_f16(0.0), bi_imm_f16(1.0),
bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1),
BI_MUX_INT_ZERO));
}

TEST_F(Optimizer, VarTexCoord32)
{
CASE({
bi_index ld = bi_ld_var_imm(b, bi_null(), BI_REGISTER_FORMAT_F32, BI_SAMPLE_CENTER, BI_UPDATE_STORE, BI_VECSIZE_V2, 0);
CASE(
{
bi_index ld =
bi_ld_var_imm(b, bi_null(), BI_REGISTER_FORMAT_F32,
BI_SAMPLE_CENTER, BI_UPDATE_STORE, BI_VECSIZE_V2, 0);

bi_index x = bi_temp(b->shader);
bi_index y = bi_temp(b->shader);

@@ -439,9 +520,11 @@ TEST_F(Optimizer, VarTexCoord32)
split->dest[1] = y;

bi_texs_2d_f32_to(b, reg, x, y, false, 0, 0);
}, {
bi_var_tex_f32_to(b, reg, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0);
});
},
{
bi_var_tex_f32_to(b, reg, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0,
0);
});
}

TEST_F(Optimizer, Int8ToFloat32)

@@ -458,7 +541,6 @@ TEST_F(Optimizer, Int8ToFloat32)
}
}


TEST_F(Optimizer, Int16ToFloat32)
{
for (unsigned i = 0; i < 2; ++i) {

@@ -21,23 +21,27 @@
* SOFTWARE.
*/

#include "compiler.h"
#include "bi_test.h"
#include "compiler.h"

#include <gtest/gtest.h>
#include "mesa-gtest-extras.h"

class PackFormats : public testing::Test
{
protected:
PackFormats() {
class PackFormats : public testing::Test {
protected:
PackFormats()
{
util_dynarray_init(&result, NULL);
}
~PackFormats() {
~PackFormats()
{
util_dynarray_fini(&result);
}

const uint64_t *result_as_u64_array() { return reinterpret_cast<uint64_t *>(result.data); }
const uint64_t *result_as_u64_array()
{
return reinterpret_cast<uint64_t *>(result.data);
}

struct util_dynarray result;
};

@@ -46,7 +50,7 @@ TEST_F(PackFormats, 1)
{
/* Test case from the blob */
struct bi_packed_tuple tuples[] = {
{ 0x2380cb1c02200000, 0x10e0 },
{0x2380cb1c02200000, 0x10e0},
};

uint64_t header = 0x021000011800;

@@ -65,8 +69,8 @@ TEST_F(PackFormats, 1)
TEST_F(PackFormats, 2)
{
struct bi_packed_tuple tuples[] = {
{ 0x9380cb6044000044, 0xf65 },
{ 0xaf8721a05c000081, 0x1831 },
{0x9380cb6044000044, 0xf65},
{0xaf8721a05c000081, 0x1831},
};

bi_pack_format(&result, 0, tuples, 2, 0x52800011800, 0, 0, false);

@@ -86,9 +90,9 @@ TEST_F(PackFormats, 2)
TEST_F(PackFormats, 3)
{
struct bi_packed_tuple tuples[] = {
{ 0x93805b8040000000, 0xf65 },
{ 0x93886db05c000000, 0xf65 },
{ 0xb380cb180c000080, 0x18b1 },
{0x93805b8040000000, 0xf65},
{0x93886db05c000000, 0xf65},
{0xb380cb180c000080, 0x18b1},
};

bi_pack_format(&result, 0, tuples, 3, 0x3100000000, 0, 0, true);

@@ -96,12 +100,8 @@ TEST_F(PackFormats, 3)
bi_pack_format(&result, 4, tuples, 3, 0x3100000000, 0, 0, true);

const uint64_t expected[] = {
0x805b804000000029,
0x0188000000076593,
0x886db05c00000021,
0x58c0600004076593,
0x0000000000000044,
0x60002c6ce0300000,
0x805b804000000029, 0x0188000000076593, 0x886db05c00000021,
0x58c0600004076593, 0x0000000000000044, 0x60002c6ce0300000,
};

ASSERT_EQ(result.size, 48);

@@ -111,10 +111,10 @@ TEST_F(PackFormats, 3)
TEST_F(PackFormats, 4)
{
struct bi_packed_tuple tuples[] = {
{ 0xad8c87004000005f, 0x2f18 },
{ 0xad8c87385c00004f, 0x2f18 },
{ 0xad8c87385c00006e, 0x2f18 },
{ 0xb380cb182c000080, 0x18b1 },
{0xad8c87004000005f, 0x2f18},
{0xad8c87385c00004f, 0x2f18},
{0xad8c87385c00006e, 0x2f18},
{0xb380cb182c000080, 0x18b1},
};

uint64_t EC0 = (0x10000001ff000000) >> 4;

@@ -124,12 +124,8 @@ TEST_F(PackFormats, 4)
bi_pack_format(&result, 6, tuples, 4, 0x3100000000, EC0, 0, false);

const uint64_t expected[] = {
0x8c87004000005f2d,
0x01880000000718ad,
0x8c87385c00004f25,
0x39c2e000037718ad,
0x80cb182c00008005,
0xac01c62b6320b1b3,
0x8c87004000005f2d, 0x01880000000718ad, 0x8c87385c00004f25,
0x39c2e000037718ad, 0x80cb182c00008005, 0xac01c62b6320b1b3,
};

ASSERT_EQ(result.size, 48);

@@ -139,11 +135,9 @@ TEST_F(PackFormats, 4)
TEST_F(PackFormats, 5)
{
struct bi_packed_tuple tuples[] = {
{ 0x9380688040000000, 0xf65 },
{ 0xd4057300c000040, 0xf26 },
{ 0x1f80cb1858000000, 0x19ab },
{ 0x937401f85c000000, 0xf65 },
{ 0xb380cb180c000080, 0x18a1 },
{0x9380688040000000, 0xf65}, {0xd4057300c000040, 0xf26},
{0x1f80cb1858000000, 0x19ab}, {0x937401f85c000000, 0xf65},
{0xb380cb180c000080, 0x18a1},
};

uint64_t EC0 = (0x183f800000) >> 4;

@@ -154,14 +148,9 @@ TEST_F(PackFormats, 5)
bi_pack_format(&result, 8, tuples, 5, 0x3100000000, EC0, 0, true);

const uint64_t expected[] = {
0x8068804000000029,
0x0188000000076593,
0x4057300c00004021,
0x58c2c0000007260d,
0x7401f85c0000008b,
0x00006ac7e0376593,
0x80cb180c00008053,
0x000000183f80a1b3,
0x8068804000000029, 0x0188000000076593, 0x4057300c00004021,
0x58c2c0000007260d, 0x7401f85c0000008b, 0x00006ac7e0376593,
0x80cb180c00008053, 0x000000183f80a1b3,
};

ASSERT_EQ(result.size, 64);

@@ -171,12 +160,9 @@ TEST_F(PackFormats, 5)
TEST_F(PackFormats, 6)
{
struct bi_packed_tuple tuples[] = {
{ 0xad8c870068000048, 0x2f18 },
{ 0xad8c87385c000050, 0x2f18 },
{ 0xad8c87385c00006a, 0x2f18 },
{ 0xad8c87385c000074, 0x2f18 },
{ 0xad8c87385c000020, 0x2f18 },
{ 0xad8c87385c000030, 0x2f18 },
{0xad8c870068000048, 0x2f18}, {0xad8c87385c000050, 0x2f18},
{0xad8c87385c00006a, 0x2f18}, {0xad8c87385c000074, 0x2f18},
{0xad8c87385c000020, 0x2f18}, {0xad8c87385c000030, 0x2f18},
};

uint64_t EC0 = (0x345678912345670) >> 4;

@@ -188,15 +174,9 @@ TEST_F(PackFormats, 6)
bi_pack_format(&result, 10, tuples, 6, 0x60000011800, EC0, 0, false);

const uint64_t expected[] = {
0x8c8700680000482d,
0x30000008c00718ad,
0x8c87385c00005025,
0x39c2e000035718ad,
0x8c87385c00007401,
0xb401c62b632718ad,
0x8c87385c00002065,
0x39c2e000018718ad,
0x3456789123456706,
0x8c8700680000482d, 0x30000008c00718ad, 0x8c87385c00005025,
0x39c2e000035718ad, 0x8c87385c00007401, 0xb401c62b632718ad,
0x8c87385c00002065, 0x39c2e000018718ad, 0x3456789123456706,
0xa001c62b63200000,
};

@@ -207,13 +187,10 @@ TEST_F(PackFormats, 6)
TEST_F(PackFormats, 7)
{
struct bi_packed_tuple tuples[] = {
{ 0x9020074040000083, 0xf65 },
{ 0x90000d4058100080, 0xf65 },
{ 0x90000a3058700082, 0xf65 },
{ 0x9020074008114581, 0xf65 },
{ 0x90000d0058000080, 0xf65 },
{ 0x9000083058700082, 0xf65 },
{ 0x2380cb199ac38400, 0x327a },
{0x9020074040000083, 0xf65}, {0x90000d4058100080, 0xf65},
{0x90000a3058700082, 0xf65}, {0x9020074008114581, 0xf65},
{0x90000d0058000080, 0xf65}, {0x9000083058700082, 0xf65},
{0x2380cb199ac38400, 0x327a},
};

bi_pack_format(&result, 0, tuples, 7, 0x3000100000, 0, 0, true);

@@ -223,15 +200,9 @@ TEST_F(PackFormats, 7)
bi_pack_format(&result, 11, tuples, 7, 0x3000100000, 0, 0, true);

const uint64_t expected[] = {
0x2007404000008329,
0x0180008000076590,
0x000d405810008021,
0x5182c38004176590,
0x2007400811458101,
0x2401d96400076590,
0x000d005800008061,
0x4182c38004176590,
0x80cb199ac3840047,
0x2007404000008329, 0x0180008000076590, 0x000d405810008021,
0x5182c38004176590, 0x2007400811458101, 0x2401d96400076590,
0x000d005800008061, 0x4182c38004176590, 0x80cb199ac3840047,
0x3801d96400027a23,
};

@@ -242,14 +213,10 @@ TEST_F(PackFormats, 7)
TEST_F(PackFormats, 8)
{
struct bi_packed_tuple tuples[] = {
{ 0x442087037a2f8643, 0x3021 },
{ 0x84008d0586100043, 0x200 },
{ 0x7c008d0028014543, 0x0 },
{ 0x1c00070058200081, 0x1980 },
{ 0x1600dd878320400, 0x200 },
{ 0x49709c1b08308900, 0x200 },
{ 0x6c2007807881ca00, 0x40 },
{ 0x8d70fc0d94900083, 0x800 },
{0x442087037a2f8643, 0x3021}, {0x84008d0586100043, 0x200},
{0x7c008d0028014543, 0x0}, {0x1c00070058200081, 0x1980},
{0x1600dd878320400, 0x200}, {0x49709c1b08308900, 0x200},
{0x6c2007807881ca00, 0x40}, {0x8d70fc0d94900083, 0x800},
};

uint64_t EC0 = (0x32e635d0) >> 4;

@@ -262,18 +229,10 @@ TEST_F(PackFormats, 8)
bi_pack_format(&result, 13, tuples, 8, 0x61001311800, EC0, 0, true);

const uint64_t expected[] = {
0x2087037a2f86432e,
0x30800988c0002144,
0x008d058610004320,
0x6801400a2a1a0084,
0x0007005820008101,
0x0c00001f0021801c,
0x600dd87832040060,
0xe0d8418448020001,
0x2007807881ca00c0,
0xc6ba80125c20406c,
0x70fc0d9490008359,
0x0000000032e0008d,
0x2087037a2f86432e, 0x30800988c0002144, 0x008d058610004320,
0x6801400a2a1a0084, 0x0007005820008101, 0x0c00001f0021801c,
0x600dd87832040060, 0xe0d8418448020001, 0x2007807881ca00c0,
0xc6ba80125c20406c, 0x70fc0d9490008359, 0x0000000032e0008d,
};

ASSERT_EQ(result.size, 96);

@@ -39,14 +39,9 @@ TEST(Packing, PackLiteral)
TEST(Packing, PackUpper)
{
struct bi_packed_tuple tuples[] = {
{ 0, 0x3 << (75 - 64) },
{ 0, 0x1 << (75 - 64) },
{ 0, 0x7 << (75 - 64) },
{ 0, 0x0 << (75 - 64) },
{ 0, 0x2 << (75 - 64) },
{ 0, 0x6 << (75 - 64) },
{ 0, 0x5 << (75 - 64) },
{ 0, 0x4 << (75 - 64) },
{0, 0x3 << (75 - 64)}, {0, 0x1 << (75 - 64)}, {0, 0x7 << (75 - 64)},
{0, 0x0 << (75 - 64)}, {0, 0x2 << (75 - 64)}, {0, 0x6 << (75 - 64)},
{0, 0x5 << (75 - 64)}, {0, 0x4 << (75 - 64)},
};

EXPECT_EQ(bi_pack_upper(U(0), tuples, 8), 3);

@@ -62,9 +57,9 @@ TEST(Packing, PackUpper)
TEST(Packing, PackTupleBits)
{
struct bi_packed_tuple tuples[] = {
{ 0x1234567801234567, 0x3A },
{ 0x9876543299999999, 0x1B },
{ 0xABCDEF0101234567, 0x7C },
{0x1234567801234567, 0x3A},
{0x9876543299999999, 0x1B},
{0xABCDEF0101234567, 0x7C},
};

EXPECT_EQ(bi_pack_tuple_bits(T(0), tuples, 8, 0, 30), 0x01234567);

@@ -75,19 +70,14 @@ TEST(Packing, PackTupleBits)
TEST(Packing, PackSync)
{
struct bi_packed_tuple tuples[] = {
{ 0, 0x3 << (75 - 64) },
{ 0, 0x5 << (75 - 64) },
{ 0, 0x7 << (75 - 64) },
{ 0, 0x0 << (75 - 64) },
{ 0, 0x2 << (75 - 64) },
{ 0, 0x6 << (75 - 64) },
{ 0, 0x5 << (75 - 64) },
{ 0, 0x4 << (75 - 64) },
{0, 0x3 << (75 - 64)}, {0, 0x5 << (75 - 64)}, {0, 0x7 << (75 - 64)},
{0, 0x0 << (75 - 64)}, {0, 0x2 << (75 - 64)}, {0, 0x6 << (75 - 64)},
{0, 0x5 << (75 - 64)}, {0, 0x4 << (75 - 64)},
};

EXPECT_EQ(bi_pack_sync(L(3), L(1), L(7), tuples, 8, false), 0xCF);
EXPECT_EQ(bi_pack_sync(L(3), L(1), U(7), tuples, 8, false), 0xCC);
EXPECT_EQ(bi_pack_sync(L(3), U(1), U(7), tuples, 8, false), 0xEC);
EXPECT_EQ(bi_pack_sync(Z, U(1), U(7), tuples, 8, false), 0x2C);
EXPECT_EQ(bi_pack_sync(Z, U(1), U(7), tuples, 8, true) , 0x6C);
EXPECT_EQ(bi_pack_sync(Z, U(1), U(7), tuples, 8, false), 0x2C);
EXPECT_EQ(bi_pack_sync(Z, U(1), U(7), tuples, 8, true), 0x6C);
}

@@ -21,23 +21,28 @@
* SOFTWARE.
*/

#include "compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "compiler.h"

#include <gtest/gtest.h>

class SchedulerPredicates : public testing::Test {
protected:
SchedulerPredicates() {
protected:
SchedulerPredicates()
{
mem_ctx = ralloc_context(NULL);
b = bit_builder(mem_ctx);
}
~SchedulerPredicates() {
~SchedulerPredicates()
{
ralloc_free(mem_ctx);
}

bi_index TMP() { return bi_temp(b->shader); }
bi_index TMP()
{
return bi_temp(b->shader);
}

void *mem_ctx;
bi_builder *b;

@@ -1,21 +1,21 @@
#ifndef __DISASM_H
#define __DISASM_H

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BIT(b) (1ull << (b))
#define MASK(count) ((1ull << (count)) - 1)
#define BIT(b) (1ull << (b))
#define MASK(count) ((1ull << (count)) - 1)
#define SEXT(b, count) ((b ^ BIT(count - 1)) - BIT(count - 1))
#define UNUSED __attribute__((unused))
#define UNUSED __attribute__((unused))

#define VA_SRC_UNIFORM_TYPE 0x2
#define VA_SRC_IMM_TYPE 0x3
#define VA_SRC_IMM_TYPE 0x3

static inline void
va_print_dest(FILE *fp, uint8_t dest, bool can_mask)

@@ -51,7 +51,7 @@ disassemble_valhall(FILE *fp, const uint64_t *code, unsigned size, bool verbose)
if (verbose) {
/* Print byte pattern */
for (unsigned j = 0; j < 8; ++j)
fprintf(fp, "%02x ", (uint8_t) (instr >> (j * 8)));
fprintf(fp, "%02x ", (uint8_t)(instr >> (j * 8)));

fprintf(fp, " ");
} else {

@@ -21,10 +21,10 @@
* SOFTWARE.
*/

#include "va_compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "util/u_cpu_detect.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"

#include <gtest/gtest.h>

@@ -37,102 +37,137 @@ add_imm(bi_context *ctx)
}

#define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, add_imm)
#define NEGCASE(instr) CASE(instr, instr)
#define NEGCASE(instr) CASE(instr, instr)

class AddImm : public testing::Test {
protected:
AddImm() {
protected:
AddImm()
{
mem_ctx = ralloc_context(NULL);
}

~AddImm() {
~AddImm()
{
ralloc_free(mem_ctx);
}

void *mem_ctx;
};


TEST_F(AddImm, Basic) {
TEST_F(AddImm, Basic)
{
CASE(bi_mov_i32_to(b, bi_register(63), bi_imm_u32(0xABAD1DEA)),
bi_iadd_imm_i32_to(b, bi_register(63), bi_zero(), 0xABAD1DEA));

CASE(bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0)),
bi_fadd_imm_f32_to(b, bi_register(1), bi_register(2), fui(42.0)));

CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_imm_f32(42.0)),
bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)), fui(42.0)));
CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)),
bi_imm_f32(42.0)),
bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)),
fui(42.0)));

CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)), bi_neg(bi_imm_f32(42.0))),
bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)), fui(-42.0)));
CASE(bi_fadd_f32_to(b, bi_register(1), bi_discard(bi_register(2)),
bi_neg(bi_imm_f32(42.0))),
bi_fadd_imm_f32_to(b, bi_register(1), bi_discard(bi_register(2)),
fui(-42.0)));
}

TEST_F(AddImm, Commutativty) {
TEST_F(AddImm, Commutativty)
{
CASE(bi_fadd_f32_to(b, bi_register(1), bi_imm_f32(42.0), bi_register(2)),
bi_fadd_imm_f32_to(b, bi_register(1), bi_register(2), fui(42.0)));
}

TEST_F(AddImm, NoModifiers) {
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_abs(bi_register(2)), bi_imm_f32(42.0)));
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_neg(bi_register(2)), bi_imm_f32(42.0)));
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_f32(42.0)));
TEST_F(AddImm, NoModifiers)
{
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_abs(bi_register(2)),
bi_imm_f32(42.0)));
NEGCASE(bi_fadd_f32_to(b, bi_register(1), bi_neg(bi_register(2)),
bi_imm_f32(42.0)));
NEGCASE(bi_fadd_f32_to(b, bi_register(1),
bi_swz_16(bi_register(2), false, false),
bi_imm_f32(42.0)));
}

TEST_F(AddImm, NoClamp) {
TEST_F(AddImm, NoClamp)
{
NEGCASE({
bi_instr *I = bi_fadd_f32_to(b, bi_register(1), bi_register(2),
bi_imm_f32(42.0));
bi_instr *I =
bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0));
I->clamp = BI_CLAMP_CLAMP_M1_1;
});
}

TEST_F(AddImm, OtherTypes) {
TEST_F(AddImm, OtherTypes)
{
CASE(bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0)),
bi_fadd_imm_v2f16_to(b, bi_register(1), bi_register(2), 0x51405140));

CASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false),
CASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), false),
bi_iadd_imm_i32_to(b, bi_register(1), bi_register(2), 0xDEADBEEF));

CASE(bi_iadd_v2u16_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false),
CASE(bi_iadd_v2u16_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), false),
bi_iadd_imm_v2i16_to(b, bi_register(1), bi_register(2), 0xDEADBEEF));

CASE(bi_iadd_v4u8_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false),
CASE(bi_iadd_v4u8_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), false),
bi_iadd_imm_v4i8_to(b, bi_register(1), bi_register(2), 0xDEADBEEF));

CASE(bi_iadd_s32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false),
CASE(bi_iadd_s32_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), false),
bi_iadd_imm_i32_to(b, bi_register(1), bi_register(2), 0xDEADBEEF));

CASE(bi_iadd_v2s16_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false),
CASE(bi_iadd_v2s16_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), false),
bi_iadd_imm_v2i16_to(b, bi_register(1), bi_register(2), 0xDEADBEEF));

CASE(bi_iadd_v4s8_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), false),
CASE(bi_iadd_v4s8_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), false),
bi_iadd_imm_v4i8_to(b, bi_register(1), bi_register(2), 0xDEADBEEF));

NEGCASE(bi_iadd_u32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_v2u16_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), true));
NEGCASE(bi_iadd_s32_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_v2s16_to(b, bi_register(1), bi_swz_16(bi_register(2), false, false), bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_u32_to(b, bi_register(1),
bi_swz_16(bi_register(2), false, false),
bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_v2u16_to(b, bi_register(1),
bi_swz_16(bi_register(2), false, false),
bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_u32_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), true));
NEGCASE(bi_iadd_s32_to(b, bi_register(1),
bi_swz_16(bi_register(2), false, false),
bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_v2s16_to(b, bi_register(1),
bi_swz_16(bi_register(2), false, false),
bi_imm_u32(0xDEADBEEF), false));

NEGCASE(bi_iadd_s32_to(b, bi_register(1), bi_register(2), bi_imm_u32(0xDEADBEEF), true));
NEGCASE(bi_iadd_s32_to(b, bi_register(1), bi_register(2),
bi_imm_u32(0xDEADBEEF), true));
}

TEST_F(AddImm, Int8) {
TEST_F(AddImm, Int8)
{
bi_index idx = bi_register(2);
idx.swizzle = BI_SWIZZLE_B0000;
NEGCASE(bi_iadd_v4u8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false));
NEGCASE(bi_iadd_v4s8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false));
NEGCASE(
bi_iadd_v4u8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false));
NEGCASE(
bi_iadd_v4s8_to(b, bi_register(1), idx, bi_imm_u32(0xDEADBEEF), false));
}

TEST_F(AddImm, OnlyRTE) {
TEST_F(AddImm, OnlyRTE)
{
NEGCASE({
bi_instr *I = bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0));
I->round = BI_ROUND_RTP;
bi_instr *I =
bi_fadd_f32_to(b, bi_register(1), bi_register(2), bi_imm_f32(42.0));
I->round = BI_ROUND_RTP;
});

NEGCASE({
bi_instr *I = bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0));
I->round = BI_ROUND_RTZ;
bi_instr *I =
bi_fadd_v2f16_to(b, bi_register(1), bi_register(2), bi_imm_f16(42.0));
I->round = BI_ROUND_RTZ;
});
}

@@ -21,8 +21,8 @@
* SOFTWARE.
*/

#include <stdio.h>
#include <inttypes.h>
#include <stdio.h>
#include "disassemble.h"

static inline uint8_t

@@ -39,7 +39,7 @@ parse_hex(const char *in)

for (unsigned i = 0; i < 8; ++i) {
uint8_t byte = (parse_nibble(in[0]) << 4) | parse_nibble(in[1]);
v |= ((uint64_t) byte) << (8 * i);
v |= ((uint64_t)byte) << (8 * i);

/* Skip the space after the byte */
in += 3;

@@ -21,8 +21,8 @@
* SOFTWARE.
*/

#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"
#include "valhall_enums.h"

@@ -37,177 +37,190 @@ strip_nops(bi_context *ctx)
}
}

#define CASE(shader_stage, test) do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
UNUSED bi_builder *b = A; \
A->shader->stage = MESA_SHADER_ ## shader_stage; \
test; \
} \
strip_nops(A->shader); \
va_insert_flow_control_nops(A->shader); \
{ \
UNUSED bi_builder *b = B; \
B->shader->stage = MESA_SHADER_ ## shader_stage; \
test; \
} \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while(0)
#define CASE(shader_stage, test) \
do { \
bi_builder *A = bit_builder(mem_ctx); \
bi_builder *B = bit_builder(mem_ctx); \
{ \
UNUSED bi_builder *b = A; \
A->shader->stage = MESA_SHADER_##shader_stage; \
test; \
} \
strip_nops(A->shader); \
va_insert_flow_control_nops(A->shader); \
{ \
UNUSED bi_builder *b = B; \
B->shader->stage = MESA_SHADER_##shader_stage; \
test; \
} \
ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while (0)

#define flow(f) bi_nop(b)->flow = VA_FLOW_ ## f
#define flow(f) bi_nop(b)->flow = VA_FLOW_##f

class InsertFlow : public testing::Test {
protected:
InsertFlow() {
protected:
InsertFlow()
{
mem_ctx = ralloc_context(NULL);
}

~InsertFlow() {
~InsertFlow()
{
ralloc_free(mem_ctx);
}

void *mem_ctx;
};

TEST_F(InsertFlow, PreserveEmptyShader) {
TEST_F(InsertFlow, PreserveEmptyShader)
{
CASE(FRAGMENT, {});
}

TEST_F(InsertFlow, TilebufferWait7) {
TEST_F(InsertFlow, TilebufferWait7)
{
CASE(FRAGMENT, {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT);
bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
flow(END);
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT);
bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), bi_register(7), bi_register(8),
BI_REGISTER_FORMAT_AUTO, 4, 4);
flow(END);
});

CASE(FRAGMENT, {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT);
bi_st_tile(b, bi_register(0), bi_register(4), bi_register(5),
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT);
bi_st_tile(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), BI_REGISTER_FORMAT_AUTO, BI_VECSIZE_V4);
flow(END);
});

CASE(FRAGMENT, {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT);
bi_ld_tile_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), BI_REGISTER_FORMAT_AUTO, BI_VECSIZE_V4);
flow(END);
});

CASE(FRAGMENT, {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT);
bi_ld_tile_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), BI_REGISTER_FORMAT_AUTO, BI_VECSIZE_V4);
flow(END);
flow(END);
});
}

TEST_F(InsertFlow, AtestWait6AndWait0After) {
TEST_F(InsertFlow, AtestWait6AndWait0After)
{
CASE(FRAGMENT, {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0126);
bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_fau(BIR_FAU_ATEST_PARAM, false));
flow(WAIT0);
flow(END);
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0126);
bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_fau(BIR_FAU_ATEST_PARAM, false));
flow(WAIT0);
flow(END);
});
}

TEST_F(InsertFlow, ZSEmitWait6) {
TEST_F(InsertFlow, ZSEmitWait6)
{
CASE(FRAGMENT, {
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0126);
bi_zs_emit_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), true, true);
flow(END);
flow(DISCARD);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0126);
bi_zs_emit_to(b, bi_register(0), bi_register(4), bi_register(5),
bi_register(6), true, true);
flow(END);
});
}

TEST_F(InsertFlow, LoadThenUnrelatedThenUse) {
TEST_F(InsertFlow, LoadThenUnrelatedThenUse)
{
CASE(VERTEX, {
bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19));
flow(END);
bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
flow(WAIT0);
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19));
flow(END);
});
}

TEST_F(InsertFlow, SingleLdVar) {
TEST_F(InsertFlow, SingleLdVar)
{
CASE(FRAGMENT, {
flow(DISCARD);
bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16,
BI_UPDATE_RETRIEVE, BI_VECSIZE_V4, 0);
flow(WAIT0);
flow(END);
flow(DISCARD);
bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16, BI_UPDATE_RETRIEVE,
BI_VECSIZE_V4, 0);
flow(WAIT0);
flow(END);
});
}

TEST_F(InsertFlow, SerializeLdVars) {
TEST_F(InsertFlow, SerializeLdVars)
{
CASE(FRAGMENT, {
flow(DISCARD);
bi_ld_var_buf_imm_f16_to(b, bi_register(16), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16,
BI_UPDATE_STORE, BI_VECSIZE_V4, 0);
bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
BI_SOURCE_FORMAT_F16,
BI_UPDATE_RETRIEVE, BI_VECSIZE_V4, 0);
flow(WAIT0);
bi_ld_var_buf_imm_f16_to(b, bi_register(8), bi_register(61),
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
|
||||
BI_SOURCE_FORMAT_F16,
|
||||
BI_UPDATE_STORE, BI_VECSIZE_V4, 1);
|
||||
flow(WAIT0);
|
||||
flow(END);
|
||||
flow(DISCARD);
|
||||
bi_ld_var_buf_imm_f16_to(b, bi_register(16), bi_register(61),
|
||||
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
|
||||
BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE,
|
||||
BI_VECSIZE_V4, 0);
|
||||
bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61),
|
||||
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
|
||||
BI_SOURCE_FORMAT_F16, BI_UPDATE_RETRIEVE,
|
||||
BI_VECSIZE_V4, 0);
|
||||
flow(WAIT0);
|
||||
bi_ld_var_buf_imm_f16_to(b, bi_register(8), bi_register(61),
|
||||
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
|
||||
BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE,
|
||||
BI_VECSIZE_V4, 1);
|
||||
flow(WAIT0);
|
||||
flow(END);
|
||||
});
|
||||
}
|
||||
|
||||
TEST_F(InsertFlow, Clper) {
|
||||
TEST_F(InsertFlow, Clper)
|
||||
{
|
||||
CASE(FRAGMENT, {
|
||||
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
|
||||
bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
|
||||
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
|
||||
BI_SUBGROUP_SUBGROUP4);
|
||||
flow(DISCARD);
|
||||
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
|
||||
flow(END);
|
||||
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
|
||||
bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
|
||||
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
|
||||
BI_SUBGROUP_SUBGROUP4);
|
||||
flow(DISCARD);
|
||||
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
|
||||
flow(END);
|
||||
});
|
||||
}
|
||||
|
||||
TEST_F(InsertFlow, TextureImplicit) {
|
||||
TEST_F(InsertFlow, TextureImplicit)
|
||||
{
|
||||
CASE(FRAGMENT, {
|
||||
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
|
||||
bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
|
||||
bi_register(12), false, BI_DIMENSION_2D,
|
||||
BI_REGISTER_FORMAT_F32, false, false,
|
||||
BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4);
|
||||
flow(DISCARD);
|
||||
flow(WAIT0);
|
||||
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
|
||||
flow(END);
|
||||
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
|
||||
bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
|
||||
bi_register(12), false, BI_DIMENSION_2D,
|
||||
BI_REGISTER_FORMAT_F32, false, false,
|
||||
BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4);
|
||||
flow(DISCARD);
|
||||
flow(WAIT0);
|
||||
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
|
||||
flow(END);
|
||||
});
|
||||
}
|
||||
|
||||
TEST_F(InsertFlow, TextureExplicit) {
|
||||
TEST_F(InsertFlow, TextureExplicit)
|
||||
{
|
||||
CASE(FRAGMENT, {
|
||||
flow(DISCARD);
|
||||
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
|
||||
bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
|
||||
bi_register(12), false, BI_DIMENSION_2D,
|
||||
BI_REGISTER_FORMAT_F32, false, false,
|
||||
BI_VA_LOD_MODE_ZERO_LOD, BI_WRITE_MASK_RGBA, 4);
|
||||
flow(WAIT0);
|
||||
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
|
||||
flow(END);
|
||||
flow(DISCARD);
|
||||
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
|
||||
bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
|
||||
bi_register(12), false, BI_DIMENSION_2D,
|
||||
BI_REGISTER_FORMAT_F32, false, false,
|
||||
BI_VA_LOD_MODE_ZERO_LOD, BI_WRITE_MASK_RGBA, 4);
|
||||
flow(WAIT0);
|
||||
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
|
||||
flow(END);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -217,49 +230,52 @@ TEST_F(InsertFlow, TextureExplicit) {
|
|||
* \ /
|
||||
* D
|
||||
*/
|
||||
TEST_F(InsertFlow, DiamondCFG) {
|
||||
TEST_F(InsertFlow, DiamondCFG)
|
||||
{
|
||||
CASE(FRAGMENT, {
|
||||
bi_block *A = bi_start_block(&b->shader->blocks);
|
||||
bi_block *B = bit_block(b->shader);
|
||||
bi_block *C = bit_block(b->shader);
|
||||
bi_block *D = bit_block(b->shader);
|
||||
bi_block *A = bi_start_block(&b->shader->blocks);
|
||||
bi_block *B = bit_block(b->shader);
|
||||
bi_block *C = bit_block(b->shader);
|
||||
bi_block *D = bit_block(b->shader);
|
||||
|
||||
bi_block_add_successor(A, B);
|
||||
bi_block_add_successor(A, C);
|
||||
bi_block_add_successor(A, B);
|
||||
bi_block_add_successor(A, C);
|
||||
|
||||
bi_block_add_successor(B, D);
|
||||
bi_block_add_successor(C, D);
|
||||
bi_block_add_successor(B, D);
|
||||
bi_block_add_successor(C, D);
|
||||
|
||||
/* B uses helper invocations, no other block does.
|
||||
*
|
||||
* That means B and C need to discard helpers.
|
||||
*/
|
||||
b->cursor = bi_after_block(B);
|
||||
bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
|
||||
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
|
||||
BI_SUBGROUP_SUBGROUP4);
|
||||
flow(DISCARD);
|
||||
flow(RECONVERGE);
|
||||
/* B uses helper invocations, no other block does.
|
||||
*
|
||||
* That means B and C need to discard helpers.
|
||||
*/
|
||||
b->cursor = bi_after_block(B);
|
||||
bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
|
||||
BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
|
||||
BI_SUBGROUP_SUBGROUP4);
|
||||
flow(DISCARD);
|
||||
flow(RECONVERGE);
|
||||
|
||||
b->cursor = bi_after_block(C);
|
||||
flow(DISCARD);
|
||||
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
|
||||
flow(RECONVERGE);
|
||||
b->cursor = bi_after_block(C);
|
||||
flow(DISCARD);
|
||||
bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
|
||||
flow(RECONVERGE);
|
||||
|
||||
b->cursor = bi_after_block(D);
|
||||
flow(END);
|
||||
b->cursor = bi_after_block(D);
|
||||
flow(END);
|
||||
});
|
||||
}
|
||||
|
||||
TEST_F(InsertFlow, BarrierBug) {
|
||||
TEST_F(InsertFlow, BarrierBug)
|
||||
{
|
||||
CASE(KERNEL, {
|
||||
bi_instr *I = bi_store_i32(b, bi_register(0), bi_register(2), bi_register(4), BI_SEG_NONE, 0);
|
||||
I->slot = 2;
|
||||
bi_instr *I = bi_store_i32(b, bi_register(0), bi_register(2),
|
||||
bi_register(4), BI_SEG_NONE, 0);
|
||||
I->slot = 2;
|
||||
|
||||
bi_fadd_f32_to(b, bi_register(10), bi_register(10), bi_register(10));
|
||||
flow(WAIT2);
|
||||
bi_barrier(b);
|
||||
flow(WAIT);
|
||||
flow(END);
|
||||
bi_fadd_f32_to(b, bi_register(10), bi_register(10), bi_register(10));
|
||||
flow(WAIT2);
|
||||
bi_barrier(b);
|
||||
flow(WAIT);
|
||||
flow(END);
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -21,9 +21,9 @@
 * SOFTWARE.
 */

#include "va_compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"

#include <gtest/gtest.h>

@@ -38,19 +38,22 @@ add_imm(bi_context *ctx)
#define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, add_imm)

class LowerConstants : public testing::Test {
protected:
   LowerConstants() {
 protected:
   LowerConstants()
   {
      mem_ctx = ralloc_context(NULL);
   }

   ~LowerConstants() {
   ~LowerConstants()
   {
      ralloc_free(mem_ctx);
   }

   void *mem_ctx;
};

TEST_F(LowerConstants, Float32) {
TEST_F(LowerConstants, Float32)
{
   CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(0.0)),
        bi_fadd_f32_to(b, bi_register(0), bi_register(0), va_lut(0)));

@@ -61,46 +64,59 @@ TEST_F(LowerConstants, Float32) {
        bi_fadd_f32_to(b, bi_register(0), bi_register(0), va_lut(17)));
}

TEST_F(LowerConstants, WidenFloat16) {
TEST_F(LowerConstants, WidenFloat16)
{
   CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(0.5)),
        bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_half(va_lut(26), 1)));
        bi_fadd_f32_to(b, bi_register(0), bi_register(0),
                       bi_half(va_lut(26), 1)));

   CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(255.0)),
        bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_half(va_lut(23), 0)));
        bi_fadd_f32_to(b, bi_register(0), bi_register(0),
                       bi_half(va_lut(23), 0)));

   CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(256.0)),
        bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_half(va_lut(23), 1)));
        bi_fadd_f32_to(b, bi_register(0), bi_register(0),
                       bi_half(va_lut(23), 1)));

   CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(8.0)),
        bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_half(va_lut(30), 1)));
        bi_fadd_f32_to(b, bi_register(0), bi_register(0),
                       bi_half(va_lut(30), 1)));
}

TEST_F(LowerConstants, ReplicateFloat16) {
TEST_F(LowerConstants, ReplicateFloat16)
{
   CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(255.0)),
        bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_half(va_lut(23), 0)));
        bi_fadd_v2f16_to(b, bi_register(0), bi_register(0),
                         bi_half(va_lut(23), 0)));

   CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(4.0)),
        bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_half(va_lut(29), 1)));
        bi_fadd_v2f16_to(b, bi_register(0), bi_register(0),
                         bi_half(va_lut(29), 1)));
}

TEST_F(LowerConstants, NegateFloat32) {
TEST_F(LowerConstants, NegateFloat32)
{
   CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(-1.0)),
        bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_neg(va_lut(16))));

   CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_imm_f32(-255.0)),
        bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_neg(bi_half(va_lut(23), 0))));
        bi_fadd_f32_to(b, bi_register(0), bi_register(0),
                       bi_neg(bi_half(va_lut(23), 0))));
}

TEST_F(LowerConstants, NegateReplicateFloat16)
{
   CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(-255.0)),
        bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_neg(bi_half(va_lut(23), 0))));
        bi_fadd_v2f16_to(b, bi_register(0), bi_register(0),
                         bi_neg(bi_half(va_lut(23), 0))));
}

TEST_F(LowerConstants, NegateVec2Float16)
{
   CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xBC008000)),
        bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_neg(va_lut(27))));
   CASE(
      bi_fadd_v2f16_to(b, bi_register(0), bi_register(0),
                       bi_imm_u32(0xBC008000)),
      bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_neg(va_lut(27))));
}

TEST_F(LowerConstants, Int8InInt32)

@@ -117,87 +133,105 @@ TEST_F(LowerConstants, ZeroExtendForUnsigned)
   CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xFF),
                           bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
        bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
                           bi_byte(va_lut(1), 0), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
                           bi_byte(va_lut(1), 0), bi_register(0), BI_CMPF_LT,
                           BI_RESULT_TYPE_I1));

   CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
                           bi_imm_u32(0xFFFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
        bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
                           bi_half(va_lut(1), 0), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
   CASE(
      bi_icmp_and_u32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xFFFF),
                         bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
      bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
                         bi_half(va_lut(1), 0), bi_register(0), BI_CMPF_LT,
                         BI_RESULT_TYPE_I1));
}

TEST_F(LowerConstants, SignExtendPositiveForSigned)
{
   CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
                           bi_imm_u32(0x7F), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
   CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0x7F),
                           bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
        bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
                           bi_byte(va_lut(2), 3), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
                           bi_byte(va_lut(2), 3), bi_register(0), BI_CMPF_LT,
                           BI_RESULT_TYPE_I1));

   CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
                           bi_imm_u32(0x7FFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
        bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
                           bi_half(va_lut(2), 1), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
   CASE(
      bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0x7FFF),
                         bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
      bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
                         bi_half(va_lut(2), 1), bi_register(0), BI_CMPF_LT,
                         BI_RESULT_TYPE_I1));
}

TEST_F(LowerConstants, SignExtendNegativeForSigned)
{
   CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
                           bi_imm_u32(0xFFFFFFF8), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
                           bi_imm_u32(0xFFFFFFF8), bi_register(0), BI_CMPF_LT,
                           BI_RESULT_TYPE_I1),
        bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
                           bi_byte(va_lut(23), 0), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
                           bi_byte(va_lut(23), 0), bi_register(0), BI_CMPF_LT,
                           BI_RESULT_TYPE_I1));

   CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
                           bi_imm_u32(0xFFFFFAFC), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
                           bi_imm_u32(0xFFFFFAFC), bi_register(0), BI_CMPF_LT,
                           BI_RESULT_TYPE_I1),
        bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
                           bi_half(va_lut(3), 1), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
                           bi_half(va_lut(3), 1), bi_register(0), BI_CMPF_LT,
                           BI_RESULT_TYPE_I1));
}

TEST_F(LowerConstants, DontZeroExtendForSigned)
{
   CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
                           bi_imm_u32(0xFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
   CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xFF),
                           bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
        bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
                           bi_iadd_imm_i32(b, va_lut(0), 0xFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));

   CASE(bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
                           bi_imm_u32(0xFFFF), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
        bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
                           bi_iadd_imm_i32(b, va_lut(0), 0xFFFF), bi_register(0),
                           bi_iadd_imm_i32(b, va_lut(0), 0xFF), bi_register(0),
                           BI_CMPF_LT, BI_RESULT_TYPE_I1));

   CASE(
      bi_icmp_and_s32_to(b, bi_register(0), bi_register(0), bi_imm_u32(0xFFFF),
                         bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
      bi_icmp_and_s32_to(b, bi_register(0), bi_register(0),
                         bi_iadd_imm_i32(b, va_lut(0), 0xFFFF), bi_register(0),
                         BI_CMPF_LT, BI_RESULT_TYPE_I1));
}

TEST_F(LowerConstants, DontZeroExtendNegative)
{
   CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
                           bi_imm_u32(0xFFFFFFF8), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
                           bi_imm_u32(0xFFFFFFF8), bi_register(0), BI_CMPF_LT,
                           BI_RESULT_TYPE_I1),
        bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
                           bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFFF8), bi_register(0),
                           BI_CMPF_LT, BI_RESULT_TYPE_I1));
                           bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFFF8),
                           bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));

   CASE(bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
                           bi_imm_u32(0xFFFFFAFC), bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1),
                           bi_imm_u32(0xFFFFFAFC), bi_register(0), BI_CMPF_LT,
                           BI_RESULT_TYPE_I1),
        bi_icmp_and_u32_to(b, bi_register(0), bi_register(0),
                           bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFAFC), bi_register(0),
                           BI_CMPF_LT, BI_RESULT_TYPE_I1));
                           bi_iadd_imm_i32(b, va_lut(0), 0xFFFFFAFC),
                           bi_register(0), BI_CMPF_LT, BI_RESULT_TYPE_I1));
}

TEST_F(LowerConstants, HandleTrickyNegativesFP16)
{
   CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(-57216.0)),
        bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_half(va_lut(3), 1)));
   CASE(
      bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(-57216.0)),
      bi_fadd_v2f16_to(b, bi_register(0), bi_register(0),
                       bi_half(va_lut(3), 1)));

   CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(57216.0)),
        bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_neg(bi_half(va_lut(3), 1))));
   CASE(
      bi_fadd_v2f16_to(b, bi_register(0), bi_register(0), bi_imm_f16(57216.0)),
      bi_fadd_v2f16_to(b, bi_register(0), bi_register(0),
                       bi_neg(bi_half(va_lut(3), 1))));
}

TEST_F(LowerConstants, MaintainMkvecRestrictedSwizzles)
{
   CASE(bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0),
                         bi_imm_u8(0), bi_imm_u32(0)),
   CASE(bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0), bi_imm_u8(0),
                         bi_imm_u32(0)),
        bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0),
                         bi_byte(va_lut(0), 0), va_lut(0)));

   CASE(bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0),
                         bi_imm_u8(14), bi_imm_u32(0)),
   CASE(bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0), bi_imm_u8(14),
                         bi_imm_u32(0)),
        bi_mkvec_v2i8_to(b, bi_register(0), bi_register(0),
                         bi_byte(va_lut(11), 2), va_lut(0)));
}

@@ -21,18 +21,19 @@
 * SOFTWARE.
 */

#include "va_compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"

#include <gtest/gtest.h>

#define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, va_lower_isel)
#define NEGCASE(instr) CASE(instr, instr)
#define NEGCASE(instr) CASE(instr, instr)

class LowerIsel : public testing::Test {
protected:
   LowerIsel() {
 protected:
   LowerIsel()
   {
      mem_ctx = ralloc_context(NULL);
      reg = bi_register(1);
      x = bi_register(2);

@@ -40,7 +41,8 @@ protected:
      z = bi_register(4);
   }

   ~LowerIsel() {
   ~LowerIsel()
   {
      ralloc_free(mem_ctx);
   }

@@ -48,14 +50,16 @@ protected:
   bi_index reg, x, y, z;
};

TEST_F(LowerIsel, 8BitSwizzles) {
TEST_F(LowerIsel, 8BitSwizzles)
{
   for (unsigned i = 0; i < 4; ++i) {
      CASE(bi_swz_v4i8_to(b, reg, bi_byte(reg, i)),
           bi_iadd_v4u8_to(b, reg, bi_byte(reg, i), bi_zero(), false));
   }
}

TEST_F(LowerIsel, 16BitSwizzles) {
TEST_F(LowerIsel, 16BitSwizzles)
{
   for (unsigned i = 0; i < 2; ++i) {
      for (unsigned j = 0; j < 2; ++j) {
         CASE(bi_swz_v2i16_to(b, reg, bi_swz_16(reg, i, j)),

@@ -64,24 +68,30 @@ TEST_F(LowerIsel, 16BitSwizzles) {
      }
   }
}

TEST_F(LowerIsel, JumpsLoweredToBranches) {
   bi_block block = { };
TEST_F(LowerIsel, JumpsLoweredToBranches)
{
   bi_block block = {};

   CASE({
      bi_instr *I = bi_jump(b, bi_imm_u32(0xDEADBEEF));
      I->branch_target = &block;
   }, {
      bi_instr *I = bi_branchz_i16(b, bi_zero(), bi_imm_u32(0xDEADBEEF), BI_CMPF_EQ);
      I->branch_target = &block;
   });
   CASE(
      {
         bi_instr *I = bi_jump(b, bi_imm_u32(0xDEADBEEF));
         I->branch_target = &block;
      },
      {
         bi_instr *I =
            bi_branchz_i16(b, bi_zero(), bi_imm_u32(0xDEADBEEF), BI_CMPF_EQ);
         I->branch_target = &block;
      });
}

TEST_F(LowerIsel, IndirectJumpsLoweredToBranches) {
TEST_F(LowerIsel, IndirectJumpsLoweredToBranches)
{
   CASE(bi_jump(b, bi_register(17)),
        bi_branchzi(b, bi_zero(), bi_register(17), BI_CMPF_EQ));
}

TEST_F(LowerIsel, IntegerCSEL) {
TEST_F(LowerIsel, IntegerCSEL)
{
   CASE(bi_csel_i32(b, reg, reg, reg, reg, BI_CMPF_EQ),
        bi_csel_u32(b, reg, reg, reg, reg, BI_CMPF_EQ));

@@ -89,7 +99,8 @@ TEST_F(LowerIsel, IntegerCSEL) {
        bi_csel_v2u16(b, reg, reg, reg, reg, BI_CMPF_EQ));
}

TEST_F(LowerIsel, AvoidSimpleMux) {
TEST_F(LowerIsel, AvoidSimpleMux)
{
   CASE(bi_mux_i32(b, x, y, z, BI_MUX_INT_ZERO),
        bi_csel_u32(b, z, bi_zero(), x, y, BI_CMPF_EQ));
   CASE(bi_mux_i32(b, x, y, z, BI_MUX_NEG),

@@ -105,27 +116,32 @@ TEST_F(LowerIsel, AvoidSimpleMux) {
        bi_csel_v2f16(b, z, bi_zero(), x, y, BI_CMPF_EQ));
}

TEST_F(LowerIsel, BitwiseMux) {
TEST_F(LowerIsel, BitwiseMux)
{
   NEGCASE(bi_mux_i32(b, x, y, z, BI_MUX_BIT));
   NEGCASE(bi_mux_v2i16(b, x, y, z, BI_MUX_BIT));
   NEGCASE(bi_mux_v4i8(b, x, y, z, BI_MUX_BIT));
}

TEST_F(LowerIsel, MuxInt8) {
TEST_F(LowerIsel, MuxInt8)
{
   NEGCASE(bi_mux_v4i8(b, x, y, z, BI_MUX_INT_ZERO));
   NEGCASE(bi_mux_v4i8(b, x, y, z, BI_MUX_NEG));
   NEGCASE(bi_mux_v4i8(b, x, y, z, BI_MUX_FP_ZERO));
}

TEST_F(LowerIsel, FaddRscale) {
   CASE(bi_fadd_rscale_f32_to(b, reg, x, y, z, BI_SPECIAL_NONE),
        bi_fma_rscale_f32_to(b, reg, x, bi_imm_f32(1.0), y, z, BI_SPECIAL_NONE));
TEST_F(LowerIsel, FaddRscale)
{
   CASE(
      bi_fadd_rscale_f32_to(b, reg, x, y, z, BI_SPECIAL_NONE),
      bi_fma_rscale_f32_to(b, reg, x, bi_imm_f32(1.0), y, z, BI_SPECIAL_NONE));

   CASE(bi_fadd_rscale_f32_to(b, reg, x, y, z, BI_SPECIAL_N),
        bi_fma_rscale_f32_to(b, reg, x, bi_imm_f32(1.0), y, z, BI_SPECIAL_N));
}

TEST_F(LowerIsel, Smoke) {
TEST_F(LowerIsel, Smoke)
{
   NEGCASE(bi_fadd_f32_to(b, reg, reg, reg));
   NEGCASE(bi_csel_s32_to(b, reg, reg, reg, reg, reg, BI_CMPF_LT));
   NEGCASE(bi_csel_u32_to(b, reg, reg, reg, reg, reg, BI_CMPF_LT));

@@ -21,14 +21,14 @@
 * SOFTWARE.
 */

#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"
#include "valhall_enums.h"

#include <gtest/gtest.h>

#define R(x) bi_register(x)
#define R(x) bi_register(x)
#define DR(x) bi_discard(R(x))

static void

@@ -40,105 +40,119 @@ strip_discard(bi_context *ctx)
   }
}

#define CASE(test) do { \
   void *mem_ctx = ralloc_context(NULL); \
   bi_builder *A = bit_builder(mem_ctx); \
   bi_builder *B = bit_builder(mem_ctx); \
   { \
      UNUSED bi_builder *b = A; \
      test; \
   } \
   strip_discard(A->shader); \
   va_mark_last(A->shader); \
   { \
      UNUSED bi_builder *b = B; \
      test; \
   } \
   ASSERT_SHADER_EQUAL(A->shader, B->shader); \
   ralloc_free(mem_ctx); \
} while(0)
#define CASE(test) \
   do { \
      void *mem_ctx = ralloc_context(NULL); \
      bi_builder *A = bit_builder(mem_ctx); \
      bi_builder *B = bit_builder(mem_ctx); \
      { \
         UNUSED bi_builder *b = A; \
         test; \
      } \
      strip_discard(A->shader); \
      va_mark_last(A->shader); \
      { \
         UNUSED bi_builder *b = B; \
         test; \
      } \
      ASSERT_SHADER_EQUAL(A->shader, B->shader); \
      ralloc_free(mem_ctx); \
   } while (0)

TEST(MarkLast, Simple) {
TEST(MarkLast, Simple)
{
   CASE(bi_fadd_f32_to(b, R(0), DR(0), DR(1)));

   CASE({
      bi_fadd_f32_to(b, R(2), R(0), DR(1));
      bi_fadd_f32_to(b, R(0), DR(0), DR(2));
      bi_fadd_f32_to(b, R(2), R(0), DR(1));
      bi_fadd_f32_to(b, R(0), DR(0), DR(2));
   });
}

TEST(MarkLast, SameSourceAndDestination) {
TEST(MarkLast, SameSourceAndDestination)
{
   CASE({
      bi_fadd_f32_to(b, R(0), DR(0), DR(0));
      bi_fadd_f32_to(b, R(0), DR(0), DR(0));
      bi_fadd_f32_to(b, R(0), DR(0), DR(0));
      bi_fadd_f32_to(b, R(0), DR(0), DR(0));
      bi_fadd_f32_to(b, R(0), DR(0), DR(0));
      bi_fadd_f32_to(b, R(0), DR(0), DR(0));
   });
}

TEST(MarkLast, StagingReadBefore) {
TEST(MarkLast, StagingReadBefore)
{
   CASE({
      bi_fadd_f32_to(b, R(9), R(2), DR(7));
      bi_st_tile(b, R(0), DR(4), DR(5), DR(6), BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4);
      bi_fadd_f32_to(b, R(9), R(2), DR(7));
      bi_st_tile(b, R(0), DR(4), DR(5), DR(6), BI_REGISTER_FORMAT_F32,
                 BI_VECSIZE_V4);
   });
}

TEST(MarkLast, StagingReadAfter) {
TEST(MarkLast, StagingReadAfter)
{
   CASE({
      bi_st_tile(b, R(0), DR(4), DR(5), DR(6), BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4);
      bi_fadd_f32_to(b, R(9), R(2), DR(7));
      bi_st_tile(b, R(0), DR(4), DR(5), DR(6), BI_REGISTER_FORMAT_F32,
                 BI_VECSIZE_V4);
      bi_fadd_f32_to(b, R(9), R(2), DR(7));
   });
}

TEST(MarkLast, NonstagingSourceToAsync) {
TEST(MarkLast, NonstagingSourceToAsync)
{
   CASE({
      bi_st_tile(b, R(0), R(4), R(5), DR(6), BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4);
      bi_fadd_f32_to(b, R(9), DR(4), DR(5));
      bi_st_tile(b, R(0), R(4), R(5), DR(6), BI_REGISTER_FORMAT_F32,
                 BI_VECSIZE_V4);
      bi_fadd_f32_to(b, R(9), DR(4), DR(5));
   });
}

TEST(MarkLast, Both64) {
TEST(MarkLast, Both64)
{
   CASE(bi_load_i32_to(b, R(0), DR(8), DR(9), BI_SEG_NONE, 0));
}

TEST(MarkLast, Neither64ThenBoth) {
TEST(MarkLast, Neither64ThenBoth)
{
   CASE({
      bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0);
      bi_load_i32_to(b, R(1), DR(8), DR(9), BI_SEG_NONE, 8);
      bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0);
      bi_load_i32_to(b, R(1), DR(8), DR(9), BI_SEG_NONE, 8);
   });
}

TEST(MarkLast, Half64) {
TEST(MarkLast, Half64)
{
   CASE({
      bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0);
      bi_fadd_f32_to(b, R(8), DR(8), DR(8));
      bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0);
      bi_fadd_f32_to(b, R(8), DR(8), DR(8));
   });

   CASE({
      bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0);
      bi_fadd_f32_to(b, R(9), DR(9), DR(9));
      bi_load_i32_to(b, R(0), R(8), R(9), BI_SEG_NONE, 0);
      bi_fadd_f32_to(b, R(9), DR(9), DR(9));
   });
}

TEST(MarkLast, RegisterBlendDescriptor) {
TEST(MarkLast, RegisterBlendDescriptor)
{
   CASE({
      bi_blend_to(b, R(48), R(0), DR(60), DR(4), DR(5), bi_null(),
                  BI_REGISTER_FORMAT_F32, 4, 0);
      bi_blend_to(b, R(48), R(0), DR(60), DR(4), DR(5), bi_null(),
                  BI_REGISTER_FORMAT_F32, 4, 0);
   });

   CASE({
      bi_blend_to(b, R(48), R(0), DR(60), R(4), R(5), bi_null(),
                  BI_REGISTER_FORMAT_F32, 4, 0);
      bi_fadd_f32_to(b, R(4), DR(4), DR(7));
      bi_blend_to(b, R(48), R(0), DR(60), R(4), R(5), bi_null(),
                  BI_REGISTER_FORMAT_F32, 4, 0);
      bi_fadd_f32_to(b, R(4), DR(4), DR(7));
   });

   CASE({
      bi_blend_to(b, R(48), R(0), DR(60), R(4), R(5), bi_null(),
                  BI_REGISTER_FORMAT_F32, 4, 0);
      bi_fadd_f32_to(b, R(4), DR(5), DR(7));
      bi_blend_to(b, R(48), R(0), DR(60), R(4), R(5), bi_null(),
                  BI_REGISTER_FORMAT_F32, 4, 0);
      bi_fadd_f32_to(b, R(4), DR(5), DR(7));
   });
}

TEST(MarkLast, ControlFlowAllFeatures) {
TEST(MarkLast, ControlFlowAllFeatures)
{
   /* A
    * / \
    *  B C

@@ -153,9 +167,8 @@ TEST(MarkLast, ControlFlowAllFeatures) {

   b->cursor = bi_after_block(A);
   {
      bi_instr *I =
         bi_st_tile(b, R(10), DR(14), DR(15), DR(16),
                    BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4);
      bi_instr *I = bi_st_tile(b, R(10), DR(14), DR(15), DR(16),
                               BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4);
      I->slot = 2;

      bi_load_i32_to(b, R(20), R(28), R(29), BI_SEG_NONE, 0);

@@ -21,42 +21,45 @@
 * SOFTWARE.
 */

#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"
#include "valhall_enums.h"

#include <gtest/gtest.h>

#define CASE(test, expected) do { \
   bi_builder *A = bit_builder(mem_ctx); \
   bi_builder *B = bit_builder(mem_ctx); \
   { \
      bi_builder *b = A; \
      A->shader->stage = MESA_SHADER_FRAGMENT; \
      test; \
   } \
   va_merge_flow(A->shader); \
   { \
      bi_builder *b = B; \
      B->shader->stage = MESA_SHADER_FRAGMENT; \
      expected; \
   } \
   ASSERT_SHADER_EQUAL(A->shader, B->shader); \
} while(0)
#define CASE(test, expected) \
   do { \
      bi_builder *A = bit_builder(mem_ctx); \
      bi_builder *B = bit_builder(mem_ctx); \
      { \
         bi_builder *b = A; \
         A->shader->stage = MESA_SHADER_FRAGMENT; \
         test; \
      } \
      va_merge_flow(A->shader); \
      { \
         bi_builder *b = B; \
         B->shader->stage = MESA_SHADER_FRAGMENT; \
         expected; \
      } \
      ASSERT_SHADER_EQUAL(A->shader, B->shader); \
   } while (0)

#define NEGCASE(test) CASE(test, test)

#define flow(f) bi_nop(b)->flow = VA_FLOW_ ## f
#define flow(f) bi_nop(b)->flow = VA_FLOW_##f

class MergeFlow : public testing::Test {
protected:
   MergeFlow() {
 protected:
   MergeFlow()
   {
      mem_ctx = ralloc_context(NULL);
      atest = bi_fau(BIR_FAU_ATEST_PARAM, false);
   }

   ~MergeFlow() {
   ~MergeFlow()
   {
      ralloc_free(mem_ctx);
   }

@@ -65,74 +68,84 @@ protected:
   bi_index atest;
};

TEST_F(MergeFlow, End) {
   CASE({
      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
      bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
                  bi_register(6), bi_register(7), bi_register(8),
                  BI_REGISTER_FORMAT_AUTO, 4, 4);
      flow(END);
   },
   {
      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
      I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
                      bi_register(6), bi_register(7), bi_register(8),
                      BI_REGISTER_FORMAT_AUTO, 4, 4);
      I->flow = VA_FLOW_END;
   });
TEST_F(MergeFlow, End)
{
   CASE(
      {
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
                     bi_register(6), bi_register(7), bi_register(8),
                     BI_REGISTER_FORMAT_AUTO, 4, 4);
         flow(END);
      },
      {
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
                         bi_register(6), bi_register(7), bi_register(8),
                         BI_REGISTER_FORMAT_AUTO, 4, 4);
         I->flow = VA_FLOW_END;
      });
}

TEST_F(MergeFlow, Reconverge) {
   CASE({
      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
      bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
                  bi_register(6), bi_register(7), bi_register(8),
                  BI_REGISTER_FORMAT_AUTO, 4, 4);
      flow(RECONVERGE);
   },
   {
      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
      I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
                      bi_register(6), bi_register(7), bi_register(8),
                      BI_REGISTER_FORMAT_AUTO, 4, 4);
      I->flow = VA_FLOW_RECONVERGE;
   });
TEST_F(MergeFlow, Reconverge)
{
   CASE(
      {
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
                     bi_register(6), bi_register(7), bi_register(8),
                     BI_REGISTER_FORMAT_AUTO, 4, 4);
         flow(RECONVERGE);
      },
      {
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
                         bi_register(6), bi_register(7), bi_register(8),
                         BI_REGISTER_FORMAT_AUTO, 4, 4);
         I->flow = VA_FLOW_RECONVERGE;
      });
}

TEST_F(MergeFlow, TrivialWait) {
   CASE({
      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
      flow(WAIT0126);
      bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest);
   },
   {
      I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
      I->flow = VA_FLOW_WAIT0126;
      bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest);
   });
TEST_F(MergeFlow, TrivialWait)
{
   CASE(
      {
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         flow(WAIT0126);
         bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest);
      },
      {
         I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         I->flow = VA_FLOW_WAIT0126;
         bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest);
      });
}

TEST_F(MergeFlow, LoadThenUnrelatedThenUse) {
   CASE({
TEST_F(MergeFlow, LoadThenUnrelatedThenUse)
{
   CASE(
      {
         bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
                           BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         flow(WAIT0);
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19));
         flow(END);
   },
   {
      },
      {
         bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
                           BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
         I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         I->flow = VA_FLOW_WAIT0;
         I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19));
         I->flow = VA_FLOW_END;
   });
      });
}

TEST_F(MergeFlow, TrivialDiscard) {
   CASE({
TEST_F(MergeFlow, TrivialDiscard)
{
   CASE(
      {
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
                         BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,

@@ -140,31 +153,35 @@ TEST_F(MergeFlow, TrivialDiscard) {
         flow(DISCARD);
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         flow(END);
   },
   {
      },
      {
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         I = bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
                         BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
                         BI_SUBGROUP_SUBGROUP4);
                             BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
                             BI_SUBGROUP_SUBGROUP4);
         I->flow = VA_FLOW_DISCARD;
         I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         I->flow = VA_FLOW_END;
   });
      });
}

TEST_F(MergeFlow, TrivialDiscardAtTheStart) {
   CASE({
TEST_F(MergeFlow, TrivialDiscardAtTheStart)
{
   CASE(
      {
         flow(DISCARD);
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
   },
   {
      },
      {
         I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         I->flow = VA_FLOW_DISCARD;
   });
      });
}

TEST_F(MergeFlow, MoveDiscardPastWait) {
   CASE({
TEST_F(MergeFlow, MoveDiscardPastWait)
{
   CASE(
      {
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
                         BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,

@@ -172,20 +189,22 @@ TEST_F(MergeFlow, MoveDiscardPastWait) {
         flow(DISCARD);
         flow(WAIT0);
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
   },
   {
      },
      {
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         I = bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
                         BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
                         BI_SUBGROUP_SUBGROUP4);
                             BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
                             BI_SUBGROUP_SUBGROUP4);
         I->flow = VA_FLOW_WAIT0;
         I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         I->flow = VA_FLOW_DISCARD;
   });
      });
}

TEST_F(MergeFlow, OccludedWaitsAndDiscard) {
   CASE({
TEST_F(MergeFlow, OccludedWaitsAndDiscard)
{
   CASE(
      {
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
                         BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,

@@ -194,75 +213,84 @@ TEST_F(MergeFlow, OccludedWaitsAndDiscard) {
         flow(DISCARD);
         flow(WAIT2);
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
   },
   {
      },
      {
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         I = bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
                         BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
                         BI_SUBGROUP_SUBGROUP4);
                             BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
                             BI_SUBGROUP_SUBGROUP4);
         I->flow = VA_FLOW_WAIT02;
         I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         I->flow = VA_FLOW_DISCARD;
   });
      });
}

TEST_F(MergeFlow, DeleteUselessWaits) {
   CASE({
TEST_F(MergeFlow, DeleteUselessWaits)
{
   CASE(
      {
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         flow(WAIT0);
         flow(WAIT2);
         flow(END);
   },
   {
      },
      {
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         I->flow = VA_FLOW_END;
   });
      });
}

TEST_F(MergeFlow, BlockFullOfUselessWaits) {
   CASE({
TEST_F(MergeFlow, BlockFullOfUselessWaits)
{
   CASE(
      {
         flow(WAIT0);
         flow(WAIT2);
         flow(DISCARD);
         flow(END);
   },
   {
      flow(END);
   });
      },
      { flow(END); });
}

TEST_F(MergeFlow, WaitWithMessage) {
   CASE({
TEST_F(MergeFlow, WaitWithMessage)
{
   CASE(
      {
         bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
                           BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
         flow(WAIT0);
   },
   {
      I = bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
                            BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
      },
      {
         I = bi_ld_attr_imm_to(b, bi_register(16), bi_register(60),
                               bi_register(61), BI_REGISTER_FORMAT_F32,
                               BI_VECSIZE_V4, 1);
         I->flow = VA_FLOW_WAIT0;
   });
      });
}

TEST_F(MergeFlow, CantMoveWaitPastMessage) {
TEST_F(MergeFlow, CantMoveWaitPastMessage)
{
   NEGCASE({
      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
      I = bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
      bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
      I =
         bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
                           BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);

      /* Pretend it's blocked for some reason. This doesn't actually happen
       * with the current algorithm, but it's good to handle the special
       * cases correctly in case we change later on.
       */
      I->flow = VA_FLOW_DISCARD;
      flow(WAIT0);
      /* Pretend it's blocked for some reason. This doesn't actually happen
       * with the current algorithm, but it's good to handle the special
       * cases correctly in case we change later on.
       */
      I->flow = VA_FLOW_DISCARD;
      flow(WAIT0);
   });
}

TEST_F(MergeFlow, DeletePointlessDiscard) {
   CASE({
TEST_F(MergeFlow, DeletePointlessDiscard)
{
   CASE(
      {
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
                          bi_register(12), false, BI_DIMENSION_2D,

@@ -277,31 +305,34 @@ TEST_F(MergeFlow, DeletePointlessDiscard) {
                     bi_register(6), bi_register(7), bi_register(8),
                     BI_REGISTER_FORMAT_AUTO, 4, 4);
         flow(END);
   },
   {
      },
      {
         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
         I = bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
                              bi_register(12), false, BI_DIMENSION_2D,
                              BI_REGISTER_FORMAT_F32, false, false,
                              BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4);
         I = bi_tex_single_to(
            b, bi_register(0), bi_register(4), bi_register(8), bi_register(12),
            false, BI_DIMENSION_2D, BI_REGISTER_FORMAT_F32, false, false,
            BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4);
         I->flow = VA_FLOW_WAIT0126;
         I = bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5), atest);
         I = bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5),
                         atest);
         I->flow = VA_FLOW_WAIT;
         I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
                         bi_register(6), bi_register(7), bi_register(8),
                         BI_REGISTER_FORMAT_AUTO, 4, 4);
         I->flow = VA_FLOW_END;
   });
      });
}

TEST_F(MergeFlow, PreserveTerminalBarriers) {
   CASE({
TEST_F(MergeFlow, PreserveTerminalBarriers)
{
   CASE(
      {
         bi_barrier(b);
         flow(WAIT);
         flow(END);
   },
   {
      },
      {
         bi_barrier(b)->flow = VA_FLOW_WAIT;
         flow(END);
   });
      });
}

@@ -21,34 +21,38 @@
 * SOFTWARE.
 */

#include "va_compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"

#include <gtest/gtest.h>

#define CASE(instr, expected) do { \
   uint64_t _value = va_pack_instr(instr); \
   if (_value != expected) { \
      fprintf(stderr, "Got %" PRIx64 ", expected %" PRIx64 "\n", _value, (uint64_t) expected); \
      bi_print_instr(instr, stderr); \
      fprintf(stderr, "\n"); \
      ADD_FAILURE(); \
   } \
} while(0)
#define CASE(instr, expected) \
   do { \
      uint64_t _value = va_pack_instr(instr); \
      if (_value != expected) { \
         fprintf(stderr, "Got %" PRIx64 ", expected %" PRIx64 "\n", _value, \
                 (uint64_t)expected); \
         bi_print_instr(instr, stderr); \
         fprintf(stderr, "\n"); \
         ADD_FAILURE(); \
      } \
   } while (0)

class ValhallPacking : public testing::Test {
protected:
   ValhallPacking() {
 protected:
   ValhallPacking()
   {
      mem_ctx = ralloc_context(NULL);
      b = bit_builder(mem_ctx);

      zero = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 0), false);
      one = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 8), false);
      n4567 = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 4), true);
      zero = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 0), false);
      one = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 8), false);
      n4567 = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 4), true);
   }

   ~ValhallPacking() {
   ~ValhallPacking()
   {
      ralloc_free(mem_ctx);
   }

@@ -57,60 +61,67 @@ protected:
   bi_index zero, one, n4567;
};

TEST_F(ValhallPacking, Moves) {
TEST_F(ValhallPacking, Moves)
{
   CASE(bi_mov_i32_to(b, bi_register(1), bi_register(2)),
        0x0091c10000000002ULL);
   CASE(bi_mov_i32_to(b, bi_register(1), bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 5), false)),
        0x0091c1000000008aULL);
        0x0091c10000000002ULL);
   CASE(bi_mov_i32_to(b, bi_register(1),
                      bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 5), false)),
        0x0091c1000000008aULL);
}

TEST_F(ValhallPacking, Fadd) {
TEST_F(ValhallPacking, Fadd)
{
   CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_register(2)),
        0x00a4c00000000201ULL);
   CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_abs(bi_register(2))),
        0x00a4c02000000201ULL);
   CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(bi_register(2))),
        0x00a4c01000000201ULL);
        0x00a4c00000000201ULL);
   CASE(
      bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_abs(bi_register(2))),
      0x00a4c02000000201ULL);
   CASE(
      bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(bi_register(2))),
      0x00a4c01000000201ULL);

   CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_swz_16(bi_register(1), false, false),
   CASE(bi_fadd_v2f16_to(b, bi_register(0),
                         bi_swz_16(bi_register(1), false, false),
                         bi_swz_16(bi_register(0), true, true)),
        0x00a5c0000c000001ULL);
        0x00a5c0000c000001ULL);

   CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(1), bi_register(0)),
        0x00a5c00028000001ULL);
        0x00a5c00028000001ULL);

   CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_register(1),
                         bi_swz_16(bi_register(0), true, false)),
        0x00a5c00024000001ULL);
        0x00a5c00024000001ULL);

   CASE(bi_fadd_v2f16_to(b, bi_register(0), bi_discard(bi_abs(bi_register(0))),
                         bi_neg(zero)),
        0x00a5c0902800c040ULL);
        0x00a5c0902800c040ULL);

   CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1),
                       zero),
        0x00a4c0000000c001ULL);
   CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), zero),
        0x00a4c0000000c001ULL);

   CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1),
                       bi_neg(zero)),
        0x00a4c0100000c001ULL);
   CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1), bi_neg(zero)),
        0x00a4c0100000c001ULL);

   CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1),
                       bi_half(bi_register(0), true)),
        0x00a4c00008000001ULL);
        0x00a4c00008000001ULL);

   CASE(bi_fadd_f32_to(b, bi_register(0), bi_register(1),
                       bi_half(bi_register(0), false)),
        0x00a4c00004000001ULL);
        0x00a4c00004000001ULL);
}

TEST_F(ValhallPacking, Clper) {
TEST_F(ValhallPacking, Clper)
{
   CASE(bi_clper_i32_to(b, bi_register(0), bi_register(0), bi_byte(n4567, 0),
                        BI_INACTIVE_RESULT_F1, BI_LANE_OP_NONE, BI_SUBGROUP_SUBGROUP16),
        0x00a0c030128fc900);
                        BI_INACTIVE_RESULT_F1, BI_LANE_OP_NONE,
                        BI_SUBGROUP_SUBGROUP16),
        0x00a0c030128fc900);
}

TEST_F(ValhallPacking, Clamps) {
TEST_F(ValhallPacking, Clamps)
{
   bi_instr *I = bi_fadd_f32_to(b, bi_register(0), bi_register(1),
                                bi_neg(bi_abs(bi_register(2))));
   CASE(I, 0x00a4c03000000201ULL);

@@ -119,209 +130,243 @@ TEST_F(ValhallPacking, Clamps) {
   CASE(I, 0x00a4c03200000201ULL);
}

TEST_F(ValhallPacking, Misc) {
TEST_F(ValhallPacking, Misc)
{
   CASE(bi_fma_f32_to(b, bi_register(1), bi_discard(bi_register(1)),
                      bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 4), false),
                      bi_neg(zero)),
        0x00b2c10400c08841ULL);
                      bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 4), false),
                      bi_neg(zero)),
        0x00b2c10400c08841ULL);

   CASE(bi_fround_f32_to(b, bi_register(2), bi_discard(bi_neg(bi_register(2))),
                         BI_ROUND_RTN),
        0x0090c240800d0042ULL);
        0x0090c240800d0042ULL);

   CASE(bi_fround_v2f16_to(b, bi_half(bi_register(0), false), bi_register(0),
                           BI_ROUND_RTN),
        0x00904000a00f0000ULL);
                           BI_ROUND_RTN),
        0x00904000a00f0000ULL);

   CASE(bi_fround_v2f16_to(b, bi_half(bi_register(0), false),
                           bi_swz_16(bi_register(1), true, false), BI_ROUND_RTN),
        0x00904000900f0001ULL);
   CASE(
      bi_fround_v2f16_to(b, bi_half(bi_register(0), false),
                         bi_swz_16(bi_register(1), true, false), BI_ROUND_RTN),
      0x00904000900f0001ULL);
}

TEST_F(ValhallPacking, FaddImm) {
   CASE(bi_fadd_imm_f32_to(b, bi_register(2), bi_discard(bi_register(2)), 0x4847C6C0),
        0x0114C24847C6C042ULL);
TEST_F(ValhallPacking, FaddImm)
{
   CASE(bi_fadd_imm_f32_to(b, bi_register(2), bi_discard(bi_register(2)),
                           0x4847C6C0),
        0x0114C24847C6C042ULL);

   CASE(bi_fadd_imm_v2f16_to(b, bi_register(2), bi_discard(bi_register(2)), 0x70AC6784),
        0x0115C270AC678442ULL);
   CASE(bi_fadd_imm_v2f16_to(b, bi_register(2), bi_discard(bi_register(2)),
                             0x70AC6784),
        0x0115C270AC678442ULL);
}

TEST_F(ValhallPacking, Comparions) {
TEST_F(ValhallPacking, Comparions)
{
   CASE(bi_icmp_or_v2s16_to(b, bi_register(2),
                            bi_discard(bi_swz_16(bi_register(3), true, false)),
                            bi_discard(bi_swz_16(bi_register(2), true, false)),
                            zero, BI_CMPF_GT, BI_RESULT_TYPE_M1),
                            bi_discard(bi_swz_16(bi_register(3), true, false)),
                            bi_discard(bi_swz_16(bi_register(2), true, false)),
                            zero, BI_CMPF_GT, BI_RESULT_TYPE_M1),
        0x00f9c21184c04243);

   CASE(bi_fcmp_or_v2f16_to(b, bi_register(2),
                            bi_discard(bi_swz_16(bi_register(3), true, false)),
                            bi_discard(bi_swz_16(bi_register(2), false, false)),
                            zero, BI_CMPF_GT, BI_RESULT_TYPE_M1),
        0x00f5c20190c04243);
                            bi_discard(bi_swz_16(bi_register(3), true, false)),
                            bi_discard(bi_swz_16(bi_register(2), false, false)),
                            zero, BI_CMPF_GT, BI_RESULT_TYPE_M1),
        0x00f5c20190c04243);
}

TEST_F(ValhallPacking, Conversions) {
TEST_F(ValhallPacking, Conversions)
{
   CASE(bi_v2s16_to_v2f16_to(b, bi_register(2), bi_discard(bi_register(2))),
        0x0090c22000070042);
        0x0090c22000070042);
}

TEST_F(ValhallPacking, BranchzI16) {
   bi_instr *I = bi_branchz_i16(b, bi_half(bi_register(2), false), bi_null(), BI_CMPF_EQ);
TEST_F(ValhallPacking, BranchzI16)
{
   bi_instr *I =
      bi_branchz_i16(b, bi_half(bi_register(2), false), bi_null(), BI_CMPF_EQ);
   I->branch_offset = 1;
   CASE(I, 0x001fc03000000102);
}

TEST_F(ValhallPacking, BranchzI16Backwards) {
TEST_F(ValhallPacking, BranchzI16Backwards)
{
   bi_instr *I = bi_branchz_i16(b, zero, bi_null(), BI_CMPF_EQ);
   I->branch_offset = -8;
   CASE(I, 0x001fc017fffff8c0);
}

TEST_F(ValhallPacking, Blend) {
   CASE(bi_blend_to(b, bi_null(), bi_register(0), bi_register(60),
                    bi_fau(BIR_FAU_BLEND_0, false),
                    bi_fau(BIR_FAU_BLEND_0, true),
                    bi_null(), BI_REGISTER_FORMAT_F16, 2, 0),
        0x007f4004333c00f0);
TEST_F(ValhallPacking, Blend)
{
   CASE(
      bi_blend_to(b, bi_null(), bi_register(0), bi_register(60),
                  bi_fau(BIR_FAU_BLEND_0, false), bi_fau(BIR_FAU_BLEND_0, true),
                  bi_null(), BI_REGISTER_FORMAT_F16, 2, 0),
      0x007f4004333c00f0);
}

TEST_F(ValhallPacking, Mux) {
TEST_F(ValhallPacking, Mux)
{
   CASE(bi_mux_i32_to(b, bi_register(0), bi_discard(bi_register(0)),
                      bi_discard(bi_register(4)),
                      bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 0), false), BI_MUX_BIT),
                      bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 0), false),
                      BI_MUX_BIT),
        0x00b8c00300804440ull);
}

TEST_F(ValhallPacking, AtestFP16) {
TEST_F(ValhallPacking, AtestFP16)
{
   CASE(bi_atest_to(b, bi_register(60), bi_register(60),
                    bi_half(bi_register(1), true),
                    bi_fau(BIR_FAU_ATEST_PARAM, false)),
        0x007dbc0208ea013c);
}

TEST_F(ValhallPacking, AtestFP32) {
TEST_F(ValhallPacking, AtestFP32)
{
   CASE(bi_atest_to(b, bi_register(60), bi_register(60), one,
                    bi_fau(BIR_FAU_ATEST_PARAM, false)),
        0x007dbc0200ead03c);
}

TEST_F(ValhallPacking, Transcendentals) {
TEST_F(ValhallPacking, Transcendentals)
{
   CASE(bi_frexpm_f32_to(b, bi_register(1), bi_register(0), false, true),
        0x0099c10001000000);

   CASE(bi_frexpe_f32_to(b, bi_register(0), bi_discard(bi_register(0)), false, true),
   CASE(bi_frexpe_f32_to(b, bi_register(0), bi_discard(bi_register(0)), false,
                         true),
        0x0099c00001020040);

   CASE(bi_frsq_f32_to(b, bi_register(2), bi_register(1)),
        0x009cc20000020001);
   CASE(bi_frsq_f32_to(b, bi_register(2), bi_register(1)), 0x009cc20000020001);

   CASE(bi_fma_rscale_f32_to(b, bi_register(0), bi_discard(bi_register(1)), bi_discard(bi_register(2)), bi_neg(zero), bi_discard(bi_register(0)), BI_SPECIAL_LEFT),
   CASE(bi_fma_rscale_f32_to(b, bi_register(0), bi_discard(bi_register(1)),
                             bi_discard(bi_register(2)), bi_neg(zero),
                             bi_discard(bi_register(0)), BI_SPECIAL_LEFT),
        0x0162c00440c04241);
}

TEST_F(ValhallPacking, Csel) {
TEST_F(ValhallPacking, Csel)
{
   CASE(bi_csel_u32_to(b, bi_register(1), bi_discard(bi_register(2)),
                       bi_discard(bi_register(3)),
                       bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), false),
                       bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), true),
                       bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), false),
                       bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true),
                       BI_CMPF_EQ),
        0x0150c10085844342);

   CASE(bi_csel_u32_to(b, bi_register(1), bi_discard(bi_register(2)),
                       bi_discard(bi_register(3)),
                       bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), false),
                       bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), true),
                       bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), false),
                       bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true),
                       BI_CMPF_LT),
        0x0150c10485844342);

   CASE(bi_csel_s32_to(b, bi_register(1), bi_discard(bi_register(2)),
                       bi_discard(bi_register(3)),
                       bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), false),
                       bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 2), true),
                       bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), false),
                       bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 2), true),
                       BI_CMPF_LT),
        0x0158c10485844342);
}

TEST_F(ValhallPacking, LdAttrImm) {
   bi_instr *I = bi_ld_attr_imm_to(b, bi_register(0),
                                   bi_discard(bi_register(60)),
                                   bi_discard(bi_register(61)),
                                   BI_REGISTER_FORMAT_F16, BI_VECSIZE_V4, 1);
TEST_F(ValhallPacking, LdAttrImm)
{
   bi_instr *I = bi_ld_attr_imm_to(
      b, bi_register(0), bi_discard(bi_register(60)),
      bi_discard(bi_register(61)), BI_REGISTER_FORMAT_F16, BI_VECSIZE_V4, 1);
   I->table = 1;

   CASE(I, 0x0066800433117d7c);
}

TEST_F(ValhallPacking, LdVarBufImmF16) {
TEST_F(ValhallPacking, LdVarBufImmF16)
{
   CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61),
                                 BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
                                 BI_SOURCE_FORMAT_F16,
                                 BI_UPDATE_RETRIEVE, BI_VECSIZE_V4, 0),
                                 BI_SOURCE_FORMAT_F16, BI_UPDATE_RETRIEVE,
                                 BI_VECSIZE_V4, 0),
        0x005d82143300003d);

   CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61),
                                 BI_REGISTER_FORMAT_F16, BI_SAMPLE_SAMPLE,
                                 BI_SOURCE_FORMAT_F16,
                                 BI_UPDATE_STORE, BI_VECSIZE_V4, 0),
        0x005d80843300003d);
                                 BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE,
|
||||
BI_VECSIZE_V4, 0),
|
||||
0x005d80843300003d);
|
||||
|
||||
CASE(bi_ld_var_buf_imm_f16_to(b, bi_register(0), bi_register(61),
|
||||
BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTROID,
|
||||
BI_SOURCE_FORMAT_F16,
|
||||
BI_UPDATE_STORE, BI_VECSIZE_V4, 8),
|
||||
0x005d80443308003d);
|
||||
BI_SOURCE_FORMAT_F16, BI_UPDATE_STORE,
|
||||
BI_VECSIZE_V4, 8),
|
||||
0x005d80443308003d);
|
||||
}
|
||||
|
||||
TEST_F(ValhallPacking, LeaBufImm) {
|
||||
TEST_F(ValhallPacking, LeaBufImm)
|
||||
{
|
||||
CASE(bi_lea_buf_imm_to(b, bi_register(4), bi_discard(bi_register(59))),
|
||||
0x005e840400000d7b);
|
||||
}
|
||||
|
||||
TEST_F(ValhallPacking, StoreSegment) {
|
||||
TEST_F(ValhallPacking, StoreSegment)
|
||||
{
|
||||
CASE(bi_store_i96(b, bi_register(0), bi_discard(bi_register(4)),
|
||||
bi_discard(bi_register(5)), BI_SEG_VARY, 0),
|
||||
bi_discard(bi_register(5)), BI_SEG_VARY, 0),
|
||||
0x0061400632000044);
|
||||
}
|
||||
|
||||
TEST_F(ValhallPacking, Convert16To32) {
|
||||
CASE(bi_u16_to_u32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), false, false))),
|
||||
0x0090c20000140077);
|
||||
TEST_F(ValhallPacking, Convert16To32)
|
||||
{
|
||||
CASE(bi_u16_to_u32_to(b, bi_register(2),
|
||||
bi_discard(bi_swz_16(bi_register(55), false, false))),
|
||||
0x0090c20000140077);
|
||||
|
||||
CASE(bi_u16_to_u32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), true, false))),
|
||||
0x0090c20010140077);
|
||||
CASE(bi_u16_to_u32_to(b, bi_register(2),
|
||||
bi_discard(bi_swz_16(bi_register(55), true, false))),
|
||||
0x0090c20010140077);
|
||||
|
||||
CASE(bi_u16_to_f32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), false, false))),
|
||||
0x0090c20000150077);
|
||||
CASE(bi_u16_to_f32_to(b, bi_register(2),
|
||||
bi_discard(bi_swz_16(bi_register(55), false, false))),
|
||||
0x0090c20000150077);
|
||||
|
||||
CASE(bi_u16_to_f32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), true, false))),
|
||||
0x0090c20010150077);
|
||||
CASE(bi_u16_to_f32_to(b, bi_register(2),
|
||||
bi_discard(bi_swz_16(bi_register(55), true, false))),
|
||||
0x0090c20010150077);
|
||||
|
||||
CASE(bi_s16_to_s32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), false, false))),
|
||||
0x0090c20000040077);
|
||||
CASE(bi_s16_to_s32_to(b, bi_register(2),
|
||||
bi_discard(bi_swz_16(bi_register(55), false, false))),
|
||||
0x0090c20000040077);
|
||||
|
||||
CASE(bi_s16_to_s32_to(b, bi_register(2), bi_discard(bi_swz_16(bi_register(55), true, false))),
|
||||
0x0090c20010040077);
|
||||
CASE(bi_s16_to_s32_to(b, bi_register(2),
|
||||
bi_discard(bi_swz_16(bi_register(55), true, false))),
|
||||
0x0090c20010040077);
|
||||
}
|
||||
|
||||
TEST_F(ValhallPacking, Swizzle8) {
|
||||
CASE(bi_icmp_or_v4u8_to(b, bi_register(1), bi_byte(bi_register(0), 0),
|
||||
zero, zero, BI_CMPF_NE, BI_RESULT_TYPE_I1),
|
||||
TEST_F(ValhallPacking, Swizzle8)
|
||||
{
|
||||
CASE(bi_icmp_or_v4u8_to(b, bi_register(1), bi_byte(bi_register(0), 0), zero,
|
||||
zero, BI_CMPF_NE, BI_RESULT_TYPE_I1),
|
||||
0x00f2c14300c0c000);
|
||||
}
|
||||
|
||||
TEST_F(ValhallPacking, FauPage1) {
|
||||
CASE(bi_mov_i32_to(b, bi_register(1), bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 32), false)),
|
||||
0x0291c10000000080ULL);
|
||||
TEST_F(ValhallPacking, FauPage1)
|
||||
{
|
||||
CASE(bi_mov_i32_to(b, bi_register(1),
|
||||
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 32), false)),
|
||||
0x0291c10000000080ULL);
|
||||
}
|
||||
|
||||
TEST_F(ValhallPacking, LdTileV3F16) {
|
||||
TEST_F(ValhallPacking, LdTileV3F16)
|
||||
{
|
||||
CASE(bi_ld_tile_to(b, bi_register(4), bi_discard(bi_register(0)),
|
||||
bi_register(60), bi_register(3),
|
||||
BI_REGISTER_FORMAT_F16, BI_VECSIZE_V3),
|
||||
bi_register(60), bi_register(3), BI_REGISTER_FORMAT_F16,
|
||||
BI_VECSIZE_V3),
|
||||
0x0078840423033c40);
|
||||
}
|
||||
|
||||
TEST_F(ValhallPacking, Rhadd8) {
|
||||
TEST_F(ValhallPacking, Rhadd8)
|
||||
{
|
||||
CASE(bi_hadd_v4s8_to(b, bi_register(0), bi_discard(bi_register(1)),
|
||||
bi_discard(bi_register(0)), BI_ROUND_RTP),
|
||||
0x00aac000400b4041);
|
||||
|
|
|
|||
|
|
@ -21,41 +21,44 @@
* SOFTWARE.
*/

#include "va_compiler.h"
#include "bi_test.h"
#include "bi_builder.h"
#include "bi_test.h"
#include "va_compiler.h"

#include <gtest/gtest.h>

#define CASE(instr, expected) do { \
if (va_validate_fau(instr) != expected) { \
fprintf(stderr, "Incorrect validation for:\n"); \
bi_print_instr(instr, stderr); \
fprintf(stderr, "\n"); \
ADD_FAILURE(); \
} \
} while(0)
#define CASE(instr, expected) \
do { \
if (va_validate_fau(instr) != expected) { \
fprintf(stderr, "Incorrect validation for:\n"); \
bi_print_instr(instr, stderr); \
fprintf(stderr, "\n"); \
ADD_FAILURE(); \
} \
} while (0)

#define VALID(instr) CASE(instr, true)
#define VALID(instr) CASE(instr, true)
#define INVALID(instr) CASE(instr, false)

class ValidateFau : public testing::Test {
protected:
ValidateFau() {
protected:
ValidateFau()
{
mem_ctx = ralloc_context(NULL);
b = bit_builder(mem_ctx);

zero = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 0), false);
imm1 = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 1), false);
imm2 = bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | 2), false);
unif = bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 5), false);
unif_hi = bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 5), true);
unif2 = bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 6), false);
zero = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 0), false);
imm1 = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 1), false);
imm2 = bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 2), false);
unif = bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 5), false);
unif_hi = bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 5), true);
unif2 = bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 6), false);
core_id = bi_fau(BIR_FAU_CORE_ID, false);
lane_id = bi_fau(BIR_FAU_LANE_ID, false);
}

~ValidateFau() {
~ValidateFau()
{
ralloc_free(mem_ctx);
}

@ -66,8 +69,8 @@ protected:

TEST_F(ValidateFau, One64BitUniformSlot)
{
VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(3),
unif));
VALID(
bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(3), unif));
VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), unif_hi, unif));
VALID(bi_fma_f32_to(b, bi_register(1), unif, unif, unif_hi));
INVALID(bi_fma_f32_to(b, bi_register(1), unif, unif2, bi_register(1)));

@ -77,8 +80,8 @@ TEST_F(ValidateFau, One64BitUniformSlot)
* marked as valid in early versions of the validator.
*/
INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 0), false),
bi_fau((enum bir_fau) (BIR_FAU_UNIFORM | 1), true)));
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 0), false),
bi_fau((enum bir_fau)(BIR_FAU_UNIFORM | 1), true)));
}

TEST_F(ValidateFau, Combined64BitUniformsConstants)

@ -99,17 +102,16 @@ TEST_F(ValidateFau, UniformsOnlyInDefaultMode)
TEST_F(ValidateFau, SingleSpecialImmediate)
{
VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(2),
lane_id));
lane_id));
VALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), bi_register(2),
core_id));
INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), lane_id,
core_id));
core_id));
INVALID(bi_fma_f32_to(b, bi_register(1), bi_register(2), lane_id, core_id));
}

TEST_F(ValidateFau, SmokeTests)
{
VALID(bi_mov_i32_to(b, bi_register(1), bi_register(2)));
VALID(bi_mov_i32_to(b, bi_register(1), unif));
VALID(bi_fma_f32_to(b, bi_register(1), bi_discard(bi_register(1)),
unif, bi_neg(zero)));
VALID(bi_fma_f32_to(b, bi_register(1), bi_discard(bi_register(1)), unif,
bi_neg(zero)));
}
@ -79,7 +79,7 @@ va_select_fau_page(const bi_instr *I)
{
bi_foreach_src(I, s) {
if (I->src[s].type == BI_INDEX_FAU)
return va_fau_page((enum bir_fau) I->src[s].value);
return va_fau_page((enum bir_fau)I->src[s].value);
}

return 0;

@ -91,8 +91,7 @@ struct va_stats {
unsigned fma, cvt, sfu, v, ls, t;
};

void
va_count_instr_stats(bi_instr *I, struct va_stats *stats);
void va_count_instr_stats(bi_instr *I, struct va_stats *stats);

#ifdef __cplusplus
} /* extern C */
@ -21,9 +21,9 @@
* SOFTWARE.
*/

#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall_enums.h"
#include "bi_builder.h"

/*
* Insert flow control into a scheduled and register allocated shader. This

@ -176,7 +176,8 @@ bi_depend_on_writers(struct bi_scoreboard_state *st, uint64_t regmask)
/* Sets the dependencies for a given clause, updating the model */

static void
bi_set_dependencies(bi_block *block, bi_instr *I, struct bi_scoreboard_state *st)
bi_set_dependencies(bi_block *block, bi_instr *I,
struct bi_scoreboard_state *st)
{
/* Depend on writers to handle read-after-write and write-after-write
* dependencies. Write-after-read dependencies are handled in the hardware

@ -482,7 +483,8 @@ va_insert_flow_control_nops(bi_context *ctx)
*/
if (va_should_end(block) || block->needs_nop) {
/* Don't bother adding a NOP into an unreachable block */
if (block == bi_start_block(&ctx->blocks) || bi_num_predecessors(block))
if (block == bi_start_block(&ctx->blocks) ||
bi_num_predecessors(block))
bi_flow(ctx, bi_after_block(block), VA_FLOW_END);
} else if (bi_reconverge_branches(block)) {
/* TODO: Do we have ever need to reconverge from an empty block? */
@ -21,9 +21,9 @@
* SOFTWARE.
*/

#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall.h"
#include "bi_builder.h"

/* Only some special immediates are available, as specified in the Table of
* Immediates in the specification. Other immediates must be lowered, either to

@ -51,7 +51,7 @@ va_lut_index_32(uint32_t imm)
static bi_index
va_lut_index_16(uint16_t imm)
{
uint16_t *arr16 = (uint16_t *) valhall_immediates;
uint16_t *arr16 = (uint16_t *)valhall_immediates;

for (unsigned i = 0; i < (2 * ARRAY_SIZE(valhall_immediates)); ++i) {
if (arr16[i] == imm)

@ -64,7 +64,7 @@ va_lut_index_16(uint16_t imm)
UNUSED static bi_index
va_lut_index_8(uint8_t imm)
{
uint8_t *arr8 = (uint8_t *) valhall_immediates;
uint8_t *arr8 = (uint8_t *)valhall_immediates;

for (unsigned i = 0; i < (4 * ARRAY_SIZE(valhall_immediates)); ++i) {
if (arr8[i] == imm)

@ -109,36 +109,43 @@ is_extension_of_16(uint32_t x, bool is_signed)
}

static bi_index
va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, bool is_signed, bool staging)
va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info,
bool is_signed, bool staging)
{
/* Try the constant as-is */
if (!staging) {
bi_index lut = va_lut_index_32(value);
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;

/* ...or negated as a FP32 constant */
if (info.absneg && info.size == VA_SIZE_32) {
lut = bi_neg(va_lut_index_32(fui(-uif(value))));
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
}

/* ...or negated as a FP16 constant */
if (info.absneg && info.size == VA_SIZE_16) {
lut = bi_neg(va_lut_index_32(value ^ 0x80008000));
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
}
}

/* Try using a single half of a FP16 constant */
bool replicated_halves = (value & 0xFFFF) == (value >> 16);
if (!staging && info.swizzle && info.size == VA_SIZE_16 && replicated_halves) {
if (!staging && info.swizzle && info.size == VA_SIZE_16 &&
replicated_halves) {
bi_index lut = va_lut_index_16(value & 0xFFFF);
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;

/* ...possibly negated */
if (info.absneg) {
lut = bi_neg(va_lut_index_16((value & 0xFFFF) ^ 0x8000));
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
}
}

@ -147,25 +154,28 @@ va_resolve_constant(bi_builder *b, uint32_t value, struct va_src_info info, bool
is_extension_of_8(value, is_signed)) {

bi_index lut = va_lut_index_8(value & 0xFF);
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
}

/* Try extending a halfword */
if (!staging && info.widen &&
is_extension_of_16(value, is_signed)) {
if (!staging && info.widen && is_extension_of_16(value, is_signed)) {

bi_index lut = va_lut_index_16(value & 0xFFFF);
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
}

/* Try demoting the constant to FP16 */
if (!staging && info.swizzle && info.size == VA_SIZE_32) {
bi_index lut = va_demote_constant_fp16(value);
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;

if (info.absneg) {
bi_index lut = bi_neg(va_demote_constant_fp16(fui(-uif(value))));
if (!bi_is_null(lut)) return lut;
if (!bi_is_null(lut))
return lut;
}
}

@ -218,7 +228,8 @@ va_lower_constants(bi_context *ctx, bi_instr *I)
value = bi_apply_swizzle(value, swz);
}

bi_index cons = va_resolve_constant(&b, value, info, is_signed, staging);
bi_index cons =
va_resolve_constant(&b, value, info, is_signed, staging);
cons.neg ^= I->src[s].neg;
I->src[s] = cons;
@ -21,9 +21,9 @@
* SOFTWARE.
*/

#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall.h"
#include "bi_builder.h"

static bi_instr *
lower(bi_builder *b, bi_instr *I)

@ -38,45 +38,56 @@ lower(bi_builder *b, bi_instr *I)
return bi_iadd_v4u8_to(b, I->dest[0], I->src[0], bi_zero(), false);

case BI_OPCODE_ICMP_I32:
return bi_icmp_or_u32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_u32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);

case BI_OPCODE_ICMP_V2I16:
return bi_icmp_or_v2u16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_v2u16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);

case BI_OPCODE_ICMP_V4I8:
return bi_icmp_or_v4u8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_v4u8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);

case BI_OPCODE_ICMP_U32:
return bi_icmp_or_u32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_u32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);

case BI_OPCODE_ICMP_V2U16:
return bi_icmp_or_v2u16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_v2u16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);

case BI_OPCODE_ICMP_V4U8:
return bi_icmp_or_v4u8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_v4u8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);

case BI_OPCODE_ICMP_S32:
return bi_icmp_or_s32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_s32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);

case BI_OPCODE_ICMP_V2S16:
return bi_icmp_or_v2s16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_v2s16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);

case BI_OPCODE_ICMP_V4S8:
return bi_icmp_or_v4s8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_icmp_or_v4s8_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);

case BI_OPCODE_FCMP_F32:
return bi_fcmp_or_f32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_fcmp_or_f32_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);

case BI_OPCODE_FCMP_V2F16:
return bi_fcmp_or_v2f16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(), I->cmpf, I->result_type);
return bi_fcmp_or_v2f16_to(b, I->dest[0], I->src[0], I->src[1], bi_zero(),
I->cmpf, I->result_type);

/* Integer CSEL must have a signedness */
case BI_OPCODE_CSEL_I32:
case BI_OPCODE_CSEL_V2I16:
assert(I->cmpf == BI_CMPF_EQ || I->cmpf == BI_CMPF_NE);

I->op = (I->op == BI_OPCODE_CSEL_I32) ? BI_OPCODE_CSEL_U32 :
BI_OPCODE_CSEL_V2U16;
I->op = (I->op == BI_OPCODE_CSEL_I32) ? BI_OPCODE_CSEL_U32
: BI_OPCODE_CSEL_V2U16;
return NULL;

/* Jump -> conditional branch with condition tied to true. */

@ -117,7 +128,7 @@ lower(bi_builder *b, bi_instr *I)

case BI_OPCODE_FADD_RSCALE_F32:
return bi_fma_rscale_f32_to(b, I->dest[0], I->src[0], bi_imm_f32(1.0),
I->src[1], I->src[2], I->special);
I->src[1], I->src[2], I->special);

default:
return NULL;
@ -21,8 +21,8 @@
* SOFTWARE.
*/

#include "va_compiler.h"
#include "bi_builder.h"
#include "va_compiler.h"

/*
* Bifrost uses split 64-bit addresses, specified as two consecutive sources.

@ -38,8 +38,7 @@ lower_split_src(bi_context *ctx, bi_instr *I, unsigned s)
bi_index offset_fau = I->src[s];
offset_fau.offset++;

if (I->src[s].type == BI_INDEX_FAU &&
I->src[s].offset == 0 &&
if (I->src[s].type == BI_INDEX_FAU && I->src[s].offset == 0 &&
bi_is_value_equiv(offset_fau, I->src[s + 1])) {
return;
}
@ -97,7 +97,7 @@ scoreboard_update(struct bi_scoreboard_state *st, const bi_instr *I)
/* Unmark registers after they are waited on */
for (unsigned i = 0; i < VA_NUM_GENERAL_SLOTS; ++i) {
if (waits_on_slot(I->flow, i))
st->read[i] = 0;
st->read[i] = 0;
}
}

@ -111,8 +111,8 @@ va_analyze_scoreboard_reads(bi_context *ctx)
bi_worklist_push_tail(&worklist, block);

/* Reset analysis from previous pass */
block->scoreboard_in = (struct bi_scoreboard_state){ 0 };
block->scoreboard_out = (struct bi_scoreboard_state){ 0 };
block->scoreboard_in = (struct bi_scoreboard_state){0};
block->scoreboard_out = (struct bi_scoreboard_state){0};
}

/* Perform forward data flow analysis to calculate dependencies */
@ -21,9 +21,9 @@
* SOFTWARE.
*/

#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall_enums.h"
#include "bi_builder.h"

/*
* Merge NOPs with flow control with nearby instructions to eliminate the NOPs,

@ -80,8 +80,10 @@ merge_end_reconverge(bi_block *block)
bi_instr *last = list_last_entry(&block->instructions, bi_instr, link);
bi_instr *penult = bi_prev_op(last);

if (last->op != BI_OPCODE_NOP) return;
if (last->flow != VA_FLOW_RECONVERGE && last->flow != VA_FLOW_END) return;
if (last->op != BI_OPCODE_NOP)
return;
if (last->flow != VA_FLOW_RECONVERGE && last->flow != VA_FLOW_END)
return;

/* End implies all other flow control except for waiting on barriers (slot
* #7, with VA_FLOW_WAIT), so remove blocking flow control.

@ -99,7 +101,8 @@ merge_end_reconverge(bi_block *block)
}

/* If there is blocking flow control, we can't merge */
if (penult->flow != VA_FLOW_NONE) return;
if (penult->flow != VA_FLOW_NONE)
return;

/* Else, merge */
penult->flow = last->flow;

@ -133,8 +136,8 @@ merge_waits(bi_block *block)
bi_instr *last_free = NULL;

bi_foreach_instr_in_block_safe(block, I) {
if (last_free != NULL &&
I->op == BI_OPCODE_NOP && va_flow_is_wait_or_none(I->flow)) {
if (last_free != NULL && I->op == BI_OPCODE_NOP &&
va_flow_is_wait_or_none(I->flow)) {

/* Merge waits with compatible instructions */
last_free->flow = union_waits(last_free->flow, I->flow);

@ -212,8 +215,10 @@ va_merge_flow(bi_context *ctx)
{
bi_foreach_block(ctx, block) {
/* If there are less than 2 instructions, there's nothing to merge */
if (list_is_empty(&block->instructions)) continue;
if (list_is_singular(&block->instructions)) continue;
if (list_is_empty(&block->instructions))
continue;
if (list_is_singular(&block->instructions))
continue;

merge_end_reconverge(block);
merge_waits(block);
@ -29,15 +29,21 @@ static enum bi_opcode
va_op_add_imm(enum bi_opcode op)
{
switch (op) {
case BI_OPCODE_FADD_F32: return BI_OPCODE_FADD_IMM_F32;
case BI_OPCODE_FADD_V2F16: return BI_OPCODE_FADD_IMM_V2F16;
case BI_OPCODE_FADD_F32:
return BI_OPCODE_FADD_IMM_F32;
case BI_OPCODE_FADD_V2F16:
return BI_OPCODE_FADD_IMM_V2F16;
case BI_OPCODE_IADD_S32:
case BI_OPCODE_IADD_U32: return BI_OPCODE_IADD_IMM_I32;
case BI_OPCODE_IADD_U32:
return BI_OPCODE_IADD_IMM_I32;
case BI_OPCODE_IADD_V2S16:
case BI_OPCODE_IADD_V2U16: return BI_OPCODE_IADD_IMM_V2I16;
case BI_OPCODE_IADD_V2U16:
return BI_OPCODE_IADD_IMM_V2I16;
case BI_OPCODE_IADD_V4S8:
case BI_OPCODE_IADD_V4U8: return BI_OPCODE_IADD_IMM_V4I8;
default: return 0;
case BI_OPCODE_IADD_V4U8:
return BI_OPCODE_IADD_IMM_V4I8;
default:
return 0;
}
}

@ -46,8 +52,8 @@ va_is_add_imm(bi_instr *I, unsigned s)
{
assert(s < I->nr_srcs);

return I->src[s].swizzle == BI_SWIZZLE_H01 &&
!I->src[s].abs && !I->src[s].neg && !I->clamp && !I->round;
return I->src[s].swizzle == BI_SWIZZLE_H01 && !I->src[s].abs &&
!I->src[s].neg && !I->clamp && !I->round;
}

static unsigned

@ -83,11 +89,14 @@ va_fuse_add_imm(bi_instr *I)
}

enum bi_opcode op = va_op_add_imm(I->op);
if (!op) return;
if (!op)
return;

unsigned s = va_choose_imm(I);
if (s > 1) return;
if (!va_is_add_imm(I, 1 - s)) return;
if (s > 1)
return;
if (!va_is_add_imm(I, 1 - s))
return;

I->op = op;
I->index = bi_apply_swizzle(I->src[s].value, I->src[s].swizzle);
@ -21,10 +21,10 @@
* SOFTWARE.
*/

#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall.h"
#include "valhall_enums.h"
#include "bi_builder.h"

/* This file contains the final passes of the compiler. Running after
* scheduling and RA, the IR is now finalized, so we need to emit it to actual

@ -36,7 +36,7 @@
* Prints the (first) failing instruction to aid debugging.
*/
NORETURN static void PRINTFLIKE(2, 3)
invalid_instruction(const bi_instr *I, const char *cause, ...)
invalid_instruction(const bi_instr *I, const char *cause, ...)
{
fputs("\nInvalid ", stderr);

@ -56,8 +56,9 @@ invalid_instruction(const bi_instr *I, const char *cause, ...)
* Like assert, but prints the instruction if the assertion fails to aid
* debugging invalid inputs to the packing module.
*/
#define pack_assert(I, cond) \
if (!(cond)) invalid_instruction(I, "invariant " #cond);
#define pack_assert(I, cond) \
if (!(cond)) \
invalid_instruction(I, "invariant " #cond);

/*
* Validate that two adjacent 32-bit sources form an aligned 64-bit register

@ -95,14 +96,20 @@ static unsigned
va_pack_fau_special(const bi_instr *I, enum bir_fau fau)
{
switch (fau) {
case BIR_FAU_ATEST_PARAM: return VA_FAU_SPECIAL_PAGE_0_ATEST_DATUM;
case BIR_FAU_TLS_PTR: return VA_FAU_SPECIAL_PAGE_1_THREAD_LOCAL_POINTER;
case BIR_FAU_WLS_PTR: return VA_FAU_SPECIAL_PAGE_1_WORKGROUP_LOCAL_POINTER;
case BIR_FAU_LANE_ID: return VA_FAU_SPECIAL_PAGE_3_LANE_ID;
case BIR_FAU_PROGRAM_COUNTER: return VA_FAU_SPECIAL_PAGE_3_PROGRAM_COUNTER;
case BIR_FAU_SAMPLE_POS_ARRAY:return VA_FAU_SPECIAL_PAGE_0_SAMPLE;
case BIR_FAU_ATEST_PARAM:
return VA_FAU_SPECIAL_PAGE_0_ATEST_DATUM;
case BIR_FAU_TLS_PTR:
return VA_FAU_SPECIAL_PAGE_1_THREAD_LOCAL_POINTER;
case BIR_FAU_WLS_PTR:
return VA_FAU_SPECIAL_PAGE_1_WORKGROUP_LOCAL_POINTER;
case BIR_FAU_LANE_ID:
return VA_FAU_SPECIAL_PAGE_3_LANE_ID;
case BIR_FAU_PROGRAM_COUNTER:
return VA_FAU_SPECIAL_PAGE_3_PROGRAM_COUNTER;
case BIR_FAU_SAMPLE_POS_ARRAY:
return VA_FAU_SPECIAL_PAGE_0_SAMPLE;

case BIR_FAU_BLEND_0...(BIR_FAU_BLEND_0 + 7):
case BIR_FAU_BLEND_0 ...(BIR_FAU_BLEND_0 + 7):
return VA_FAU_SPECIAL_PAGE_0_BLEND_DESCRIPTOR_0 + (fau - BIR_FAU_BLEND_0);

default:

@ -136,7 +143,8 @@ va_pack_src(const bi_instr *I, unsigned s)

if (idx.type == BI_INDEX_REGISTER) {
unsigned value = va_pack_reg(I, idx);
if (idx.discard) value |= (1 << 6);
if (idx.discard)
value |= (1 << 6);
return value;
} else if (idx.type == BI_INDEX_FAU) {
pack_assert(I, idx.offset <= 1);

@ -150,10 +158,14 @@ static unsigned
va_pack_wrmask(const bi_instr *I)
{
switch (I->dest[0].swizzle) {
case BI_SWIZZLE_H00: return 0x1;
case BI_SWIZZLE_H11: return 0x2;
case BI_SWIZZLE_H01: return 0x3;
default: invalid_instruction(I, "write mask");
case BI_SWIZZLE_H00:
return 0x1;
case BI_SWIZZLE_H11:
return 0x2;
case BI_SWIZZLE_H01:
return 0x3;
default:
invalid_instruction(I, "write mask");
}
}

@ -161,17 +173,27 @@ static enum va_atomic_operation
va_pack_atom_opc(const bi_instr *I)
{
switch (I->atom_opc) {
case BI_ATOM_OPC_AADD: return VA_ATOMIC_OPERATION_AADD;
case BI_ATOM_OPC_ASMIN: return VA_ATOMIC_OPERATION_ASMIN;
case BI_ATOM_OPC_ASMAX: return VA_ATOMIC_OPERATION_ASMAX;
case BI_ATOM_OPC_AUMIN: return VA_ATOMIC_OPERATION_AUMIN;
case BI_ATOM_OPC_AUMAX: return VA_ATOMIC_OPERATION_AUMAX;
case BI_ATOM_OPC_AAND: return VA_ATOMIC_OPERATION_AAND;
case BI_ATOM_OPC_AOR: return VA_ATOMIC_OPERATION_AOR;
case BI_ATOM_OPC_AXOR: return VA_ATOMIC_OPERATION_AXOR;
case BI_ATOM_OPC_AADD:
return VA_ATOMIC_OPERATION_AADD;
case BI_ATOM_OPC_ASMIN:
return VA_ATOMIC_OPERATION_ASMIN;
case BI_ATOM_OPC_ASMAX:
return VA_ATOMIC_OPERATION_ASMAX;
case BI_ATOM_OPC_AUMIN:
return VA_ATOMIC_OPERATION_AUMIN;
case BI_ATOM_OPC_AUMAX:
return VA_ATOMIC_OPERATION_AUMAX;
case BI_ATOM_OPC_AAND:
return VA_ATOMIC_OPERATION_AAND;
case BI_ATOM_OPC_AOR:
return VA_ATOMIC_OPERATION_AOR;
case BI_ATOM_OPC_AXOR:
return VA_ATOMIC_OPERATION_AXOR;
case BI_ATOM_OPC_ACMPXCHG:
case BI_ATOM_OPC_AXCHG: return VA_ATOMIC_OPERATION_AXCHG;
default: invalid_instruction(I, "atomic opcode");
case BI_ATOM_OPC_AXCHG:
return VA_ATOMIC_OPERATION_AXCHG;
default:
invalid_instruction(I, "atomic opcode");
}
}

@ -179,12 +201,18 @@ static enum va_atomic_operation_with_1
va_pack_atom_opc_1(const bi_instr *I)
{
switch (I->atom_opc) {
case BI_ATOM_OPC_AINC: return VA_ATOMIC_OPERATION_WITH_1_AINC;
case BI_ATOM_OPC_ADEC: return VA_ATOMIC_OPERATION_WITH_1_ADEC;
case BI_ATOM_OPC_AUMAX1: return VA_ATOMIC_OPERATION_WITH_1_AUMAX1;
case BI_ATOM_OPC_ASMAX1: return VA_ATOMIC_OPERATION_WITH_1_ASMAX1;
case BI_ATOM_OPC_AOR1: return VA_ATOMIC_OPERATION_WITH_1_AOR1;
default: invalid_instruction(I, "atomic opcode with implicit 1");
case BI_ATOM_OPC_AINC:
return VA_ATOMIC_OPERATION_WITH_1_AINC;
case BI_ATOM_OPC_ADEC:
return VA_ATOMIC_OPERATION_WITH_1_ADEC;
case BI_ATOM_OPC_AUMAX1:
return VA_ATOMIC_OPERATION_WITH_1_AUMAX1;
case BI_ATOM_OPC_ASMAX1:
return VA_ATOMIC_OPERATION_WITH_1_ASMAX1;
case BI_ATOM_OPC_AOR1:
return VA_ATOMIC_OPERATION_WITH_1_AOR1;
default:
invalid_instruction(I, "atomic opcode with implicit 1");
}
}

@ -199,10 +227,14 @@ static enum va_widen
va_pack_widen_f32(const bi_instr *I, enum bi_swizzle swz)
{
switch (swz) {
case BI_SWIZZLE_H01: return VA_WIDEN_NONE;
case BI_SWIZZLE_H00: return VA_WIDEN_H0;
case BI_SWIZZLE_H11: return VA_WIDEN_H1;
default: invalid_instruction(I, "widen");
case BI_SWIZZLE_H01:
return VA_WIDEN_NONE;
case BI_SWIZZLE_H00:
return VA_WIDEN_H0;
case BI_SWIZZLE_H11:
return VA_WIDEN_H1;
default:
invalid_instruction(I, "widen");
}
}

@ -210,11 +242,16 @@ static enum va_swizzles_16_bit
va_pack_swizzle_f16(const bi_instr *I, enum bi_swizzle swz)
{
switch (swz) {
case BI_SWIZZLE_H00: return VA_SWIZZLES_16_BIT_H00;
case BI_SWIZZLE_H10: return VA_SWIZZLES_16_BIT_H10;
case BI_SWIZZLE_H01: return VA_SWIZZLES_16_BIT_H01;
case BI_SWIZZLE_H11: return VA_SWIZZLES_16_BIT_H11;
default: invalid_instruction(I, "16-bit swizzle");
case BI_SWIZZLE_H00:
return VA_SWIZZLES_16_BIT_H00;
case BI_SWIZZLE_H10:
return VA_SWIZZLES_16_BIT_H10;
case BI_SWIZZLE_H01:
return VA_SWIZZLES_16_BIT_H01;
case BI_SWIZZLE_H11:
return VA_SWIZZLES_16_BIT_H11;
default:
invalid_instruction(I, "16-bit swizzle");
}
}
@ -223,37 +260,62 @@ va_pack_widen(const bi_instr *I, enum bi_swizzle swz, enum va_size size)
{
if (size == VA_SIZE_8) {
switch (swz) {
case BI_SWIZZLE_H01: return VA_SWIZZLES_8_BIT_B0123;
case BI_SWIZZLE_H00: return VA_SWIZZLES_8_BIT_B0101;
case BI_SWIZZLE_H11: return VA_SWIZZLES_8_BIT_B2323;
case BI_SWIZZLE_B0000: return VA_SWIZZLES_8_BIT_B0000;
case BI_SWIZZLE_B1111: return VA_SWIZZLES_8_BIT_B1111;
case BI_SWIZZLE_B2222: return VA_SWIZZLES_8_BIT_B2222;
case BI_SWIZZLE_B3333: return VA_SWIZZLES_8_BIT_B3333;
default: invalid_instruction(I, "8-bit widen");
case BI_SWIZZLE_H01:
return VA_SWIZZLES_8_BIT_B0123;
case BI_SWIZZLE_H00:
return VA_SWIZZLES_8_BIT_B0101;
case BI_SWIZZLE_H11:
return VA_SWIZZLES_8_BIT_B2323;
case BI_SWIZZLE_B0000:
return VA_SWIZZLES_8_BIT_B0000;
case BI_SWIZZLE_B1111:
return VA_SWIZZLES_8_BIT_B1111;
case BI_SWIZZLE_B2222:
return VA_SWIZZLES_8_BIT_B2222;
case BI_SWIZZLE_B3333:
return VA_SWIZZLES_8_BIT_B3333;
default:
invalid_instruction(I, "8-bit widen");
}
} else if (size == VA_SIZE_16) {
switch (swz) {
case BI_SWIZZLE_H00: return VA_SWIZZLES_16_BIT_H00;
case BI_SWIZZLE_H10: return VA_SWIZZLES_16_BIT_H10;
case BI_SWIZZLE_H01: return VA_SWIZZLES_16_BIT_H01;
case BI_SWIZZLE_H11: return VA_SWIZZLES_16_BIT_H11;
case BI_SWIZZLE_B0000: return VA_SWIZZLES_16_BIT_B00;
case BI_SWIZZLE_B1111: return VA_SWIZZLES_16_BIT_B11;
case BI_SWIZZLE_B2222: return VA_SWIZZLES_16_BIT_B22;
case BI_SWIZZLE_B3333: return VA_SWIZZLES_16_BIT_B33;
default: invalid_instruction(I, "16-bit widen");
case BI_SWIZZLE_H00:
return VA_SWIZZLES_16_BIT_H00;
case BI_SWIZZLE_H10:
return VA_SWIZZLES_16_BIT_H10;
case BI_SWIZZLE_H01:
return VA_SWIZZLES_16_BIT_H01;
case BI_SWIZZLE_H11:
return VA_SWIZZLES_16_BIT_H11;
case BI_SWIZZLE_B0000:
return VA_SWIZZLES_16_BIT_B00;
case BI_SWIZZLE_B1111:
return VA_SWIZZLES_16_BIT_B11;
case BI_SWIZZLE_B2222:
return VA_SWIZZLES_16_BIT_B22;
case BI_SWIZZLE_B3333:
return VA_SWIZZLES_16_BIT_B33;
default:
invalid_instruction(I, "16-bit widen");
}
} else if (size == VA_SIZE_32) {
switch (swz) {
case BI_SWIZZLE_H01: return VA_SWIZZLES_32_BIT_NONE;
case BI_SWIZZLE_H00: return VA_SWIZZLES_32_BIT_H0;
case BI_SWIZZLE_H11: return VA_SWIZZLES_32_BIT_H1;
case BI_SWIZZLE_B0000: return VA_SWIZZLES_32_BIT_B0;
case BI_SWIZZLE_B1111: return VA_SWIZZLES_32_BIT_B1;
case BI_SWIZZLE_B2222: return VA_SWIZZLES_32_BIT_B2;
case BI_SWIZZLE_B3333: return VA_SWIZZLES_32_BIT_B3;
default: invalid_instruction(I, "32-bit widen");
case BI_SWIZZLE_H01:
return VA_SWIZZLES_32_BIT_NONE;
case BI_SWIZZLE_H00:
return VA_SWIZZLES_32_BIT_H0;
case BI_SWIZZLE_H11:
return VA_SWIZZLES_32_BIT_H1;
case BI_SWIZZLE_B0000:
return VA_SWIZZLES_32_BIT_B0;
case BI_SWIZZLE_B1111:
return VA_SWIZZLES_32_BIT_B1;
case BI_SWIZZLE_B2222:
return VA_SWIZZLES_32_BIT_B2;
case BI_SWIZZLE_B3333:
return VA_SWIZZLES_32_BIT_B3;
default:
invalid_instruction(I, "32-bit widen");
}
} else {
invalid_instruction(I, "type size for widen");

@ -264,14 +326,22 @@ static enum va_half_swizzles_8_bit
va_pack_halfswizzle(const bi_instr *I, enum bi_swizzle swz)
{
switch (swz) {
case BI_SWIZZLE_B0000: return VA_HALF_SWIZZLES_8_BIT_B00;
case BI_SWIZZLE_B1111: return VA_HALF_SWIZZLES_8_BIT_B11;
case BI_SWIZZLE_B2222: return VA_HALF_SWIZZLES_8_BIT_B22;
case BI_SWIZZLE_B3333: return VA_HALF_SWIZZLES_8_BIT_B33;
case BI_SWIZZLE_B0011: return VA_HALF_SWIZZLES_8_BIT_B01;
case BI_SWIZZLE_B2233: return VA_HALF_SWIZZLES_8_BIT_B23;
case BI_SWIZZLE_B0022: return VA_HALF_SWIZZLES_8_BIT_B02;
default: invalid_instruction(I, "v2u8 swizzle");
case BI_SWIZZLE_B0000:
return VA_HALF_SWIZZLES_8_BIT_B00;
case BI_SWIZZLE_B1111:
return VA_HALF_SWIZZLES_8_BIT_B11;
case BI_SWIZZLE_B2222:
return VA_HALF_SWIZZLES_8_BIT_B22;
case BI_SWIZZLE_B3333:
return VA_HALF_SWIZZLES_8_BIT_B33;
case BI_SWIZZLE_B0011:
return VA_HALF_SWIZZLES_8_BIT_B01;
case BI_SWIZZLE_B2233:
return VA_HALF_SWIZZLES_8_BIT_B23;
case BI_SWIZZLE_B0022:
return VA_HALF_SWIZZLES_8_BIT_B02;
default:
invalid_instruction(I, "v2u8 swizzle");
}
}

@ -279,12 +349,18 @@ static enum va_lanes_8_bit
va_pack_shift_lanes(const bi_instr *I, enum bi_swizzle swz)
{
switch (swz) {
case BI_SWIZZLE_H01: return VA_LANES_8_BIT_B02;
case BI_SWIZZLE_B0000: return VA_LANES_8_BIT_B00;
case BI_SWIZZLE_B1111: return VA_LANES_8_BIT_B11;
case BI_SWIZZLE_B2222: return VA_LANES_8_BIT_B22;
case BI_SWIZZLE_B3333: return VA_LANES_8_BIT_B33;
default: invalid_instruction(I, "lane shift");
case BI_SWIZZLE_H01:
return VA_LANES_8_BIT_B02;
case BI_SWIZZLE_B0000:
return VA_LANES_8_BIT_B00;
case BI_SWIZZLE_B1111:
return VA_LANES_8_BIT_B11;
case BI_SWIZZLE_B2222:
return VA_LANES_8_BIT_B22;
case BI_SWIZZLE_B3333:
return VA_LANES_8_BIT_B33;
default:
invalid_instruction(I, "lane shift");
}
}

@ -292,10 +368,14 @@ static enum va_combine
va_pack_combine(const bi_instr *I, enum bi_swizzle swz)
{
switch (swz) {
case BI_SWIZZLE_H01: return VA_COMBINE_NONE;
case BI_SWIZZLE_H00: return VA_COMBINE_H0;
case BI_SWIZZLE_H11: return VA_COMBINE_H1;
default: invalid_instruction(I, "branch lane");
case BI_SWIZZLE_H01:
return VA_COMBINE_NONE;
case BI_SWIZZLE_H00:
return VA_COMBINE_H0;
case BI_SWIZZLE_H11:
return VA_COMBINE_H1;
default:
invalid_instruction(I, "branch lane");
}
}

@ -303,10 +383,14 @@ static enum va_source_format
va_pack_source_format(const bi_instr *I)
{
switch (I->source_format) {
case BI_SOURCE_FORMAT_FLAT32: return VA_SOURCE_FORMAT_SRC_FLAT32;
case BI_SOURCE_FORMAT_FLAT16: return VA_SOURCE_FORMAT_SRC_FLAT16;
case BI_SOURCE_FORMAT_F32: return VA_SOURCE_FORMAT_SRC_F32;
case BI_SOURCE_FORMAT_F16: return VA_SOURCE_FORMAT_SRC_F16;
case BI_SOURCE_FORMAT_FLAT32:
return VA_SOURCE_FORMAT_SRC_FLAT32;
case BI_SOURCE_FORMAT_FLAT16:
return VA_SOURCE_FORMAT_SRC_FLAT16;
case BI_SOURCE_FORMAT_F32:
return VA_SOURCE_FORMAT_SRC_F32;
case BI_SOURCE_FORMAT_F16:
return VA_SOURCE_FORMAT_SRC_F16;
}

invalid_instruction(I, "source format");

@ -316,9 +400,12 @@ static uint64_t
va_pack_rhadd(const bi_instr *I)
{
switch (I->round) {
case BI_ROUND_RTN: return 0; /* hadd */
case BI_ROUND_RTP: return BITFIELD_BIT(30); /* rhadd */
default: unreachable("Invalid round for HADD");
case BI_ROUND_RTN:
return 0; /* hadd */
case BI_ROUND_RTP:
return BITFIELD_BIT(30); /* rhadd */
default:
unreachable("Invalid round for HADD");
}
}
@ -334,15 +421,17 @@ va_pack_alu(const bi_instr *I)
case BI_OPCODE_FREXPE_V2F16:
case BI_OPCODE_FREXPM_F32:
case BI_OPCODE_FREXPM_V2F16:
if (I->sqrt) hex |= 1ull << 24;
if (I->log) hex |= 1ull << 25;
if (I->sqrt)
hex |= 1ull << 24;
if (I->log)
hex |= 1ull << 25;
break;

/* Add mux type */
case BI_OPCODE_MUX_I32:
case BI_OPCODE_MUX_V2I16:
case BI_OPCODE_MUX_V4I8:
hex |= (uint64_t) I->mux << 32;
hex |= (uint64_t)I->mux << 32;
break;

/* Add .eq flag */

@ -350,12 +439,13 @@
case BI_OPCODE_BRANCHZI:
pack_assert(I, I->cmpf == BI_CMPF_EQ || I->cmpf == BI_CMPF_NE);

if (I->cmpf == BI_CMPF_EQ) hex |= (1ull << 36);
if (I->cmpf == BI_CMPF_EQ)
hex |= (1ull << 36);

if (I->op == BI_OPCODE_BRANCHZI)
hex |= (0x1ull << 40); /* Absolute */
else
hex |= ((uint64_t) I->branch_offset & BITFIELD_MASK(27)) << 8;
hex |= ((uint64_t)I->branch_offset & BITFIELD_MASK(27)) << 8;

break;

@ -369,7 +459,7 @@ va_pack_alu(const bi_instr *I)
case BI_OPCODE_RSHIFT_XOR_I32:
case BI_OPCODE_RSHIFT_XOR_V2I16:
case BI_OPCODE_RSHIFT_XOR_V4I8:
hex |= (uint64_t) I->arithmetic << 34;
hex |= (uint64_t)I->arithmetic << 34;
break;

case BI_OPCODE_LEA_BUF_IMM:

@ -378,8 +468,8 @@ va_pack_alu(const bi_instr *I)
break;

case BI_OPCODE_LEA_ATTR_IMM:
hex |= ((uint64_t) I->table) << 16;
hex |= ((uint64_t) I->attribute_index) << 20;
hex |= ((uint64_t)I->table) << 16;
hex |= ((uint64_t)I->attribute_index) << 20;
break;

case BI_OPCODE_IADD_IMM_I32:

@ -387,13 +477,13 @@ va_pack_alu(const bi_instr *I)
case BI_OPCODE_IADD_IMM_V4I8:
case BI_OPCODE_FADD_IMM_F32:
case BI_OPCODE_FADD_IMM_V2F16:
hex |= ((uint64_t) I->index) << 8;
hex |= ((uint64_t)I->index) << 8;
break;

case BI_OPCODE_CLPER_I32:
hex |= ((uint64_t) I->inactive_result) << 22;
hex |= ((uint64_t) I->lane_op) << 32;
hex |= ((uint64_t) I->subgroup) << 36;
hex |= ((uint64_t)I->inactive_result) << 22;
hex |= ((uint64_t)I->lane_op) << 32;
hex |= ((uint64_t)I->subgroup) << 36;
break;

case BI_OPCODE_LD_VAR:

@ -406,35 +496,37 @@ va_pack_alu(const bi_instr *I)
case BI_OPCODE_LD_VAR_BUF_IMM_F32:
case BI_OPCODE_LD_VAR_SPECIAL:
if (I->op == BI_OPCODE_LD_VAR_SPECIAL)
hex |= ((uint64_t) I->varying_name) << 12; /* instead of index */
hex |= ((uint64_t)I->varying_name) << 12; /* instead of index */
else if (I->op == BI_OPCODE_LD_VAR_BUF_IMM_F16 ||
I->op == BI_OPCODE_LD_VAR_BUF_IMM_F32) {
hex |= ((uint64_t) I->index) << 16;
hex |= ((uint64_t)I->index) << 16;
} else if (I->op == BI_OPCODE_LD_VAR_IMM ||
I->op == BI_OPCODE_LD_VAR_FLAT_IMM) {
hex |= ((uint64_t) I->table) << 8;
hex |= ((uint64_t) I->index) << 12;
hex |= ((uint64_t)I->table) << 8;
hex |= ((uint64_t)I->index) << 12;
}

hex |= ((uint64_t) va_pack_source_format(I)) << 24;
hex |= ((uint64_t) I->update) << 36;
hex |= ((uint64_t) I->sample) << 38;
hex |= ((uint64_t)va_pack_source_format(I)) << 24;
hex |= ((uint64_t)I->update) << 36;
hex |= ((uint64_t)I->sample) << 38;
break;

case BI_OPCODE_LD_ATTR_IMM:
hex |= ((uint64_t) I->table) << 16;
hex |= ((uint64_t) I->attribute_index) << 20;
hex |= ((uint64_t)I->table) << 16;
hex |= ((uint64_t)I->attribute_index) << 20;
break;

case BI_OPCODE_LD_TEX_IMM:
case BI_OPCODE_LEA_TEX_IMM:
hex |= ((uint64_t) I->table) << 16;
hex |= ((uint64_t) I->texture_index) << 20;
hex |= ((uint64_t)I->table) << 16;
hex |= ((uint64_t)I->texture_index) << 20;
break;

case BI_OPCODE_ZS_EMIT:
if (I->stencil) hex |= (1 << 24);
if (I->z) hex |= (1 << 25);
if (I->stencil)
hex |= (1 << 24);
if (I->z)
hex |= (1 << 25);
break;

default:

@ -444,14 +536,14 @@ va_pack_alu(const bi_instr *I)
/* FMA_RSCALE.f32 special modes treated as extra opcodes */
if (I->op == BI_OPCODE_FMA_RSCALE_F32) {
pack_assert(I, I->special < 4);
hex |= ((uint64_t) I->special) << 48;
hex |= ((uint64_t)I->special) << 48;
}

/* Add the normal destination or a placeholder. Staging destinations are
* added elsewhere, as they require special handling for control fields.
*/
if (info.has_dest && info.nr_staging_dests == 0) {
hex |= (uint64_t) va_pack_dest(I) << 40;
hex |= (uint64_t)va_pack_dest(I) << 40;
} else if (info.nr_staging_dests == 0 && info.nr_staging_srcs == 0) {
pack_assert(I, I->nr_dests == 0);
hex |= 0xC0ull << 40; /* Placeholder */

@ -469,19 +561,24 @@ va_pack_alu(const bi_instr *I)
enum va_size size = src_info.size;

bi_index src = I->src[logical_i + src_offset];
hex |= (uint64_t) va_pack_src(I, logical_i + src_offset) << (8 * i);
hex |= (uint64_t)va_pack_src(I, logical_i + src_offset) << (8 * i);

if (src_info.notted) {
if (src.neg) hex |= (1ull << 35);
if (src.neg)
hex |= (1ull << 35);
} else if (src_info.absneg) {
unsigned neg_offs = 32 + 2 + ((2 - i) * 2);
unsigned abs_offs = 33 + 2 + ((2 - i) * 2);

if (src.neg) hex |= 1ull << neg_offs;
if (src.abs) hex |= 1ull << abs_offs;
if (src.neg)
hex |= 1ull << neg_offs;
if (src.abs)
hex |= 1ull << abs_offs;
} else {
if (src.neg) invalid_instruction(I, "negate");
if (src.abs) invalid_instruction(I, "absolute value");
if (src.neg)
invalid_instruction(I, "negate");
if (src.abs)
invalid_instruction(I, "absolute value");
}

if (src_info.swizzle) {
@ -489,50 +586,56 @@ va_pack_alu(const bi_instr *I)
unsigned S = src.swizzle;
pack_assert(I, size == VA_SIZE_16 || size == VA_SIZE_32);

uint64_t v = (size == VA_SIZE_32 ? va_pack_widen_f32(I, S) : va_pack_swizzle_f16(I, S));
uint64_t v = (size == VA_SIZE_32 ? va_pack_widen_f32(I, S)
: va_pack_swizzle_f16(I, S));
hex |= v << offs;
} else if (src_info.widen) {
unsigned offs = (i == 1) ? 26 : 36;
hex |= (uint64_t) va_pack_widen(I, src.swizzle, src_info.size) << offs;
hex |= (uint64_t)va_pack_widen(I, src.swizzle, src_info.size) << offs;
} else if (src_info.lane) {
unsigned offs = (I->op == BI_OPCODE_MKVEC_V2I8) ?
((i == 0) ? 38 : 36) :
28;
unsigned offs =
(I->op == BI_OPCODE_MKVEC_V2I8) ? ((i == 0) ? 38 : 36) : 28;

if (src_info.size == VA_SIZE_16) {
hex |= (src.swizzle == BI_SWIZZLE_H11 ? 1 : 0) << offs;
} else if (I->op == BI_OPCODE_BRANCHZ_I16) {
hex |= ((uint64_t) va_pack_combine(I, src.swizzle) << 37);
hex |= ((uint64_t)va_pack_combine(I, src.swizzle) << 37);
} else {
pack_assert(I, src_info.size == VA_SIZE_8);
unsigned comp = src.swizzle - BI_SWIZZLE_B0000;
pack_assert(I, comp < 4);
hex |= (uint64_t) comp << offs;
hex |= (uint64_t)comp << offs;
}
} else if (src_info.lanes) {
pack_assert(I, src_info.size == VA_SIZE_8);
pack_assert(I, i == 1);
hex |= (uint64_t) va_pack_shift_lanes(I, src.swizzle) << 26;
hex |= (uint64_t)va_pack_shift_lanes(I, src.swizzle) << 26;
} else if (src_info.combine) {
/* Treat as swizzle, subgroup ops not yet supported */
pack_assert(I, src_info.size == VA_SIZE_32);
pack_assert(I, i == 0);
hex |= (uint64_t) va_pack_widen_f32(I, src.swizzle) << 37;
hex |= (uint64_t)va_pack_widen_f32(I, src.swizzle) << 37;
} else if (src_info.halfswizzle) {
pack_assert(I, src_info.size == VA_SIZE_8);
pack_assert(I, i == 0);
hex |= (uint64_t) va_pack_halfswizzle(I, src.swizzle) << 36;
hex |= (uint64_t)va_pack_halfswizzle(I, src.swizzle) << 36;
} else if (src.swizzle != BI_SWIZZLE_H01) {
invalid_instruction(I, "swizzle");
}
}

if (info.saturate) hex |= (uint64_t) I->saturate << 30;
if (info.rhadd) hex |= va_pack_rhadd(I);
if (info.clamp) hex |= (uint64_t) I->clamp << 32;
if (info.round_mode) hex |= (uint64_t) I->round << 30;
if (info.condition) hex |= (uint64_t) I->cmpf << 32;
if (info.result_type) hex |= (uint64_t) I->result_type << 30;
if (info.saturate)
hex |= (uint64_t)I->saturate << 30;
if (info.rhadd)
hex |= va_pack_rhadd(I);
if (info.clamp)
hex |= (uint64_t)I->clamp << 32;
if (info.round_mode)
hex |= (uint64_t)I->round << 30;
if (info.condition)
hex |= (uint64_t)I->cmpf << 32;
if (info.result_type)
hex |= (uint64_t)I->result_type << 30;

return hex;
}

@ -541,37 +644,35 @@ static uint64_t
va_pack_byte_offset(const bi_instr *I)
{
int16_t offset = I->byte_offset;
if (offset != I->byte_offset) invalid_instruction(I, "byte offset");
if (offset != I->byte_offset)
invalid_instruction(I, "byte offset");

uint16_t offset_as_u16 = offset;
return ((uint64_t) offset_as_u16) << 8;
return ((uint64_t)offset_as_u16) << 8;
}

static uint64_t
va_pack_byte_offset_8(const bi_instr *I)
{
uint8_t offset = I->byte_offset;
if (offset != I->byte_offset) invalid_instruction(I, "byte offset");
if (offset != I->byte_offset)
invalid_instruction(I, "byte offset");

return ((uint64_t) offset) << 8;
return ((uint64_t)offset) << 8;
}

static uint64_t
va_pack_load(const bi_instr *I, bool buffer_descriptor)
{
const uint8_t load_lane_identity[8] = {
VA_LOAD_LANE_8_BIT_B0,
VA_LOAD_LANE_16_BIT_H0,
VA_LOAD_LANE_24_BIT_IDENTITY,
VA_LOAD_LANE_32_BIT_W0,
VA_LOAD_LANE_48_BIT_IDENTITY,
VA_LOAD_LANE_64_BIT_IDENTITY,
VA_LOAD_LANE_96_BIT_IDENTITY,
VA_LOAD_LANE_128_BIT_IDENTITY,
VA_LOAD_LANE_8_BIT_B0, VA_LOAD_LANE_16_BIT_H0,
VA_LOAD_LANE_24_BIT_IDENTITY, VA_LOAD_LANE_32_BIT_W0,
VA_LOAD_LANE_48_BIT_IDENTITY, VA_LOAD_LANE_64_BIT_IDENTITY,
VA_LOAD_LANE_96_BIT_IDENTITY, VA_LOAD_LANE_128_BIT_IDENTITY,
};

unsigned memory_size = (valhall_opcodes[I->op].exact >> 27) & 0x7;
uint64_t hex = (uint64_t) load_lane_identity[memory_size] << 36;
uint64_t hex = (uint64_t)load_lane_identity[memory_size] << 36;

// unsigned
hex |= (1ull << 39);

@ -579,10 +680,10 @@ va_pack_load(const bi_instr *I, bool buffer_descriptor)
if (!buffer_descriptor)
hex |= va_pack_byte_offset(I);

hex |= (uint64_t) va_pack_src(I, 0) << 0;
hex |= (uint64_t)va_pack_src(I, 0) << 0;

if (buffer_descriptor)
hex |= (uint64_t) va_pack_src(I, 1) << 8;
hex |= (uint64_t)va_pack_src(I, 1) << 8;

return hex;
}

@ -591,10 +692,14 @@ static uint64_t
va_pack_memory_access(const bi_instr *I)
{
switch (I->seg) {
case BI_SEG_TL: return VA_MEMORY_ACCESS_FORCE;
case BI_SEG_POS: return VA_MEMORY_ACCESS_ISTREAM;
case BI_SEG_VARY: return VA_MEMORY_ACCESS_ESTREAM;
default: return VA_MEMORY_ACCESS_NONE;
case BI_SEG_TL:
return VA_MEMORY_ACCESS_FORCE;
case BI_SEG_POS:
return VA_MEMORY_ACCESS_ISTREAM;
case BI_SEG_VARY:
return VA_MEMORY_ACCESS_ESTREAM;
default:
return VA_MEMORY_ACCESS_NONE;
}
}

@ -604,7 +709,7 @@ va_pack_store(const bi_instr *I)
uint64_t hex = va_pack_memory_access(I) << 24;

va_validate_register_pair(I, 1);
hex |= (uint64_t) va_pack_src(I, 1) << 0;
hex |= (uint64_t)va_pack_src(I, 1) << 0;

hex |= va_pack_byte_offset(I);

@ -615,11 +720,16 @@ static enum va_lod_mode
va_pack_lod_mode(const bi_instr *I)
{
switch (I->va_lod_mode) {
case BI_VA_LOD_MODE_ZERO_LOD: return VA_LOD_MODE_ZERO;
case BI_VA_LOD_MODE_COMPUTED_LOD: return VA_LOD_MODE_COMPUTED;
case BI_VA_LOD_MODE_EXPLICIT: return VA_LOD_MODE_EXPLICIT;
case BI_VA_LOD_MODE_COMPUTED_BIAS: return VA_LOD_MODE_COMPUTED_BIAS;
case BI_VA_LOD_MODE_GRDESC: return VA_LOD_MODE_GRDESC;
case BI_VA_LOD_MODE_ZERO_LOD:
return VA_LOD_MODE_ZERO;
case BI_VA_LOD_MODE_COMPUTED_LOD:
return VA_LOD_MODE_COMPUTED;
case BI_VA_LOD_MODE_EXPLICIT:
return VA_LOD_MODE_EXPLICIT;
case BI_VA_LOD_MODE_COMPUTED_BIAS:
return VA_LOD_MODE_COMPUTED_BIAS;
case BI_VA_LOD_MODE_GRDESC:
return VA_LOD_MODE_GRDESC;
}

invalid_instruction(I, "LOD mode");

@ -650,14 +760,22 @@ static enum va_register_format
va_pack_register_format(const bi_instr *I)
{
switch (I->register_format) {
case BI_REGISTER_FORMAT_AUTO: return VA_REGISTER_FORMAT_AUTO;
case BI_REGISTER_FORMAT_F32: return VA_REGISTER_FORMAT_F32;
case BI_REGISTER_FORMAT_F16: return VA_REGISTER_FORMAT_F16;
case BI_REGISTER_FORMAT_S32: return VA_REGISTER_FORMAT_S32;
case BI_REGISTER_FORMAT_S16: return VA_REGISTER_FORMAT_S16;
case BI_REGISTER_FORMAT_U32: return VA_REGISTER_FORMAT_U32;
case BI_REGISTER_FORMAT_U16: return VA_REGISTER_FORMAT_U16;
default: invalid_instruction(I, "register format");
case BI_REGISTER_FORMAT_AUTO:
return VA_REGISTER_FORMAT_AUTO;
case BI_REGISTER_FORMAT_F32:
return VA_REGISTER_FORMAT_F32;
case BI_REGISTER_FORMAT_F16:
return VA_REGISTER_FORMAT_F16;
case BI_REGISTER_FORMAT_S32:
return VA_REGISTER_FORMAT_S32;
case BI_REGISTER_FORMAT_S16:
return VA_REGISTER_FORMAT_S16;
case BI_REGISTER_FORMAT_U32:
return VA_REGISTER_FORMAT_U32;
case BI_REGISTER_FORMAT_U16:
return VA_REGISTER_FORMAT_U16;
default:
invalid_instruction(I, "register format");
}
}
@ -666,35 +784,34 @@ va_pack_instr(const bi_instr *I)
|
|||
{
|
||||
struct va_opcode_info info = valhall_opcodes[I->op];
|
||||
|
||||
uint64_t hex = info.exact | (((uint64_t) I->flow) << 59);
|
||||
hex |= ((uint64_t) va_select_fau_page(I)) << 57;
|
||||
uint64_t hex = info.exact | (((uint64_t)I->flow) << 59);
|
||||
hex |= ((uint64_t)va_select_fau_page(I)) << 57;
|
||||
|
||||
if (info.slot)
|
||||
hex |= ((uint64_t) I->slot << 30);
|
||||
hex |= ((uint64_t)I->slot << 30);
|
||||
|
||||
if (info.sr_count) {
|
||||
bool read = bi_opcode_props[I->op].sr_read;
|
||||
bi_index sr = read ? I->src[0] : I->dest[0];
|
||||
|
||||
unsigned count = read ?
|
||||
bi_count_read_registers(I, 0) :
|
||||
bi_count_write_registers(I, 0);
|
||||
unsigned count =
|
||||
read ? bi_count_read_registers(I, 0) : bi_count_write_registers(I, 0);
|
||||
|
||||
hex |= ((uint64_t) count << 33);
|
||||
hex |= (uint64_t) va_pack_reg(I, sr) << 40;
|
||||
hex |= ((uint64_t) info.sr_control << 46);
|
||||
hex |= ((uint64_t)count << 33);
|
||||
hex |= (uint64_t)va_pack_reg(I, sr) << 40;
|
||||
hex |= ((uint64_t)info.sr_control << 46);
|
||||
}
|
||||
|
||||
if (info.sr_write_count) {
|
||||
hex |= ((uint64_t) bi_count_write_registers(I, 0) - 1) << 36;
|
||||
hex |= ((uint64_t) va_pack_reg(I, I->dest[0])) << 16;
|
||||
hex |= ((uint64_t)bi_count_write_registers(I, 0) - 1) << 36;
|
||||
hex |= ((uint64_t)va_pack_reg(I, I->dest[0])) << 16;
|
||||
}
|
||||
|
||||
if (info.vecsize)
|
||||
hex |= ((uint64_t) I->vecsize << 28);
|
||||
hex |= ((uint64_t)I->vecsize << 28);
|
||||
|
||||
if (info.register_format)
|
||||
hex |= ((uint64_t) va_pack_register_format(I)) << 24;
|
||||
hex |= ((uint64_t)va_pack_register_format(I)) << 24;
|
||||
|
||||
switch (I->op) {
|
||||
case BI_OPCODE_LOAD_I8:
|
||||
|
|
@ -738,18 +855,18 @@ va_pack_instr(const bi_instr *I)
|
|||
|
||||
/* 64-bit source */
|
||||
va_validate_register_pair(I, 0);
|
||||
hex |= (uint64_t) va_pack_src(I, 0) << 0;
|
||||
hex |= (uint64_t)va_pack_src(I, 0) << 0;
|
||||
hex |= va_pack_byte_offset_8(I);
|
||||
hex |= ((uint64_t) va_pack_atom_opc_1(I)) << 22;
|
||||
hex |= ((uint64_t)va_pack_atom_opc_1(I)) << 22;
|
||||
break;
|
||||
|
||||
case BI_OPCODE_ATOM_I32:
|
||||
case BI_OPCODE_ATOM_RETURN_I32:
|
||||
/* 64-bit source */
|
||||
va_validate_register_pair(I, 1);
|
||||
hex |= (uint64_t) va_pack_src(I, 1) << 0;
|
||||
hex |= (uint64_t)va_pack_src(I, 1) << 0;
|
||||
hex |= va_pack_byte_offset_8(I);
|
||||
hex |= ((uint64_t) va_pack_atom_opc(I)) << 22;
|
||||
hex |= ((uint64_t)va_pack_atom_opc(I)) << 22;
|
||||
|
||||
if (I->op == BI_OPCODE_ATOM_RETURN_I32)
|
||||
hex |= (0xc0ull << 40); // flags
|
||||
|
|
@ -764,56 +881,61 @@ va_pack_instr(const bi_instr *I)
|
|||
hex |= va_pack_store(I);
|
||||
|
||||
/* Conversion descriptor */
|
||||
hex |= (uint64_t) va_pack_src(I, 3) << 16;
|
||||
hex |= (uint64_t)va_pack_src(I, 3) << 16;
|
||||
break;
|
||||
|
||||
case BI_OPCODE_BLEND:
|
||||
{
|
||||
case BI_OPCODE_BLEND: {
|
||||
/* Source 0 - Blend descriptor (64-bit) */
|
||||
hex |= ((uint64_t) va_pack_src(I, 2)) << 0;
|
||||
hex |= ((uint64_t)va_pack_src(I, 2)) << 0;
|
||||
va_validate_register_pair(I, 2);
|
||||
|
||||
/* Target */
|
||||
if (I->branch_offset & 0x7) invalid_instruction(I, "unaligned branch");
|
||||
if (I->branch_offset & 0x7)
|
||||
invalid_instruction(I, "unaligned branch");
|
||||
hex |= ((I->branch_offset >> 3) << 8);
|
||||
|
||||
/* Source 2 - coverage mask */
|
||||
hex |= ((uint64_t) va_pack_reg(I, I->src[1])) << 16;
|
||||
hex |= ((uint64_t)va_pack_reg(I, I->src[1])) << 16;
|
||||
|
||||
/* Vector size */
|
||||
unsigned vecsize = 4;
|
||||
hex |= ((uint64_t) (vecsize - 1) << 28);
|
||||
hex |= ((uint64_t)(vecsize - 1) << 28);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case BI_OPCODE_TEX_SINGLE:
|
||||
case BI_OPCODE_TEX_FETCH:
|
||||
case BI_OPCODE_TEX_GATHER:
|
||||
{
|
||||
case BI_OPCODE_TEX_GATHER: {
|
||||
/* Image to read from */
|
||||
hex |= ((uint64_t) va_pack_src(I, 1)) << 0;
|
||||
hex |= ((uint64_t)va_pack_src(I, 1)) << 0;
|
||||
|
||||
if (I->op == BI_OPCODE_TEX_FETCH && I->shadow)
|
||||
invalid_instruction(I, "TEX_FETCH does not support .shadow");
|
||||
|
||||
if (I->array_enable) hex |= (1ull << 10);
|
||||
if (I->texel_offset) hex |= (1ull << 11);
|
||||
if (I->shadow) hex |= (1ull << 12);
|
||||
if (I->skip) hex |= (1ull << 39);
|
||||
if (!bi_is_regfmt_16(I->register_format)) hex |= (1ull << 46);
|
||||
if (I->array_enable)
|
||||
hex |= (1ull << 10);
|
||||
if (I->texel_offset)
|
||||
hex |= (1ull << 11);
|
||||
if (I->shadow)
|
||||
hex |= (1ull << 12);
|
||||
if (I->skip)
|
||||
hex |= (1ull << 39);
|
||||
if (!bi_is_regfmt_16(I->register_format))
|
||||
hex |= (1ull << 46);
|
||||
|
||||
if (I->op == BI_OPCODE_TEX_SINGLE)
|
||||
hex |= ((uint64_t) va_pack_lod_mode(I)) << 13;
|
||||
hex |= ((uint64_t)va_pack_lod_mode(I)) << 13;
|
||||
|
||||
if (I->op == BI_OPCODE_TEX_GATHER) {
|
||||
if (I->integer_coordinates) hex |= (1 << 13);
|
||||
hex |= ((uint64_t) I->fetch_component) << 14;
|
||||
if (I->integer_coordinates)
|
||||
hex |= (1 << 13);
|
||||
hex |= ((uint64_t)I->fetch_component) << 14;
|
||||
}
|
||||
|
||||
hex |= (I->write_mask << 22);
|
||||
hex |= ((uint64_t) va_pack_register_type(I)) << 26;
|
||||
hex |= ((uint64_t) I->dimension) << 28;
|
||||
hex |= ((uint64_t)va_pack_register_type(I)) << 26;
|
||||
hex |= ((uint64_t)I->dimension) << 28;
|
||||
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
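va_pack_instr above assembles each Valhall instruction by OR-ing fields into a 64-bit word at fixed bit offsets (flow control at bit 59, FAU page at 57, staging register at 40, and so on). A minimal standalone sketch of that shift-and-OR pattern follows; the offsets mirror the diff, but the opcode constant and field values are made up for illustration.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Sketch of the shift-and-OR packing pattern used by va_pack_instr.
 * "opcode_exact" stands in for valhall_opcodes[op].exact; the field
 * offsets (59, 57, 40) mirror the ones in the diff above. */
static uint64_t
pack_word(uint64_t opcode_exact, unsigned flow, unsigned fau_page,
          unsigned staging_reg)
{
   uint64_t hex = opcode_exact;

   hex |= ((uint64_t)flow) << 59;        /* flow control field */
   hex |= ((uint64_t)fau_page) << 57;    /* FAU page */
   hex |= ((uint64_t)staging_reg) << 40; /* staging register */

   return hex;
}

int
main(void)
{
   uint64_t w = pack_word(0x0090000000000000ull, 1, 0, 4);
   printf("packed: %016" PRIx64 "\n", w);
   return 0;
}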
@@ -22,9 +22,9 @@
* SOFTWARE.
*/

#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall.h"
#include "bi_builder.h"

void
va_count_instr_stats(bi_instr *I, struct va_stats *stats)

@@ -48,8 +48,8 @@ va_count_instr_stats(bi_instr *I, struct va_stats *stats)

/* Varying is scaled by 16-bit components interpolated */
case VA_UNIT_V:
stats->v += (I->vecsize + 1) *
(bi_is_regfmt_16(I->register_format) ? 1 : 2);
stats->v +=
(I->vecsize + 1) * (bi_is_regfmt_16(I->register_format) ? 1 : 2);
return;

/* We just count load/store and texturing for now */
@@ -21,15 +21,16 @@
* SOFTWARE.
*/

#include "bi_builder.h"
#include "va_compiler.h"
#include "valhall.h"
#include "bi_builder.h"

/* Valhall has limits on access to fast-access uniforms:
*
* An instruction may access no more than a single 64-bit uniform slot.
* An instruction may access no more than 64-bits of combined uniforms and constants.
* An instruction may access no more than a single special immediate (e.g. lane_id).
* An instruction may access no more than 64-bits of combined uniforms and
* constants. An instruction may access no more than a single special immediate
* (e.g. lane_id).
*
* We validate these constraints.
*

@@ -114,7 +115,7 @@ bool
va_validate_fau(bi_instr *I)
{
bool valid = true;
struct fau_state fau = { .uniform_slot = -1 };
struct fau_state fau = {.uniform_slot = -1};
unsigned fau_page = va_select_fau_page(I);

bi_foreach_src(I, s) {

@@ -127,7 +128,7 @@ va_validate_fau(bi_instr *I)
void
va_repair_fau(bi_builder *b, bi_instr *I)
{
struct fau_state fau = { .uniform_slot = -1 };
struct fau_state fau = {.uniform_slot = -1};
unsigned fau_page = va_select_fau_page(I);

bi_foreach_src(I, s) {
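The comment in this file spells out the FAU rules the validator enforces: one 64-bit uniform slot, at most 64 bits of combined uniform/constant data, one special immediate. A hedged sketch of the same accounting is below; the fau_src descriptor and two-word bookkeeping are illustrative, not the driver's actual struct fau_state.

#include <stdbool.h>
#include <stdint.h>

/* Illustrative source descriptor, standing in for bi_index sources */
struct fau_src {
   bool is_uniform;
   bool is_constant;
   unsigned uniform_slot; /* 64-bit slot index */
};

/* Check the two FAU constraints described above: a single uniform slot,
 * and no more than two 32-bit words of combined uniforms and constants */
static bool
fau_sources_valid(const struct fau_src *srcs, unsigned count)
{
   int slot = -1;
   unsigned words = 0;

   for (unsigned i = 0; i < count; ++i) {
      if (srcs[i].is_uniform) {
         if (slot >= 0 && slot != (int)srcs[i].uniform_slot)
            return false; /* second uniform slot: invalid */
         if (slot < 0) {
            slot = srcs[i].uniform_slot;
            words += 2; /* a slot is 64 bits */
         }
      } else if (srcs[i].is_constant) {
         words += 1;
      }
   }

   return words <= 2;
}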
@@ -73,43 +73,42 @@ enum va_unit {
};

struct va_src_info {
bool absneg : 1;
bool swizzle : 1;
bool notted : 1;
bool lane : 1;
bool lanes : 1;
bool halfswizzle : 1;
bool widen : 1;
bool combine : 1;
bool absneg : 1;
bool swizzle : 1;
bool notted : 1;
bool lane : 1;
bool lanes : 1;
bool halfswizzle : 1;
bool widen : 1;
bool combine : 1;
enum va_size size : 2;
} __attribute__((packed));

struct va_opcode_info {
uint64_t exact;
struct va_src_info srcs[4];
uint8_t type_size : 8;
enum va_unit unit : 3;
unsigned nr_srcs : 3;
unsigned nr_staging_srcs : 2;
uint8_t type_size : 8;
enum va_unit unit : 3;
unsigned nr_srcs : 3;
unsigned nr_staging_srcs : 2;
unsigned nr_staging_dests : 2;
bool has_dest : 1;
bool is_signed : 1;
bool clamp : 1;
bool saturate : 1;
bool rhadd : 1;
bool round_mode : 1;
bool condition : 1;
bool result_type : 1;
bool vecsize : 1;
bool register_format : 1;
bool slot : 1;
bool sr_count : 1;
bool sr_write_count : 1;
unsigned sr_control : 2;
bool has_dest : 1;
bool is_signed : 1;
bool clamp : 1;
bool saturate : 1;
bool rhadd : 1;
bool round_mode : 1;
bool condition : 1;
bool result_type : 1;
bool vecsize : 1;
bool register_format : 1;
bool slot : 1;
bool sr_count : 1;
bool sr_write_count : 1;
unsigned sr_control : 2;
};

extern const struct va_opcode_info
valhall_opcodes[BI_NUM_OPCODES];
extern const struct va_opcode_info valhall_opcodes[BI_NUM_OPCODES];

/* Bifrost specifies the source of bitwise operations as (A, B, shift), but
* Valhall specifies (A, shift, B). We follow Bifrost conventions in the
@@ -47,8 +47,7 @@ pan_ioctl_get_param(int fd, unsigned long request, void *arg)
struct drm_panfrost_get_param *gp = arg;

switch (gp->param) {
case DRM_PANFROST_PARAM_GPU_PROD_ID:
{
case DRM_PANFROST_PARAM_GPU_PROD_ID: {
char *override_version = getenv("PAN_GPU_ID");

if (override_version)
@@ -13,22 +13,21 @@

#include "pan_pps_perf.h"

namespace pps
{
namespace pps {
/// @brief Panfrost implementation of PPS driver.
/// This driver queries the GPU through `drm/panfrost_drm.h`, using performance counters ioctls,
/// which can be enabled by setting a kernel parameter: `modprobe panfrost unstable_ioctls=1`.
/// The ioctl needs a buffer to copy data from kernel to user space.
class PanfrostDriver : public Driver
{
public:
/// This driver queries the GPU through `drm/panfrost_drm.h`, using performance
/// counters ioctls, which can be enabled by setting a kernel parameter:
/// `modprobe panfrost unstable_ioctls=1`. The ioctl needs a buffer to copy data
/// from kernel to user space.
class PanfrostDriver : public Driver {
public:
static inline PanfrostDriver &into(Driver &dri);
static inline const PanfrostDriver &into(const Driver &dri);

/// @param A list of mali counter names
/// @return A pair with two lists: counter groups and available counters
static std::pair<std::vector<CounterGroup>, std::vector<Counter>> create_available_counters(
const PanfrostPerf& perf);
static std::pair<std::vector<CounterGroup>, std::vector<Counter>>
create_available_counters(const PanfrostPerf &perf);

PanfrostDriver();
~PanfrostDriver();

@@ -50,12 +49,14 @@ class PanfrostDriver : public Driver
std::unique_ptr<PanfrostPerf> perf = nullptr;
};

PanfrostDriver &PanfrostDriver::into(Driver &dri)
PanfrostDriver &
PanfrostDriver::into(Driver &dri)
{
return reinterpret_cast<PanfrostDriver &>(dri);
}

const PanfrostDriver &PanfrostDriver::into(const Driver &dri)
const PanfrostDriver &
PanfrostDriver::into(const Driver &dri)
{
return reinterpret_cast<const PanfrostDriver &>(dri);
}
@@ -10,35 +10,32 @@
struct panfrost_device;
struct panfrost_perf;

namespace pps
{
class PanfrostDevice
{
public:
namespace pps {
class PanfrostDevice {
public:
PanfrostDevice(int fd);
~PanfrostDevice();

PanfrostDevice(const PanfrostDevice &) = delete;
PanfrostDevice &operator=(const PanfrostDevice &) = delete;

PanfrostDevice(PanfrostDevice&&);
PanfrostDevice& operator=(PanfrostDevice&&);
PanfrostDevice(PanfrostDevice &&);
PanfrostDevice &operator=(PanfrostDevice &&);

void *ctx = nullptr;
struct panfrost_device* dev = nullptr;
struct panfrost_device *dev = nullptr;
};

class PanfrostPerf
{
public:
PanfrostPerf(const PanfrostDevice& dev);
class PanfrostPerf {
public:
PanfrostPerf(const PanfrostDevice &dev);
~PanfrostPerf();

PanfrostPerf(const PanfrostPerf &) = delete;
PanfrostPerf &operator=(const PanfrostPerf &) = delete;

PanfrostPerf(PanfrostPerf&&);
PanfrostPerf& operator=(PanfrostPerf&&);
PanfrostPerf(PanfrostPerf &&);
PanfrostPerf &operator=(PanfrostPerf &&);

int enable() const;
void disable() const;
@@ -28,11 +28,11 @@
#ifndef __PANFROST_JOB_H__
#define __PANFROST_JOB_H__

#include <stdint.h>
#include <stdbool.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>

typedef uint8_t u8;
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;

@@ -68,13 +68,13 @@ typedef uint64_t mali_ptr;
/* These formats seem to largely duplicate the others. They're used at least
* for Bifrost framebuffer output.
*/
#define MALI_FORMAT_SPECIAL2 (7 << 5)
#define MALI_EXTRACT_TYPE(fmt) ((fmt) & 0xe0)
#define MALI_FORMAT_SPECIAL2 (7 << 5)
#define MALI_EXTRACT_TYPE(fmt) ((fmt)&0xe0)

/* If the high 3 bits are 3 to 6 these two bits say how many components
* there are.
*/
#define MALI_NR_CHANNELS(n) ((n - 1) << 3)
#define MALI_NR_CHANNELS(n) ((n - 1) << 3)
#define MALI_EXTRACT_CHANNELS(fmt) ((((fmt) >> 3) & 3) + 1)

/* If the high 3 bits are 3 to 6, then the low 3 bits say how big each

@@ -93,7 +93,7 @@ typedef uint64_t mali_ptr;
/* For MALI_FORMAT_SINT it means a half-float (e.g. RG16F). For
* MALI_FORMAT_UNORM, it means a 32-bit float.
*/
#define MALI_CHANNEL_FLOAT 7
#define MALI_CHANNEL_FLOAT 7
#define MALI_EXTRACT_BITS(fmt) (fmt & 0x7)

#define MALI_EXTRACT_INDEX(pixfmt) (((pixfmt) >> 12) & 0xFF)

@@ -241,18 +241,18 @@ typedef uint64_t mali_ptr;
/* Used for lod encoding. Thanks @urjaman for pointing out these routines can
* be cleaned up a lot. */

#define DECODE_FIXED_16(x) ((float) (x / 256.0))
#define DECODE_FIXED_16(x) ((float)(x / 256.0))

static inline int16_t
FIXED_16(float x, bool allow_negative)
{
/* Clamp inputs, accounting for float error */
float max_lod = (32.0 - (1.0 / 512.0));
float min_lod = allow_negative ? -max_lod : 0.0;
/* Clamp inputs, accounting for float error */
float max_lod = (32.0 - (1.0 / 512.0));
float min_lod = allow_negative ? -max_lod : 0.0;

x = ((x > max_lod) ? max_lod : ((x < min_lod) ? min_lod : x));
x = ((x > max_lod) ? max_lod : ((x < min_lod) ? min_lod : x));

return (int) (x * 256.0);
return (int)(x * 256.0);
}

#endif /* __PANFROST_JOB_H__ */
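FIXED_16 above stores LODs in 8.8 fixed point, clamped to roughly plus or minus (32 - 1/512). A small round-trip example follows, assuming panfrost-job.h is included; only the main() driver is new.

#include <stdint.h>
#include <stdio.h>

/* Round-trip through the 8.8 fixed-point LOD helpers above: values in
 * range survive with 1/256 precision, out-of-range values clamp. */
int
main(void)
{
   float lods[] = {0.0f, 1.5f, -2.25f, 40.0f};

   for (unsigned i = 0; i < 4; ++i) {
      int16_t fixed = FIXED_16(lods[i], true);
      printf("%f -> %d -> %f\n", lods[i], fixed, DECODE_FIXED_16(fixed));
   }

   return 0;
}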
File diff suppressed because it is too large
@@ -36,54 +36,54 @@ extern FILE *pandecode_dump_stream;
void pandecode_dump_file_open(void);

struct pandecode_mapped_memory {
struct rb_node node;
size_t length;
void *addr;
uint64_t gpu_va;
bool ro;
char name[32];
struct rb_node node;
size_t length;
void *addr;
uint64_t gpu_va;
bool ro;
char name[32];
};

char *pointer_as_memory_reference(uint64_t ptr);

struct pandecode_mapped_memory *pandecode_find_mapped_gpu_mem_containing(uint64_t addr);
struct pandecode_mapped_memory *
pandecode_find_mapped_gpu_mem_containing(uint64_t addr);

void pandecode_map_read_write(void);

void pandecode_dump_mappings(void);

static inline void *
__pandecode_fetch_gpu_mem(uint64_t gpu_va, size_t size,
int line, const char *filename)
__pandecode_fetch_gpu_mem(uint64_t gpu_va, size_t size, int line,
const char *filename)
{
const struct pandecode_mapped_memory *mem =
pandecode_find_mapped_gpu_mem_containing(gpu_va);
const struct pandecode_mapped_memory *mem =
pandecode_find_mapped_gpu_mem_containing(gpu_va);

if (!mem) {
fprintf(stderr, "Access to unknown memory %" PRIx64 " in %s:%d\n",
gpu_va, filename, line);
assert(0);
}
if (!mem) {
fprintf(stderr, "Access to unknown memory %" PRIx64 " in %s:%d\n", gpu_va,
filename, line);
assert(0);
}

assert(size + (gpu_va - mem->gpu_va) <= mem->length);
assert(size + (gpu_va - mem->gpu_va) <= mem->length);

return mem->addr + gpu_va - mem->gpu_va;
return mem->addr + gpu_va - mem->gpu_va;
}

#define pandecode_fetch_gpu_mem(gpu_va, size) \
__pandecode_fetch_gpu_mem(gpu_va, size, __LINE__, __FILE__)
#define pandecode_fetch_gpu_mem(gpu_va, size) \
__pandecode_fetch_gpu_mem(gpu_va, size, __LINE__, __FILE__)

/* Returns a validated pointer to mapped GPU memory with the given pointer type,
* size automatically determined from the pointer type
*/
#define PANDECODE_PTR(gpu_va, type) \
((type*)(__pandecode_fetch_gpu_mem(gpu_va, sizeof(type), \
__LINE__, __FILE__)))
#define PANDECODE_PTR(gpu_va, type) \
((type *)(__pandecode_fetch_gpu_mem(gpu_va, sizeof(type), __LINE__, \
__FILE__)))

/* Usage: <variable type> PANDECODE_PTR_VAR(name, gpu_va) */
#define PANDECODE_PTR_VAR(name, gpu_va) \
name = __pandecode_fetch_gpu_mem(gpu_va, sizeof(*name), \
__LINE__, __FILE__)
#define PANDECODE_PTR_VAR(name, gpu_va) \
name = __pandecode_fetch_gpu_mem(gpu_va, sizeof(*name), __LINE__, __FILE__)

/* Forward declare for all supported gens to permit thunking */
void pandecode_jc_v4(mali_ptr jc_gpu_va, unsigned gpu_id);
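The accessors above give a bounds-checked, typed view of captured GPU memory, following the usage pattern the header documents. An illustrative caller, assuming decode.h is included and the address was previously registered with pandecode_inject_mmap():

#include <stdint.h>

/* Declare a typed pointer, then validate-and-assign via the macro; the
 * helper asserts if the VA is unmapped or the access runs off the end */
void
inspect_words(uint64_t gpu_va)
{
   uint32_t *words;
   PANDECODE_PTR_VAR(words, gpu_va);

   pan_hexdump(pandecode_dump_stream, (const uint8_t *)words,
               sizeof(*words), true);
}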
@@ -101,44 +101,44 @@ void pandecode_abort_on_fault_v9(mali_ptr jc_gpu_va);
static inline void
pan_hexdump(FILE *fp, const uint8_t *hex, size_t cnt, bool with_strings)
{
for (unsigned i = 0; i < cnt; ++i) {
if ((i & 0xF) == 0)
fprintf(fp, "%06X ", i);
for (unsigned i = 0; i < cnt; ++i) {
if ((i & 0xF) == 0)
fprintf(fp, "%06X ", i);

uint8_t v = hex[i];
uint8_t v = hex[i];

if (v == 0 && (i & 0xF) == 0) {
/* Check if we're starting an aligned run of zeroes */
unsigned zero_count = 0;
if (v == 0 && (i & 0xF) == 0) {
/* Check if we're starting an aligned run of zeroes */
unsigned zero_count = 0;

for (unsigned j = i; j < cnt; ++j) {
if (hex[j] == 0)
zero_count++;
else
break;
}
for (unsigned j = i; j < cnt; ++j) {
if (hex[j] == 0)
zero_count++;
else
break;
}

if (zero_count >= 32) {
fprintf(fp, "*\n");
i += (zero_count & ~0xF) - 1;
continue;
}
}
if (zero_count >= 32) {
fprintf(fp, "*\n");
i += (zero_count & ~0xF) - 1;
continue;
}
}

fprintf(fp, "%02X ", hex[i]);
if ((i & 0xF) == 0xF && with_strings) {
fprintf(fp, " | ");
for (unsigned j = i & ~0xF; j <= i; ++j) {
uint8_t c = hex[j];
fputc((c < 32 || c > 128) ? '.' : c, fp);
}
}
fprintf(fp, "%02X ", hex[i]);
if ((i & 0xF) == 0xF && with_strings) {
fprintf(fp, " | ");
for (unsigned j = i & ~0xF; j <= i; ++j) {
uint8_t c = hex[j];
fputc((c < 32 || c > 128) ? '.' : c, fp);
}
}

if ((i & 0xF) == 0xF)
fprintf(fp, "\n");
}
if ((i & 0xF) == 0xF)
fprintf(fp, "\n");
}

fprintf(fp, "\n");
fprintf(fp, "\n");
}

#endif /* __MMAP_TRACE_H__ */
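pan_hexdump above collapses aligned runs of 32 or more zero bytes into a single "*" row, od(1)-style, and optionally prints an ASCII column. A quick demo, assuming decode.h is included; the buffer contents are arbitrary.

#include <stdint.h>
#include <stdio.h>

/* Bytes 16..79 are an aligned 64-byte zero run, so they collapse to one
 * "*" line in the output; the non-zero rows print normally. */
int
main(void)
{
   uint8_t buf[96] = {0};
   buf[0] = 0xca;
   buf[80] = 0xfe;

   pan_hexdump(stdout, buf, sizeof(buf), true);
   return 0;
}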
@@ -23,18 +23,18 @@
* SOFTWARE.
*/

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

#include "decode.h"
#include "util/macros.h"
#include "util/simple_mtx.h"
#include "util/u_debug.h"
#include "util/u_dynarray.h"
#include "util/simple_mtx.h"
#include "decode.h"

FILE *pandecode_dump_stream;

@@ -46,8 +46,8 @@ static struct util_dynarray ro_mappings;

static simple_mtx_t pandecode_lock = SIMPLE_MTX_INITIALIZER;

#define to_mapped_memory(x) \
rb_node_data(struct pandecode_mapped_memory, x, node)
#define to_mapped_memory(x) \
rb_node_data(struct pandecode_mapped_memory, x, node)

/*
* Compare a GPU VA to a node, considering a GPU VA to be equal to a node if it

@@ -57,147 +57,147 @@ static simple_mtx_t pandecode_lock = SIMPLE_MTX_INITIALIZER;
static int
pandecode_cmp_key(const struct rb_node *lhs, const void *key)
{
struct pandecode_mapped_memory *mem = to_mapped_memory(lhs);
uint64_t *gpu_va = (uint64_t *) key;
struct pandecode_mapped_memory *mem = to_mapped_memory(lhs);
uint64_t *gpu_va = (uint64_t *)key;

if (mem->gpu_va <= *gpu_va && *gpu_va < (mem->gpu_va + mem->length))
return 0;
else
return mem->gpu_va - *gpu_va;
if (mem->gpu_va <= *gpu_va && *gpu_va < (mem->gpu_va + mem->length))
return 0;
else
return mem->gpu_va - *gpu_va;
}

static int
pandecode_cmp(const struct rb_node *lhs, const struct rb_node *rhs)
{
return to_mapped_memory(lhs)->gpu_va - to_mapped_memory(rhs)->gpu_va;
return to_mapped_memory(lhs)->gpu_va - to_mapped_memory(rhs)->gpu_va;
}

static struct pandecode_mapped_memory *
pandecode_find_mapped_gpu_mem_containing_rw(uint64_t addr)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);

struct rb_node *node = rb_tree_search(&mmap_tree, &addr, pandecode_cmp_key);
struct rb_node *node = rb_tree_search(&mmap_tree, &addr, pandecode_cmp_key);

return to_mapped_memory(node);
return to_mapped_memory(node);
}

struct pandecode_mapped_memory *
pandecode_find_mapped_gpu_mem_containing(uint64_t addr)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);

struct pandecode_mapped_memory *mem = pandecode_find_mapped_gpu_mem_containing_rw(addr);
struct pandecode_mapped_memory *mem =
pandecode_find_mapped_gpu_mem_containing_rw(addr);

if (mem && mem->addr && !mem->ro) {
mprotect(mem->addr, mem->length, PROT_READ);
mem->ro = true;
util_dynarray_append(&ro_mappings, struct pandecode_mapped_memory *, mem);
}
if (mem && mem->addr && !mem->ro) {
mprotect(mem->addr, mem->length, PROT_READ);
mem->ro = true;
util_dynarray_append(&ro_mappings, struct pandecode_mapped_memory *, mem);
}

return mem;
return mem;
}

void
pandecode_map_read_write(void)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);

util_dynarray_foreach(&ro_mappings, struct pandecode_mapped_memory *, mem) {
(*mem)->ro = false;
mprotect((*mem)->addr, (*mem)->length, PROT_READ | PROT_WRITE);
}
util_dynarray_clear(&ro_mappings);
util_dynarray_foreach(&ro_mappings, struct pandecode_mapped_memory *, mem) {
(*mem)->ro = false;
mprotect((*mem)->addr, (*mem)->length, PROT_READ | PROT_WRITE);
}
util_dynarray_clear(&ro_mappings);
}

static void
pandecode_add_name(struct pandecode_mapped_memory *mem, uint64_t gpu_va, const char *name)
pandecode_add_name(struct pandecode_mapped_memory *mem, uint64_t gpu_va,
const char *name)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);

if (!name) {
/* If we don't have a name, assign one */
if (!name) {
/* If we don't have a name, assign one */

snprintf(mem->name, sizeof(mem->name) - 1,
"memory_%" PRIx64, gpu_va);
} else {
assert((strlen(name) + 1) < sizeof(mem->name));
memcpy(mem->name, name, strlen(name) + 1);
}
snprintf(mem->name, sizeof(mem->name) - 1, "memory_%" PRIx64, gpu_va);
} else {
assert((strlen(name) + 1) < sizeof(mem->name));
memcpy(mem->name, name, strlen(name) + 1);
}
}

void
pandecode_inject_mmap(uint64_t gpu_va, void *cpu, unsigned sz, const char *name)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);

/* First, search if we already mapped this and are just updating an address */
/* First, search if we already mapped this and are just updating an address */

struct pandecode_mapped_memory *existing =
pandecode_find_mapped_gpu_mem_containing_rw(gpu_va);
struct pandecode_mapped_memory *existing =
pandecode_find_mapped_gpu_mem_containing_rw(gpu_va);

if (existing && existing->gpu_va == gpu_va) {
existing->length = sz;
existing->addr = cpu;
pandecode_add_name(existing, gpu_va, name);
} else {
/* Otherwise, add a fresh mapping */
struct pandecode_mapped_memory *mapped_mem = NULL;
if (existing && existing->gpu_va == gpu_va) {
existing->length = sz;
existing->addr = cpu;
pandecode_add_name(existing, gpu_va, name);
} else {
/* Otherwise, add a fresh mapping */
struct pandecode_mapped_memory *mapped_mem = NULL;

mapped_mem = calloc(1, sizeof(*mapped_mem));
mapped_mem->gpu_va = gpu_va;
mapped_mem->length = sz;
mapped_mem->addr = cpu;
pandecode_add_name(mapped_mem, gpu_va, name);
mapped_mem = calloc(1, sizeof(*mapped_mem));
mapped_mem->gpu_va = gpu_va;
mapped_mem->length = sz;
mapped_mem->addr = cpu;
pandecode_add_name(mapped_mem, gpu_va, name);

/* Add it to the tree */
rb_tree_insert(&mmap_tree, &mapped_mem->node, pandecode_cmp);
}
/* Add it to the tree */
rb_tree_insert(&mmap_tree, &mapped_mem->node, pandecode_cmp);
}

simple_mtx_unlock(&pandecode_lock);
simple_mtx_unlock(&pandecode_lock);
}

void
pandecode_inject_free(uint64_t gpu_va, unsigned sz)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);

struct pandecode_mapped_memory *mem =
pandecode_find_mapped_gpu_mem_containing_rw(gpu_va);
struct pandecode_mapped_memory *mem =
pandecode_find_mapped_gpu_mem_containing_rw(gpu_va);

if (mem) {
assert(mem->gpu_va == gpu_va);
assert(mem->length == sz);
if (mem) {
assert(mem->gpu_va == gpu_va);
assert(mem->length == sz);

rb_tree_remove(&mmap_tree, &mem->node);
free(mem);
}
rb_tree_remove(&mmap_tree, &mem->node);
free(mem);
}

simple_mtx_unlock(&pandecode_lock);
simple_mtx_unlock(&pandecode_lock);
}

char *
pointer_as_memory_reference(uint64_t ptr)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);

struct pandecode_mapped_memory *mapped;
char *out = malloc(128);
struct pandecode_mapped_memory *mapped;
char *out = malloc(128);

/* Try to find the corresponding mapped zone */
/* Try to find the corresponding mapped zone */

mapped = pandecode_find_mapped_gpu_mem_containing_rw(ptr);
mapped = pandecode_find_mapped_gpu_mem_containing_rw(ptr);

if (mapped) {
snprintf(out, 128, "%s + %d", mapped->name, (int) (ptr - mapped->gpu_va));
return out;
}
if (mapped) {
snprintf(out, 128, "%s + %d", mapped->name, (int)(ptr - mapped->gpu_va));
return out;
}

/* Just use the raw address if other options are exhausted */

snprintf(out, 128, "0x%" PRIx64, ptr);
return out;
/* Just use the raw address if other options are exhausted */

snprintf(out, 128, "0x%" PRIx64, ptr);
return out;
}

static int pandecode_dump_frame_count = 0;
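pandecode_find_mapped_gpu_mem_containing above flips each mapping to read-only while it is being decoded, so a stray write faults instead of silently corrupting the capture; pandecode_map_read_write restores write access afterwards. A standalone sketch of that mprotect() toggle, independent of the driver:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

/* Drop a mapping to read-only for an inspection phase, then restore
 * write access, mirroring the pandecode pattern above. */
int
main(void)
{
   size_t len = (size_t)sysconf(_SC_PAGESIZE);
   void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   if (p == MAP_FAILED)
      return 1;

   memset(p, 0xab, len);

   /* Inspect phase: any write to p would now SIGSEGV */
   mprotect(p, len, PROT_READ);
   printf("first byte: 0x%02x\n", *(unsigned char *)p);

   /* Restore phase, as pandecode_map_read_write() does per mapping */
   mprotect(p, len, PROT_READ | PROT_WRITE);
   munmap(p, len);
   return 0;
}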
@@ -207,129 +207,153 @@ static bool force_stderr = false;
void
pandecode_dump_file_open(void)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);

if (pandecode_dump_stream)
return;
if (pandecode_dump_stream)
return;

/* This does a getenv every frame, so it is possible to use
* setenv to change the base at runtime.
*/
const char *dump_file_base = debug_get_option("PANDECODE_DUMP_FILE", "pandecode.dump");
if (force_stderr || !strcmp(dump_file_base, "stderr"))
pandecode_dump_stream = stderr;
else {
char buffer[1024];
snprintf(buffer, sizeof(buffer), "%s.%04d", dump_file_base, pandecode_dump_frame_count);
printf("pandecode: dump command stream to file %s\n", buffer);
pandecode_dump_stream = fopen(buffer, "w");
if (!pandecode_dump_stream)
fprintf(stderr,
"pandecode: failed to open command stream log file %s\n",
buffer);
}
/* This does a getenv every frame, so it is possible to use
* setenv to change the base at runtime.
*/
const char *dump_file_base =
debug_get_option("PANDECODE_DUMP_FILE", "pandecode.dump");
if (force_stderr || !strcmp(dump_file_base, "stderr"))
pandecode_dump_stream = stderr;
else {
char buffer[1024];
snprintf(buffer, sizeof(buffer), "%s.%04d", dump_file_base,
pandecode_dump_frame_count);
printf("pandecode: dump command stream to file %s\n", buffer);
pandecode_dump_stream = fopen(buffer, "w");
if (!pandecode_dump_stream)
fprintf(stderr,
"pandecode: failed to open command stream log file %s\n",
buffer);
}
}

static void
pandecode_dump_file_close(void)
{
simple_mtx_assert_locked(&pandecode_lock);
simple_mtx_assert_locked(&pandecode_lock);

if (pandecode_dump_stream && pandecode_dump_stream != stderr) {
if (fclose(pandecode_dump_stream))
perror("pandecode: dump file");
if (pandecode_dump_stream && pandecode_dump_stream != stderr) {
if (fclose(pandecode_dump_stream))
perror("pandecode: dump file");

pandecode_dump_stream = NULL;
}
pandecode_dump_stream = NULL;
}
}

void
pandecode_initialize(bool to_stderr)
{
force_stderr = to_stderr;
rb_tree_init(&mmap_tree);
util_dynarray_init(&ro_mappings, NULL);
force_stderr = to_stderr;
rb_tree_init(&mmap_tree);
util_dynarray_init(&ro_mappings, NULL);
}

void
pandecode_next_frame(void)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);

pandecode_dump_file_close();
pandecode_dump_frame_count++;
pandecode_dump_file_close();
pandecode_dump_frame_count++;

simple_mtx_unlock(&pandecode_lock);
simple_mtx_unlock(&pandecode_lock);
}

void
pandecode_close(void)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);

rb_tree_foreach_safe(struct pandecode_mapped_memory, it, &mmap_tree, node) {
rb_tree_remove(&mmap_tree, &it->node);
free(it);
}
rb_tree_foreach_safe(struct pandecode_mapped_memory, it, &mmap_tree, node) {
rb_tree_remove(&mmap_tree, &it->node);
free(it);
}

util_dynarray_fini(&ro_mappings);
pandecode_dump_file_close();
util_dynarray_fini(&ro_mappings);
pandecode_dump_file_close();

simple_mtx_unlock(&pandecode_lock);
simple_mtx_unlock(&pandecode_lock);
}

void
pandecode_dump_mappings(void)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);

pandecode_dump_file_open();
pandecode_dump_file_open();

rb_tree_foreach(struct pandecode_mapped_memory, it, &mmap_tree, node) {
if (!it->addr || !it->length)
continue;
rb_tree_foreach(struct pandecode_mapped_memory, it, &mmap_tree, node) {
if (!it->addr || !it->length)
continue;

fprintf(pandecode_dump_stream, "Buffer: %s gpu %" PRIx64 "\n\n",
it->name, it->gpu_va);
fprintf(pandecode_dump_stream, "Buffer: %s gpu %" PRIx64 "\n\n", it->name,
it->gpu_va);

pan_hexdump(pandecode_dump_stream, it->addr, it->length, false);
fprintf(pandecode_dump_stream, "\n");
}
pan_hexdump(pandecode_dump_stream, it->addr, it->length, false);
fprintf(pandecode_dump_stream, "\n");
}

fflush(pandecode_dump_stream);
simple_mtx_unlock(&pandecode_lock);
fflush(pandecode_dump_stream);
simple_mtx_unlock(&pandecode_lock);
}

void
pandecode_abort_on_fault(mali_ptr jc_gpu_va, unsigned gpu_id)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);

switch (pan_arch(gpu_id)) {
case 4: pandecode_abort_on_fault_v4(jc_gpu_va); break;
case 5: pandecode_abort_on_fault_v5(jc_gpu_va); break;
case 6: pandecode_abort_on_fault_v6(jc_gpu_va); break;
case 7: pandecode_abort_on_fault_v7(jc_gpu_va); break;
case 9: pandecode_abort_on_fault_v9(jc_gpu_va); break;
default: unreachable("Unsupported architecture");
}
switch (pan_arch(gpu_id)) {
case 4:
pandecode_abort_on_fault_v4(jc_gpu_va);
break;
case 5:
pandecode_abort_on_fault_v5(jc_gpu_va);
break;
case 6:
pandecode_abort_on_fault_v6(jc_gpu_va);
break;
case 7:
pandecode_abort_on_fault_v7(jc_gpu_va);
break;
case 9:
pandecode_abort_on_fault_v9(jc_gpu_va);
break;
default:
unreachable("Unsupported architecture");
}

simple_mtx_unlock(&pandecode_lock);
simple_mtx_unlock(&pandecode_lock);
}

void
pandecode_jc(mali_ptr jc_gpu_va, unsigned gpu_id)
{
simple_mtx_lock(&pandecode_lock);
simple_mtx_lock(&pandecode_lock);

switch (pan_arch(gpu_id)) {
case 4: pandecode_jc_v4(jc_gpu_va, gpu_id); break;
case 5: pandecode_jc_v5(jc_gpu_va, gpu_id); break;
case 6: pandecode_jc_v6(jc_gpu_va, gpu_id); break;
case 7: pandecode_jc_v7(jc_gpu_va, gpu_id); break;
case 9: pandecode_jc_v9(jc_gpu_va, gpu_id); break;
default: unreachable("Unsupported architecture");
}
switch (pan_arch(gpu_id)) {
case 4:
pandecode_jc_v4(jc_gpu_va, gpu_id);
break;
case 5:
pandecode_jc_v5(jc_gpu_va, gpu_id);
break;
case 6:
pandecode_jc_v6(jc_gpu_va, gpu_id);
break;
case 7:
pandecode_jc_v7(jc_gpu_va, gpu_id);
break;
case 9:
pandecode_jc_v9(jc_gpu_va, gpu_id);
break;
default:
unreachable("Unsupported architecture");
}

simple_mtx_unlock(&pandecode_lock);
simple_mtx_unlock(&pandecode_lock);
}
@@ -56,45 +56,45 @@
static inline unsigned
pan_arch(unsigned gpu_id)
{
switch (gpu_id) {
case 0x600:
case 0x620:
case 0x720:
return 4;
case 0x750:
case 0x820:
case 0x830:
case 0x860:
case 0x880:
return 5;
default:
return gpu_id >> 12;
}
switch (gpu_id) {
case 0x600:
case 0x620:
case 0x720:
return 4;
case 0x750:
case 0x820:
case 0x830:
case 0x860:
case 0x880:
return 5;
default:
return gpu_id >> 12;
}
}

/* Base macro defined on the command line. */
#ifndef PAN_ARCH
# include "genxml/common_pack.h"
#include "genxml/common_pack.h"
#else

/* Suffixing macros */
#if (PAN_ARCH == 4)
# define GENX(X) X##_v4
# include "genxml/v4_pack.h"
#define GENX(X) X##_v4
#include "genxml/v4_pack.h"
#elif (PAN_ARCH == 5)
# define GENX(X) X##_v5
# include "genxml/v5_pack.h"
#define GENX(X) X##_v5
#include "genxml/v5_pack.h"
#elif (PAN_ARCH == 6)
# define GENX(X) X##_v6
# include "genxml/v6_pack.h"
#define GENX(X) X##_v6
#include "genxml/v6_pack.h"
#elif (PAN_ARCH == 7)
# define GENX(X) X##_v7
# include "genxml/v7_pack.h"
#define GENX(X) X##_v7
#include "genxml/v7_pack.h"
#elif (PAN_ARCH == 9)
# define GENX(X) X##_v9
# include "genxml/v9_pack.h"
#define GENX(X) X##_v9
#include "genxml/v9_pack.h"
#else
# error "Need to add suffixing macro for this architecture"
#error "Need to add suffixing macro for this architecture"
#endif

#endif /* PAN_ARCH */
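pan_arch above maps a GPU product ID to an architecture version: a handful of Midgard IDs are special-cased, while newer IDs encode the architecture directly in bits 12 and up. A usage sketch, assuming this header is included; the 0x7212 product ID is purely illustrative.

#include <assert.h>

int
main(void)
{
   assert(pan_arch(0x600) == 4);  /* Mali-T600 special case */
   assert(pan_arch(0x750) == 5);  /* Mali-T760 special case */
   assert(pan_arch(0x7212) == 7); /* generic path: 0x7212 >> 12 == 7 */
   return 0;
}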
@@ -50,8 +50,8 @@
* must also be cache-line aligned, so there can sometimes be a bit of padding
* between the header and body.
*
* As an example, a 64x64 RGBA framebuffer contains 64/16 = 4 tiles horizontally and
* 4 tiles vertically. There are 4*4=16 tiles in total, each containing 16
* As an example, a 64x64 RGBA framebuffer contains 64/16 = 4 tiles horizontally
* and 4 tiles vertically. There are 4*4=16 tiles in total, each containing 16
* bytes of metadata, so there is a 16*16=256 byte header. 64x64 is already
* tile aligned, so the body is 64*64 * 4 bytes per pixel = 16384 bytes of
* body.
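The worked example in the comment above is easy to reproduce. A small sketch with the same assumptions (16x16 tiles, 16 bytes of header per tile, 4 bytes per pixel); for 64x64 it prints a 256-byte header and a 16384-byte body.

#include <stdio.h>

int
main(void)
{
   unsigned width = 64, height = 64, bpp = 4;

   /* Round dimensions up to whole 16x16 tiles */
   unsigned tiles_x = (width + 15) / 16;
   unsigned tiles_y = (height + 15) / 16;

   unsigned header = tiles_x * tiles_y * 16; /* 16 bytes metadata/tile */
   unsigned body = width * height * bpp;

   printf("header: %u bytes, body: %u bytes\n", header, body);
   return 0;
}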
@@ -69,45 +69,45 @@
static enum pipe_format
unswizzled_format(enum pipe_format format)
{
switch (format) {
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_I8_UNORM:
return PIPE_FORMAT_R8_UNORM;
switch (format) {
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_I8_UNORM:
return PIPE_FORMAT_R8_UNORM;

case PIPE_FORMAT_L8A8_UNORM:
return PIPE_FORMAT_R8G8_UNORM;
case PIPE_FORMAT_L8A8_UNORM:
return PIPE_FORMAT_R8G8_UNORM;

case PIPE_FORMAT_B8G8R8_UNORM:
return PIPE_FORMAT_R8G8B8_UNORM;
case PIPE_FORMAT_B8G8R8_UNORM:
return PIPE_FORMAT_R8G8B8_UNORM;

case PIPE_FORMAT_R8G8B8X8_UNORM:
case PIPE_FORMAT_B8G8R8A8_UNORM:
case PIPE_FORMAT_B8G8R8X8_UNORM:
case PIPE_FORMAT_A8R8G8B8_UNORM:
case PIPE_FORMAT_X8R8G8B8_UNORM:
case PIPE_FORMAT_X8B8G8R8_UNORM:
case PIPE_FORMAT_A8B8G8R8_UNORM:
return PIPE_FORMAT_R8G8B8A8_UNORM;
case PIPE_FORMAT_R8G8B8X8_UNORM:
case PIPE_FORMAT_B8G8R8A8_UNORM:
case PIPE_FORMAT_B8G8R8X8_UNORM:
case PIPE_FORMAT_A8R8G8B8_UNORM:
case PIPE_FORMAT_X8R8G8B8_UNORM:
case PIPE_FORMAT_X8B8G8R8_UNORM:
case PIPE_FORMAT_A8B8G8R8_UNORM:
return PIPE_FORMAT_R8G8B8A8_UNORM;

case PIPE_FORMAT_B5G6R5_UNORM:
return PIPE_FORMAT_R5G6B5_UNORM;
case PIPE_FORMAT_B5G6R5_UNORM:
return PIPE_FORMAT_R5G6B5_UNORM;

case PIPE_FORMAT_B5G5R5A1_UNORM:
return PIPE_FORMAT_R5G5B5A1_UNORM;
case PIPE_FORMAT_B5G5R5A1_UNORM:
return PIPE_FORMAT_R5G5B5A1_UNORM;

case PIPE_FORMAT_R10G10B10X2_UNORM:
case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_B10G10R10X2_UNORM:
return PIPE_FORMAT_R10G10B10A2_UNORM;
case PIPE_FORMAT_R10G10B10X2_UNORM:
case PIPE_FORMAT_B10G10R10A2_UNORM:
case PIPE_FORMAT_B10G10R10X2_UNORM:
return PIPE_FORMAT_R10G10B10A2_UNORM;

case PIPE_FORMAT_A4B4G4R4_UNORM:
case PIPE_FORMAT_B4G4R4A4_UNORM:
return PIPE_FORMAT_R4G4B4A4_UNORM;
case PIPE_FORMAT_A4B4G4R4_UNORM:
case PIPE_FORMAT_B4G4R4A4_UNORM:
return PIPE_FORMAT_R4G4B4A4_UNORM;

default:
return format;
}
default:
return format;
}
}

/* AFBC supports compressing a few canonical formats. Additional formats are

@@ -118,29 +118,29 @@ unswizzled_format(enum pipe_format format)
enum pan_afbc_mode
panfrost_afbc_format(unsigned arch, enum pipe_format format)
{
/* Luminance-alpha not supported for AFBC on v7+ */
switch (format) {
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8A8_UNORM:
if (arch >= 7)
return PAN_AFBC_MODE_INVALID;
else
break;
default:
break;
}
/* Luminance-alpha not supported for AFBC on v7+ */
switch (format) {
case PIPE_FORMAT_A8_UNORM:
case PIPE_FORMAT_L8_UNORM:
case PIPE_FORMAT_I8_UNORM:
case PIPE_FORMAT_L8A8_UNORM:
if (arch >= 7)
return PAN_AFBC_MODE_INVALID;
else
break;
default:
break;
}

/* sRGB does not change the pixel format itself, only the
* interpretation. The interpretation is handled by conversion hardware
* independent to the compression hardware, so we can compress sRGB
* formats by using the corresponding linear format.
*/
format = util_format_linear(format);
/* sRGB does not change the pixel format itself, only the
* interpretation. The interpretation is handled by conversion hardware
* independent to the compression hardware, so we can compress sRGB
* formats by using the corresponding linear format.
*/
format = util_format_linear(format);

/* We handle swizzling orthogonally to AFBC */
format = unswizzled_format(format);
/* We handle swizzling orthogonally to AFBC */
format = unswizzled_format(format);

/* clang-format off */
switch (format) {

@@ -166,9 +166,10 @@ panfrost_afbc_format(unsigned arch, enum pipe_format format)
/* A format may be compressed as AFBC if it has an AFBC internal format */

bool
panfrost_format_supports_afbc(const struct panfrost_device *dev, enum pipe_format format)
panfrost_format_supports_afbc(const struct panfrost_device *dev,
enum pipe_format format)
{
return panfrost_afbc_format(dev->arch, format) != PAN_AFBC_MODE_INVALID;
return panfrost_afbc_format(dev->arch, format) != PAN_AFBC_MODE_INVALID;
}

/* The lossless colour transform (AFBC_FORMAT_MOD_YTR) requires RGB. */

@@ -176,15 +177,14 @@ panfrost_format_supports_afbc(const struct panfrost_device *dev, enum pipe_forma
bool
panfrost_afbc_can_ytr(enum pipe_format format)
{
const struct util_format_description *desc =
util_format_description(format);
const struct util_format_description *desc = util_format_description(format);

/* YTR is only defined for RGB(A) */
if (desc->nr_channels != 3 && desc->nr_channels != 4)
return false;
/* YTR is only defined for RGB(A) */
if (desc->nr_channels != 3 && desc->nr_channels != 4)
return false;

/* The fourth channel if it exists doesn't matter */
return desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB;
/* The fourth channel if it exists doesn't matter */
return desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB;
}

/*

@@ -194,5 +194,5 @@ panfrost_afbc_can_ytr(enum pipe_format format)
bool
panfrost_afbc_can_tile(const struct panfrost_device *dev)
{
return (dev->arch >= 7);
return (dev->arch >= 7);
}
@@ -39,91 +39,92 @@
static unsigned
panfrost_small_padded_vertex_count(unsigned idx)
{
if (idx < 10)
return idx;
else
return (idx + 1) & ~1;
if (idx < 10)
return idx;
else
return (idx + 1) & ~1;
}

static unsigned
panfrost_large_padded_vertex_count(uint32_t vertex_count)
{
/* First, we have to find the highest set one */
unsigned highest = 32 - __builtin_clz(vertex_count);
/* First, we have to find the highest set one */
unsigned highest = 32 - __builtin_clz(vertex_count);

/* Using that, we mask out the highest 4-bits */
unsigned n = highest - 4;
unsigned nibble = (vertex_count >> n) & 0xF;
/* Using that, we mask out the highest 4-bits */
unsigned n = highest - 4;
unsigned nibble = (vertex_count >> n) & 0xF;

/* Great, we have the nibble. Now we can just try possibilities. Note
* that we don't care about the bottom most bit in most cases, and we
* know the top bit must be 1 */
/* Great, we have the nibble. Now we can just try possibilities. Note
* that we don't care about the bottom most bit in most cases, and we
* know the top bit must be 1 */

unsigned middle_two = (nibble >> 1) & 0x3;
unsigned middle_two = (nibble >> 1) & 0x3;

switch (middle_two) {
case 0b00:
if (!(nibble & 1))
return (1 << n) * 9;
else
return (1 << (n + 1)) * 5;
case 0b01:
return (1 << (n + 2)) * 3;
case 0b10:
return (1 << (n + 1)) * 7;
case 0b11:
return (1 << (n + 4));
default:
return 0; /* unreachable */
}
switch (middle_two) {
case 0b00:
if (!(nibble & 1))
return (1 << n) * 9;
else
return (1 << (n + 1)) * 5;
case 0b01:
return (1 << (n + 2)) * 3;
case 0b10:
return (1 << (n + 1)) * 7;
case 0b11:
return (1 << (n + 4));
default:
return 0; /* unreachable */
}
}

unsigned
panfrost_padded_vertex_count(unsigned vertex_count)
{
if (vertex_count < 20)
return panfrost_small_padded_vertex_count(vertex_count);
else
return panfrost_large_padded_vertex_count(vertex_count);
if (vertex_count < 20)
return panfrost_small_padded_vertex_count(vertex_count);
else
return panfrost_large_padded_vertex_count(vertex_count);
}
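A quick way to see what panfrost_padded_vertex_count above produces: small counts stay nearly exact (just rounded to even from 10 up), while large counts snap to a power of two times a small odd factor. A usage sketch, assuming the function is in scope; the sample counts are arbitrary.

#include <stdio.h>

int
main(void)
{
   unsigned cases[] = {7, 11, 19, 20, 100, 257};

   /* Print each count next to its hardware-friendly padded value */
   for (unsigned i = 0; i < 6; ++i)
      printf("%u -> %u\n", cases[i], panfrost_padded_vertex_count(cases[i]));

   return 0;
}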
/* The much, much more irritating case -- instancing is enabled. See
* panfrost_job.h for notes on how this works */

unsigned
panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, unsigned *extra_flags)
panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift,
unsigned *extra_flags)
{
/* We have a NPOT divisor. Here's the fun one (multipling by
* the inverse and shifting) */
/* We have a NPOT divisor. Here's the fun one (multipling by
* the inverse and shifting) */

/* floor(log2(d)) */
unsigned shift = util_logbase2(hw_divisor);
/* floor(log2(d)) */
unsigned shift = util_logbase2(hw_divisor);

/* m = ceil(2^(32 + shift) / d) */
uint64_t shift_hi = 32 + shift;
uint64_t t = 1ll << shift_hi;
double t_f = t;
double hw_divisor_d = hw_divisor;
double m_f = ceil(t_f / hw_divisor_d);
unsigned m = m_f;
/* m = ceil(2^(32 + shift) / d) */
uint64_t shift_hi = 32 + shift;
uint64_t t = 1ll << shift_hi;
double t_f = t;
double hw_divisor_d = hw_divisor;
double m_f = ceil(t_f / hw_divisor_d);
unsigned m = m_f;

/* Default case */
uint32_t magic_divisor = m;
/* Default case */
uint32_t magic_divisor = m;

/* e = 2^(shift + 32) % d */
uint64_t e = t % hw_divisor;
/* e = 2^(shift + 32) % d */
uint64_t e = t % hw_divisor;

/* Apply round-down algorithm? e <= 2^shift?. XXX: The blob
* seems to use a different condition */
if (e <= (1ll << shift)) {
magic_divisor = m - 1;
*extra_flags = 1;
}
/* Apply round-down algorithm? e <= 2^shift?. XXX: The blob
* seems to use a different condition */
if (e <= (1ll << shift)) {
magic_divisor = m - 1;
*extra_flags = 1;
}

/* Top flag implicitly set */
assert(magic_divisor & (1u << 31));
magic_divisor &= ~(1u << 31);
*o_shift = shift;
/* Top flag implicitly set */
assert(magic_divisor & (1u << 31));
magic_divisor &= ~(1u << 31);
*o_shift = shift;

return magic_divisor;
return magic_divisor;
}
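The comments above describe the classic division-by-invariant-integers scheme: with m = ceil(2^(32 + shift) / d), floor(i / d) equals (i * m) >> (32 + shift); the round-down variant (extra_flags set) uses m - 1 and increments i first. A hedged cross-check that re-derives m locally rather than calling the driver helper:

#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
   unsigned d = 7; /* example NPOT divisor */
   unsigned shift = (unsigned)floor(log2(d));

   uint64_t t = 1ull << (32 + shift);
   uint64_t m = (t + d - 1) / d; /* ceil(2^(32+shift) / d) */
   uint64_t e = t % d;
   int round_down = (e <= (1ull << shift));

   /* Verify the multiply-and-shift reproduces integer division */
   for (uint32_t i = 0; i < 100000; ++i) {
      uint64_t q = round_down
                      ? (((uint64_t)i + 1) * (m - 1)) >> (32 + shift)
                      : ((uint64_t)i * m) >> (32 + shift);
      assert(q == i / d);
   }

   printf("magic %llx shift %u verified\n", (unsigned long long)m, shift);
   return 0;
}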
File diff suppressed because it is too large
@ -27,10 +27,10 @@
|
|||
|
||||
#include "genxml/gen_macros.h"
|
||||
|
||||
#include "util/u_dynarray.h"
|
||||
#include "util/format/u_format.h"
|
||||
#include "compiler/shader_enums.h"
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "compiler/shader_enums.h"
|
||||
#include "util/format/u_format.h"
|
||||
#include "util/u_dynarray.h"
|
||||
|
||||
#include "panfrost/util/pan_ir.h"
|
||||
|
||||
|
|
@ -38,84 +38,78 @@ struct MALI_BLEND_EQUATION;
|
|||
struct panfrost_device;
|
||||
|
||||
struct pan_blend_equation {
|
||||
unsigned blend_enable : 1;
|
||||
enum blend_func rgb_func : 3;
|
||||
unsigned rgb_invert_src_factor : 1;
|
||||
enum blend_factor rgb_src_factor : 4;
|
||||
unsigned rgb_invert_dst_factor : 1;
|
||||
enum blend_factor rgb_dst_factor : 4;
|
||||
enum blend_func alpha_func : 3;
|
||||
unsigned alpha_invert_src_factor : 1;
|
||||
enum blend_factor alpha_src_factor : 4;
|
||||
unsigned alpha_invert_dst_factor : 1;
|
||||
enum blend_factor alpha_dst_factor : 4;
|
||||
unsigned color_mask : 4;
|
||||
unsigned blend_enable : 1;
|
||||
enum blend_func rgb_func : 3;
|
||||
unsigned rgb_invert_src_factor : 1;
|
||||
enum blend_factor rgb_src_factor : 4;
|
||||
unsigned rgb_invert_dst_factor : 1;
|
||||
enum blend_factor rgb_dst_factor : 4;
|
||||
enum blend_func alpha_func : 3;
|
||||
unsigned alpha_invert_src_factor : 1;
|
||||
enum blend_factor alpha_src_factor : 4;
|
||||
unsigned alpha_invert_dst_factor : 1;
|
||||
   enum blend_factor alpha_dst_factor : 4;
   unsigned color_mask : 4;
};

struct pan_blend_rt_state {
   /* RT format */
   enum pipe_format format;

   /* Number of samples */
   unsigned nr_samples;

   struct pan_blend_equation equation;
};

struct pan_blend_state {
   bool logicop_enable;
   enum pipe_logicop logicop_func;
   float constants[4];
   unsigned rt_count;
   struct pan_blend_rt_state rts[8];
};

struct pan_blend_shader_key {
   enum pipe_format format;
   nir_alu_type src0_type, src1_type;
   uint32_t rt : 3;
   uint32_t has_constants : 1;
   uint32_t logicop_enable : 1;
   uint32_t logicop_func : 4;
   uint32_t nr_samples : 5;
   uint32_t padding : 18;
   struct pan_blend_equation equation;
};

struct pan_blend_shader_variant {
   struct list_head node;
   float constants[4];
   struct util_dynarray binary;
   unsigned first_tag;
   unsigned work_reg_count;
};

#define PAN_BLEND_SHADER_MAX_VARIANTS 32

struct pan_blend_shader {
   struct pan_blend_shader_key key;
   unsigned nvariants;
   struct list_head variants;
};

bool pan_blend_reads_dest(const struct pan_blend_equation eq);

bool pan_blend_can_fixed_function(const struct pan_blend_equation equation,
                                  bool supports_2src);

bool pan_blend_is_opaque(const struct pan_blend_equation eq);

bool pan_blend_alpha_zero_nop(const struct pan_blend_equation eq);

bool pan_blend_alpha_one_store(const struct pan_blend_equation eq);

unsigned pan_blend_constant_mask(const struct pan_blend_equation eq);

/* Fixed-function blending only supports a single constant, so if multiple bits
 * are set in constant_mask, the constants must match. Therefore we may pick

@@ -124,7 +118,7 @@ pan_blend_constant_mask(const struct pan_blend_equation eq);
static inline float
pan_blend_get_constant(unsigned mask, const float *constants)
{
   return mask ? constants[ffs(mask) - 1] : 0.0;
}

/* v6 doesn't support blend constants in FF blend equations whatsoever, and v7

@@ -134,7 +128,7 @@ pan_blend_get_constant(unsigned mask, const float *constants)
static inline bool
pan_blend_supports_constant(unsigned arch, unsigned rt)
{
   return !((arch == 6) || (arch == 7 && rt > 0));
}

/* The SOURCE_2 value is new in Bifrost */

@@ -142,50 +136,39 @@ pan_blend_supports_constant(unsigned arch, unsigned rt)
static inline bool
pan_blend_supports_2src(unsigned arch)
{
   return (arch >= 6);
}

bool pan_blend_is_homogenous_constant(unsigned mask, const float *constants);

void pan_blend_to_fixed_function_equation(const struct pan_blend_equation eq,
                                          struct MALI_BLEND_EQUATION *equation);

uint32_t pan_pack_blend(const struct pan_blend_equation equation);

void pan_blend_shaders_init(struct panfrost_device *dev);

void pan_blend_shaders_cleanup(struct panfrost_device *dev);

#ifdef PAN_ARCH

nir_shader *GENX(pan_blend_create_shader)(const struct panfrost_device *dev,
                                          const struct pan_blend_state *state,
                                          nir_alu_type src0_type,
                                          nir_alu_type src1_type, unsigned rt);

#if PAN_ARCH >= 6
uint64_t GENX(pan_blend_get_internal_desc)(const struct panfrost_device *dev,
                                           enum pipe_format fmt, unsigned rt,
                                           unsigned force_size, bool dithered);
#endif

/* Take blend_shaders.lock before calling this function and release it when
 * you're done with the shader variant object.
 */
struct pan_blend_shader_variant *GENX(pan_blend_get_shader_locked)(
   const struct panfrost_device *dev, const struct pan_blend_state *state,
   nir_alu_type src0_type, nir_alu_type src1_type, unsigned rt);
#endif

#endif
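To make the division of labour above concrete, here is a minimal sketch (not part of the tree) of how a driver might pick between the fixed-function path and a blend shader using these helpers. The function name and the exact policy are illustrative; the real decision in the driver also considers format support and other state.

/* Illustrative only: decide whether render target `rt` of `state` can use
 * fixed-function blending on GPU generation `arch`, else fall back to a
 * blend shader. */
static bool
example_use_fixed_function(const struct pan_blend_state *state, unsigned rt,
                           unsigned arch)
{
   const struct pan_blend_equation eq = state->rts[rt].equation;

   /* Logic ops can't be expressed as a blend equation in this sketch */
   if (state->logicop_enable)
      return false;

   /* The equation must map onto the fixed-function unit, which only
    * gained dual-source support on v6 (Bifrost) */
   if (!pan_blend_can_fixed_function(eq, pan_blend_supports_2src(arch)))
      return false;

   /* If a blend constant is referenced, this arch/RT pair must support
    * constants in fixed-function equations */
   if (pan_blend_constant_mask(eq) != 0 &&
       !pan_blend_supports_constant(arch, rt))
      return false;

   return true;
}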
File diff suppressed because it is too large
@@ -27,12 +27,12 @@

#include "genxml/gen_macros.h"

#include "pan_cs.h"
#include "pan_pool.h"
#include "pan_texture.h"
#include "pan_util.h"
#include "util/format/u_format.h"
#include "panfrost-job.h"

struct pan_fb_info;
struct pan_scoreboard;

@@ -40,90 +40,84 @@ struct pan_pool;
struct panfrost_device;

struct pan_blit_info {
   struct {
      struct {
         const struct pan_image *image;
         enum pipe_format format;
      } planes[2];
      unsigned level;
      struct {
         int32_t x, y, z;
         unsigned layer;
      } start, end;
   } src, dst;
   struct {
      bool enable;
      uint16_t minx, miny, maxx, maxy;
   } scissor;
   bool nearest;
};

struct pan_blit_context {
   mali_ptr rsd, vpd;
   mali_ptr textures;
   mali_ptr samplers;
   mali_ptr position;
   struct {
      enum mali_texture_dimension dim;
      struct {
         float x, y;
      } start, end;
      union {
         unsigned layer_offset;
         float z_offset;
      };
   } src;
   struct {
      int32_t layer_offset;
      int32_t cur_layer;
      int32_t last_layer;
   } dst;
   float z_scale;
};

void GENX(pan_blitter_init)(struct panfrost_device *dev,
                            struct pan_pool *bin_pool,
                            struct pan_pool *desc_pool);

void GENX(pan_blitter_cleanup)(struct panfrost_device *dev);

unsigned GENX(pan_preload_fb)(struct pan_pool *desc_pool,
                              struct pan_scoreboard *scoreboard,
                              struct pan_fb_info *fb, mali_ptr tsd,
                              mali_ptr tiler, struct panfrost_ptr *jobs);

void GENX(pan_blit_ctx_init)(struct panfrost_device *dev,
                             const struct pan_blit_info *info,
                             struct pan_pool *blit_pool,
                             struct pan_blit_context *ctx);

static inline bool
pan_blit_next_surface(struct pan_blit_context *ctx)
{
   if (ctx->dst.last_layer < ctx->dst.layer_offset) {
      if (ctx->dst.cur_layer <= ctx->dst.last_layer)
         return false;

      ctx->dst.cur_layer--;
   } else {
      if (ctx->dst.cur_layer >= ctx->dst.last_layer)
         return false;

      ctx->dst.cur_layer++;
   }

   return true;
}

struct panfrost_ptr GENX(pan_blit)(struct pan_blit_context *ctx,
                                   struct pan_pool *pool,
                                   struct pan_scoreboard *scoreboard,
                                   mali_ptr tsd, mali_ptr tiler);

#endif
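A usage sketch of the API above, assuming `dev`, `info`, `blit_pool`, `desc_pool`, `scoreboard`, `tsd` and `tiler` have been set up elsewhere: a layered blit initializes a context once and then emits one job per destination layer, advancing with pan_blit_next_surface(). The error handling here is illustrative.

struct pan_blit_context ctx;
GENX(pan_blit_ctx_init)(dev, &info, blit_pool, &ctx);

do {
   /* One blit job per destination layer */
   struct panfrost_ptr job =
      GENX(pan_blit)(&ctx, desc_pool, scoreboard, tsd, tiler);
   if (!job.cpu)
      break;
} while (pan_blit_next_surface(&ctx));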
@@ -24,10 +24,10 @@
 * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <xf86drm.h>
#include "drm-uapi/panfrost_drm.h"

#include "pan_bo.h"

@@ -56,53 +56,53 @@
 */

static struct panfrost_bo *
panfrost_bo_alloc(struct panfrost_device *dev, size_t size, uint32_t flags,
                  const char *label)
{
   struct drm_panfrost_create_bo create_bo = {.size = size};
   struct panfrost_bo *bo;
   int ret;

   if (dev->kernel_version->version_major > 1 ||
       dev->kernel_version->version_minor >= 1) {
      if (flags & PAN_BO_GROWABLE)
         create_bo.flags |= PANFROST_BO_HEAP;
      if (!(flags & PAN_BO_EXECUTE))
         create_bo.flags |= PANFROST_BO_NOEXEC;
   }

   ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_CREATE_BO, &create_bo);
   if (ret) {
      fprintf(stderr, "DRM_IOCTL_PANFROST_CREATE_BO failed: %m\n");
      return NULL;
   }

   bo = pan_lookup_bo(dev, create_bo.handle);
   assert(!memcmp(bo, &((struct panfrost_bo){}), sizeof(*bo)));

   bo->size = create_bo.size;
   bo->ptr.gpu = create_bo.offset;
   bo->gem_handle = create_bo.handle;
   bo->flags = flags;
   bo->dev = dev;
   bo->label = label;
   return bo;
}

static void
panfrost_bo_free(struct panfrost_bo *bo)
{
   struct drm_gem_close gem_close = {.handle = bo->gem_handle};
   int ret;

   ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
   if (ret) {
      fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %m\n");
      assert(0);
   }

   /* BO will be freed with the sparse array, but zero to indicate free */
   memset(bo, 0, sizeof(*bo));
}
/* Returns true if the BO is ready, false otherwise.

@@ -113,44 +113,44 @@ panfrost_bo_free(struct panfrost_bo *bo)
bool
panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, bool wait_readers)
{
   struct drm_panfrost_wait_bo req = {
      .handle = bo->gem_handle,
      .timeout_ns = timeout_ns,
   };
   int ret;

   /* If the BO has been exported or imported we can't rely on the cached
    * state, we need to call the WAIT_BO ioctl.
    */
   if (!(bo->flags & PAN_BO_SHARED)) {
      /* If ->gpu_access is 0, the BO is idle, no need to wait. */
      if (!bo->gpu_access)
         return true;

      /* If the caller only wants to wait for writers and no
       * writes are pending, we don't have to wait.
       */
      if (!wait_readers && !(bo->gpu_access & PAN_BO_ACCESS_WRITE))
         return true;
   }

   /* The ioctl returns a value >= 0 when the BO we are waiting for is
    * ready, -1 otherwise.
    */
   ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_WAIT_BO, &req);
   if (ret != -1) {
      /* Set gpu_access to 0 so that the next call to bo_wait()
       * doesn't have to call the WAIT_BO ioctl.
       */
      bo->gpu_access = 0;
      return true;
   }

   /* If errno is not ETIMEDOUT or EBUSY that means the handle we passed
    * is invalid, which shouldn't happen here.
    */
   assert(errno == ETIMEDOUT || errno == EBUSY);
   return false;
}
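The two common calling patterns, for illustration: a timeout of 0 turns panfrost_bo_wait() into a non-blocking poll, while INT64_MAX waits indefinitely; wait_readers=false only waits for pending writes, which is sufficient before a CPU read.

/* Non-blocking poll: is the BO idle right now? */
if (!panfrost_bo_wait(bo, 0, false)) {
   /* Not yet; block until writers (and readers, since
    * wait_readers=true) have finished */
   panfrost_bo_wait(bo, INT64_MAX, true);
}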
/* Helper to calculate the bucket index of a BO */

@@ -158,24 +158,23 @@ panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, bool wait_readers)
static unsigned
pan_bucket_index(unsigned size)
{
   /* Round down to POT to compute a bucket index */
   unsigned bucket_index = util_logbase2(size);

   /* Clamp the bucket index; all huge allocations will be
    * sorted into the largest bucket */
   bucket_index = CLAMP(bucket_index, MIN_BO_CACHE_BUCKET, MAX_BO_CACHE_BUCKET);

   /* Reindex from 0 */
   return (bucket_index - MIN_BO_CACHE_BUCKET);
}

static struct list_head *
pan_bucket(struct panfrost_device *dev, unsigned size)
{
   return &dev->bo_cache.buckets[pan_bucket_index(size)];
}
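A worked example of the bucketing, assuming the smallest bucket corresponds to 2^12 bytes (the actual MIN_BO_CACHE_BUCKET/MAX_BO_CACHE_BUCKET constants live elsewhere in the tree). util_logbase2() rounds down, so sizes between two powers of two share the lower bucket:

pan_bucket_index(4096); /* log2(4096) = 12         -> bucket 0 */
pan_bucket_index(6000); /* log2 rounds down to 12  -> bucket 0 */
pan_bucket_index(8192); /* log2(8192) = 13         -> bucket 1 */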
/* Tries to fetch a BO of sufficient size with the appropriate flags from the

@@ -184,74 +183,71 @@ pan_bucket(struct panfrost_device *dev, unsigned size)
 * BO. */

static struct panfrost_bo *
panfrost_bo_cache_fetch(struct panfrost_device *dev, size_t size,
                        uint32_t flags, const char *label, bool dontwait)
{
   pthread_mutex_lock(&dev->bo_cache.lock);
   struct list_head *bucket = pan_bucket(dev, size);
   struct panfrost_bo *bo = NULL;

   /* Iterate the bucket looking for something suitable */
   list_for_each_entry_safe(struct panfrost_bo, entry, bucket, bucket_link) {
      if (entry->size < size || entry->flags != flags)
         continue;

      /* If the oldest BO in the cache is busy, likely so is
       * everything newer, so bail. */
      if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX, PAN_BO_ACCESS_RW))
         break;

      struct drm_panfrost_madvise madv = {
         .handle = entry->gem_handle,
         .madv = PANFROST_MADV_WILLNEED,
      };
      int ret;

      /* This one works, splice it out of the cache */
      list_del(&entry->bucket_link);
      list_del(&entry->lru_link);

      ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);
      if (!ret && !madv.retained) {
         panfrost_bo_free(entry);
         continue;
      }
      /* Let's go! */
      bo = entry;
      bo->label = label;
      break;
   }
   pthread_mutex_unlock(&dev->bo_cache.lock);

   return bo;
}

static void
panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev)
{
   struct timespec time;

   clock_gettime(CLOCK_MONOTONIC, &time);
   list_for_each_entry_safe(struct panfrost_bo, entry, &dev->bo_cache.lru,
                            lru_link) {
      /* We want all entries that have been used more than 1 sec
       * ago to be dropped, others can be kept.
       * Note the <= 2 check and not <= 1. It's here to account for
       * the fact that we're only testing ->tv_sec, not ->tv_nsec.
       * That means we might keep entries that are between 1 and 2
       * seconds old, but we don't really care, as long as unused BOs
       * are dropped at some point.
       */
      if (time.tv_sec - entry->last_used <= 2)
         break;

      list_del(&entry->bucket_link);
      list_del(&entry->lru_link);
      panfrost_bo_free(entry);
   }
}

/* Tries to add a BO to the cache. Returns if it was
@@ -260,43 +256,43 @@ panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev)
static bool
panfrost_bo_cache_put(struct panfrost_bo *bo)
{
   struct panfrost_device *dev = bo->dev;

   if (bo->flags & PAN_BO_SHARED || dev->debug & PAN_DBG_NO_CACHE)
      return false;

   /* Must be first */
   pthread_mutex_lock(&dev->bo_cache.lock);

   struct list_head *bucket = pan_bucket(dev, MAX2(bo->size, 4096));
   struct drm_panfrost_madvise madv;
   struct timespec time;

   madv.handle = bo->gem_handle;
   madv.madv = PANFROST_MADV_DONTNEED;
   madv.retained = 0;

   drmIoctl(dev->fd, DRM_IOCTL_PANFROST_MADVISE, &madv);

   /* Add us to the bucket */
   list_addtail(&bo->bucket_link, bucket);

   /* Add us to the LRU list and update the last_used field. */
   list_addtail(&bo->lru_link, &dev->bo_cache.lru);
   clock_gettime(CLOCK_MONOTONIC, &time);
   bo->last_used = time.tv_sec;

   /* Let's do some cleanup in the BO cache while we hold the
    * lock.
    */
   panfrost_bo_cache_evict_stale_bos(dev);

   /* Update the label to help debug BO cache memory usage issues */
   bo->label = "Unused (BO cache)";

   /* Must be last */
   pthread_mutex_unlock(&dev->bo_cache.lock);
   return true;
}

/* Evicts all BOs from the cache. Called during context
@@ -306,228 +302,226 @@ panfrost_bo_cache_put(struct panfrost_bo *bo)
 * OS) */

void
panfrost_bo_cache_evict_all(struct panfrost_device *dev)
{
   pthread_mutex_lock(&dev->bo_cache.lock);
   for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) {
      struct list_head *bucket = &dev->bo_cache.buckets[i];

      list_for_each_entry_safe(struct panfrost_bo, entry, bucket, bucket_link) {
         list_del(&entry->bucket_link);
         list_del(&entry->lru_link);
         panfrost_bo_free(entry);
      }
   }
   pthread_mutex_unlock(&dev->bo_cache.lock);
}

void
panfrost_bo_mmap(struct panfrost_bo *bo)
{
   struct drm_panfrost_mmap_bo mmap_bo = {.handle = bo->gem_handle};
   int ret;

   if (bo->ptr.cpu)
      return;

   ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PANFROST_MMAP_BO, &mmap_bo);
   if (ret) {
      fprintf(stderr, "DRM_IOCTL_PANFROST_MMAP_BO failed: %m\n");
      assert(0);
   }

   bo->ptr.cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                         bo->dev->fd, mmap_bo.offset);
   if (bo->ptr.cpu == MAP_FAILED) {
      bo->ptr.cpu = NULL;
      fprintf(stderr,
              "mmap failed: result=%p size=0x%llx fd=%i offset=0x%llx %m\n",
              bo->ptr.cpu, (long long)bo->size, bo->dev->fd,
              (long long)mmap_bo.offset);
   }
}

static void
panfrost_bo_munmap(struct panfrost_bo *bo)
{
   if (!bo->ptr.cpu)
      return;

   if (os_munmap((void *)(uintptr_t)bo->ptr.cpu, bo->size)) {
      perror("munmap");
      abort();
   }

   bo->ptr.cpu = NULL;
}

struct panfrost_bo *
panfrost_bo_create(struct panfrost_device *dev, size_t size, uint32_t flags,
                   const char *label)
{
   struct panfrost_bo *bo;

   /* Kernel will fail (confusingly) with EPERM otherwise */
   assert(size > 0);

   /* To maximize BO cache usage, don't allocate tiny BOs */
   size = ALIGN_POT(size, 4096);

   /* GROWABLE BOs cannot be mmapped */
   if (flags & PAN_BO_GROWABLE)
      assert(flags & PAN_BO_INVISIBLE);

   /* Ideally, we get a BO that's ready in the cache, or allocate a fresh
    * BO. If allocation fails, we can try waiting for something in the
    * cache. But if there's nothing suitable, we should flush the cache
    * to make space for the new allocation.
    */
   bo = panfrost_bo_cache_fetch(dev, size, flags, label, true);
   if (!bo)
      bo = panfrost_bo_alloc(dev, size, flags, label);
   if (!bo)
      bo = panfrost_bo_cache_fetch(dev, size, flags, label, false);
   if (!bo) {
      panfrost_bo_cache_evict_all(dev);
      bo = panfrost_bo_alloc(dev, size, flags, label);
   }

   if (!bo) {
      unreachable("BO creation failed. We don't handle that yet.");
      return NULL;
   }

   /* Only mmap now if we know we need to. For CPU-invisible buffers, we
    * never map since we don't care about their contents; they're purely
    * for GPU-internal use. But we do trace them anyway. */
   if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP)))
      panfrost_bo_mmap(bo);

   p_atomic_set(&bo->refcnt, 1);

   if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
      if (flags & PAN_BO_INVISIBLE)
         pandecode_inject_mmap(bo->ptr.gpu, NULL, bo->size, NULL);
      else if (!(flags & PAN_BO_DELAY_MMAP))
         pandecode_inject_mmap(bo->ptr.gpu, bo->ptr.cpu, bo->size, NULL);
   }

   return bo;
}
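A lifecycle sketch of the allocation path just shown: with no flags, panfrost_bo_create() fetches from the cache or allocates fresh, and maps eagerly, so ptr.cpu is immediately usable; dropping the last reference funnels the BO back into the cache when caching is permitted. The size and label are illustrative.

struct panfrost_bo *bo =
   panfrost_bo_create(dev, 4096, 0, "scratch upload");
memset(bo->ptr.cpu, 0, bo->size); /* CPU-visible right away */
panfrost_bo_unreference(bo);      /* likely cached, not freed */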
void
panfrost_bo_reference(struct panfrost_bo *bo)
{
   if (bo) {
      ASSERTED int count = p_atomic_inc_return(&bo->refcnt);
      assert(count != 1);
   }
}

void
panfrost_bo_unreference(struct panfrost_bo *bo)
{
   if (!bo)
      return;

   /* Don't return to cache if there are still references */
   if (p_atomic_dec_return(&bo->refcnt))
      return;

   struct panfrost_device *dev = bo->dev;

   pthread_mutex_lock(&dev->bo_map_lock);

   /* Someone might have imported this BO while we were waiting for the
    * lock, let's make sure it's still not referenced before freeing it.
    */
   if (p_atomic_read(&bo->refcnt) == 0) {
      /* When the reference count goes to zero, we need to cleanup */
      panfrost_bo_munmap(bo);

      if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
         pandecode_inject_free(bo->ptr.gpu, bo->size);

      /* Rather than freeing the BO now, we'll cache the BO for later
       * allocations if we're allowed to.
       */
      if (!panfrost_bo_cache_put(bo))
         panfrost_bo_free(bo);
   }
   pthread_mutex_unlock(&dev->bo_map_lock);
}

struct panfrost_bo *
panfrost_bo_import(struct panfrost_device *dev, int fd)
{
   struct panfrost_bo *bo;
   struct drm_panfrost_get_bo_offset get_bo_offset = {
      0,
   };
   ASSERTED int ret;
   unsigned gem_handle;

   ret = drmPrimeFDToHandle(dev->fd, fd, &gem_handle);
   assert(!ret);

   pthread_mutex_lock(&dev->bo_map_lock);
   bo = pan_lookup_bo(dev, gem_handle);

   if (!bo->dev) {
      get_bo_offset.handle = gem_handle;
      ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_GET_BO_OFFSET, &get_bo_offset);
      assert(!ret);

      bo->dev = dev;
      bo->ptr.gpu = (mali_ptr)get_bo_offset.offset;
      bo->size = lseek(fd, 0, SEEK_END);
      /* Sometimes this can fail and return -1. A size of -1 is not
       * something mmap can sensibly be asked to map. Be robust to
       * zero-sized maps as well, and fail gracefully.
       */
      if ((bo->size == 0) || (bo->size == (size_t)-1)) {
         pthread_mutex_unlock(&dev->bo_map_lock);
         return NULL;
      }
      bo->flags = PAN_BO_SHARED;
      bo->gem_handle = gem_handle;
      p_atomic_set(&bo->refcnt, 1);
   } else {
      /* bo->refcnt == 0 can happen if the BO
       * was being released but panfrost_bo_import() acquired the
       * lock before panfrost_bo_unreference(). In that case, refcnt
       * is 0 and we can't use panfrost_bo_reference() directly, we
       * have to re-initialize the refcnt.
       * Note that panfrost_bo_unreference() checks
       * refcnt value just after acquiring the lock to
       * make sure the object is not freed if panfrost_bo_import()
       * acquired it in the meantime.
       */
      if (p_atomic_read(&bo->refcnt) == 0)
         p_atomic_set(&bo->refcnt, 1);
      else
         panfrost_bo_reference(bo);
   }
   pthread_mutex_unlock(&dev->bo_map_lock);

   return bo;
}

int
panfrost_bo_export(struct panfrost_bo *bo)
{
   struct drm_prime_handle args = {
      .handle = bo->gem_handle,
      .flags = DRM_CLOEXEC,
   };

   int ret = drmIoctl(bo->dev->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args);
   if (ret == -1)
      return -1;

   bo->flags |= PAN_BO_SHARED;
   return args.fd;
}
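An export/import round-trip sketch (the file-descriptor handling is illustrative): exporting marks the BO PAN_BO_SHARED, which disables both BO-cache reuse and the cached-idle fast path in panfrost_bo_wait(); importing the same dma-buf on the same device simply bumps the existing reference.

int fd = panfrost_bo_export(bo);
if (fd >= 0) {
   struct panfrost_bo *shared = panfrost_bo_import(dev, fd);
   /* ... hand `fd` to another process or API ... */
   panfrost_bo_unreference(shared);
   close(fd);
}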
@@ -26,113 +26,106 @@
#ifndef __PAN_BO_H__
#define __PAN_BO_H__

#include "util/list.h"
#include "panfrost-job.h"
#include <time.h>

/* Flags for allocated memory */

/* This memory region is executable */
#define PAN_BO_EXECUTE (1 << 0)

/* This memory region should be lazily allocated and grow-on-page-fault. Must
 * be used in conjunction with INVISIBLE */
#define PAN_BO_GROWABLE (1 << 1)

/* This memory region should not be mapped to the CPU */
#define PAN_BO_INVISIBLE (1 << 2)

/* This region may not be used immediately and will not mmap on allocate
 * (semantically distinct from INVISIBLE, which can never be mmapped) */
#define PAN_BO_DELAY_MMAP (1 << 3)

/* BO is shared across processes (imported or exported) and therefore cannot be
 * cached locally */
#define PAN_BO_SHARED (1 << 4)

/* GPU access flags */

/* BO is either shared (can be accessed by more than one GPU batch) or private
 * (reserved by a specific GPU job). */
#define PAN_BO_ACCESS_PRIVATE (0 << 0)
#define PAN_BO_ACCESS_SHARED  (1 << 0)

/* BO is being read/written by the GPU */
#define PAN_BO_ACCESS_READ  (1 << 1)
#define PAN_BO_ACCESS_WRITE (1 << 2)
#define PAN_BO_ACCESS_RW    (PAN_BO_ACCESS_READ | PAN_BO_ACCESS_WRITE)

/* BO is accessed by the vertex/tiler job. */
#define PAN_BO_ACCESS_VERTEX_TILER (1 << 3)

/* BO is accessed by the fragment job. */
#define PAN_BO_ACCESS_FRAGMENT (1 << 4)

typedef uint8_t pan_bo_access;

struct panfrost_device;

struct panfrost_ptr {
   /* CPU address */
   void *cpu;

   /* GPU address */
   mali_ptr gpu;
};

struct panfrost_bo {
   /* Must be first for casting */
   struct list_head bucket_link;

   /* Used to link the BO to the BO cache LRU list. */
   struct list_head lru_link;

   /* Store the time this BO was last used, so the BO cache logic can evict
    * stale BOs.
    */
   time_t last_used;

   /* Atomic reference count */
   int32_t refcnt;

   struct panfrost_device *dev;

   /* Mapping for the entire object (all levels) */
   struct panfrost_ptr ptr;

   /* Size of the entire object */
   size_t size;

   int gem_handle;

   uint32_t flags;

   /* Combination of PAN_BO_ACCESS_{READ,WRITE} flags encoding pending
    * GPU accesses to this BO. Useful to avoid calling the WAIT_BO ioctl
    * when the BO is idle.
    */
   uint32_t gpu_access;

   /* Human readable description of the BO for debugging. */
   const char *label;
};

bool panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns,
                      bool wait_readers);
void panfrost_bo_reference(struct panfrost_bo *bo);
void panfrost_bo_unreference(struct panfrost_bo *bo);
struct panfrost_bo *panfrost_bo_create(struct panfrost_device *dev, size_t size,
                                       uint32_t flags, const char *label);
void panfrost_bo_mmap(struct panfrost_bo *bo);
struct panfrost_bo *panfrost_bo_import(struct panfrost_device *dev, int fd);
int panfrost_bo_export(struct panfrost_bo *bo);
void panfrost_bo_cache_evict_all(struct panfrost_device *dev);

#endif /* __PAN_BO_H__ */
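A flag-combination sketch: per the comments above, a grow-on-fault heap must pair PAN_BO_GROWABLE with PAN_BO_INVISIBLE, since a growable region can never be CPU-mapped. The size and label here are illustrative.

struct panfrost_bo *heap = panfrost_bo_create(
   dev, 1 << 21, PAN_BO_GROWABLE | PAN_BO_INVISIBLE, "tiler heap");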
@@ -26,11 +26,11 @@
#include "genxml/gen_macros.h"

#include <string.h>
#include "gallium/auxiliary/util/u_pack_color.h"
#include "util/format_srgb.h"
#include "util/rounding.h"
#include "pan_format.h"
#include "pan_util.h"

/* Clear colours are packed as the internal format of the tilebuffer, looked up
 * in the blendable formats table given the render target format.

@@ -49,8 +49,8 @@
static void
pan_pack_color_32(uint32_t *packed, uint32_t v)
{
   for (unsigned i = 0; i < 4; ++i)
      packed[i] = v;
}

/* For m integer bits and n fractional bits, calculate the conversion factor,

@@ -61,22 +61,22 @@ pan_pack_color_32(uint32_t *packed, uint32_t v)
static inline uint32_t
float_to_fixed(float f, unsigned bits_int, unsigned bits_frac, bool dither)
{
   uint32_t m = (1 << bits_int) - 1;

   if (dither) {
      float factor = m << bits_frac;
      return _mesa_roundevenf(f * factor);
   } else {
      uint32_t v = _mesa_roundevenf(f * (float)m);
      return v << bits_frac;
   }
}
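Two arithmetic checks of float_to_fixed(), with illustrative bit counts: for 8 integer bits and no fractional bits, 0.5 maps to roundeven(0.5 * 255) = roundeven(127.5) = 128; with 4 fractional dither bits, the scale factor becomes 255 << 4 = 4080, so 0.5 maps to 2040, preserving sub-LSB precision for the dither.

float_to_fixed(0.5f, 8, 0, false); /* roundeven(127.5)      = 128  */
float_to_fixed(0.5f, 8, 4, true);  /* roundeven(0.5 * 4080) = 2040 */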
struct mali_tib_layout {
   unsigned int_r, frac_r;
   unsigned int_g, frac_g;
   unsigned int_b, frac_b;
   unsigned int_a, frac_a;
};

/* clang-format off */

@@ -93,76 +93,77 @@ static const struct mali_tib_layout tib_layouts[] = {
/* Raw values are stored as-is but replicated for multisampling */

static void
pan_pack_raw(uint32_t *packed, const union pipe_color_union *color,
             enum pipe_format format)
{
   union util_color out = {0};
   unsigned size = util_format_get_blocksize(format);
   assert(size <= 16);

   util_pack_color(color->f, format, &out);

   if (size == 1) {
      unsigned s = out.ui[0] | (out.ui[0] << 8);
      pan_pack_color_32(packed, s | (s << 16));
   } else if (size == 2)
      pan_pack_color_32(packed, out.ui[0] | (out.ui[0] << 16));
   else if (size <= 4)
      pan_pack_color_32(packed, out.ui[0]);
   else if (size <= 8) {
      memcpy(packed + 0, out.ui, 8);
      memcpy(packed + 2, out.ui, 8);
   } else {
      memcpy(packed, out.ui, 16);
   }
}

void
pan_pack_color(uint32_t *packed, const union pipe_color_union *color,
               enum pipe_format format, bool dithered)
{
   /* Set of blendable formats is common across versions. TODO: v9 */
   enum mali_color_buffer_internal_format internal =
      panfrost_blendable_formats_v7[format].internal;

   if (internal == MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW_VALUE) {
      pan_pack_raw(packed, color, format);
      return;
   }

   /* Saturate to [0, 1] by definition of UNORM. Prevents overflow. */
   float r = SATURATE(color->f[0]);
   float g = SATURATE(color->f[1]);
   float b = SATURATE(color->f[2]);
   float a = SATURATE(color->f[3]);

   /* Fill in alpha = 1.0 by default */
   if (!util_format_has_alpha(format))
      a = 1.0;

   /* Convert colourspace while we still have floats */
   if (util_format_is_srgb(format)) {
      r = util_format_linear_to_srgb_float(r);
      g = util_format_linear_to_srgb_float(g);
      b = util_format_linear_to_srgb_float(b);
   }

   /* Look up the layout of the tilebuffer */
   assert(internal < ARRAY_SIZE(tib_layouts));
   struct mali_tib_layout l = tib_layouts[internal];

   unsigned count_r = l.int_r + l.frac_r;
   unsigned count_g = l.int_g + l.frac_g + count_r;
   unsigned count_b = l.int_b + l.frac_b + count_g;
   ASSERTED unsigned count_a = l.int_a + l.frac_a + count_b;

   /* Must fill the word */
   assert(count_a == 32);

   /* Convert the transformed float colour to the given layout */
   uint32_t ur = float_to_fixed(r, l.int_r, l.frac_r, dithered) << 0;
   uint32_t ug = float_to_fixed(g, l.int_g, l.frac_g, dithered) << count_r;
   uint32_t ub = float_to_fixed(b, l.int_b, l.frac_b, dithered) << count_g;
   uint32_t ua = float_to_fixed(a, l.int_a, l.frac_a, dithered) << count_b;

   pan_pack_color_32(packed, ur | ug | ub | ua);
}
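A usage sketch: packing an opaque red clear colour for an RGBA8 UNORM render target. The four packed words are the replicated tilebuffer value consumed by the clear path.

union pipe_color_union red = {.f = {1.0f, 0.0f, 0.0f, 1.0f}};
uint32_t packed[4];
pan_pack_color(packed, &red, PIPE_FORMAT_R8G8B8A8_UNORM, false);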
File diff suppressed because it is too large
Some files were not shown because too many files have changed in this diff