mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 02:38:04 +02:00
Merge branch 'rusticl/asahi/fixes/2026/05/01' into 'main'
nir,asahi: Fix OpenCL regressions. See merge request mesa/mesa!41386
This commit is contained in:
commit
9782c793f5
4 changed files with 32 additions and 20 deletions
|
|
@ -3619,8 +3619,6 @@ agx_preprocess_nir(nir_shader *nir)
|
|||
nir_metadata_control_flow, NULL);
|
||||
NIR_PASS(_, nir, agx_nir_lower_subgroups);
|
||||
NIR_PASS(_, nir, nir_lower_all_phis_to_scalar);
|
||||
NIR_PASS(_, nir, nir_shader_alu_pass, agx_nir_lower_fdiv,
|
||||
nir_metadata_control_flow, NULL);
|
||||
|
||||
/* After lowering, run through the standard suite of NIR optimizations. We
|
||||
* will run through the loop later, once we have the shader key, but if we
|
||||
|
|
@ -3638,6 +3636,11 @@ agx_preprocess_nir(nir_shader *nir)
|
|||
};
|
||||
|
||||
NIR_PASS(_, nir, nir_lower_idiv, &idiv_options);
|
||||
|
||||
/* Has to run after nir_lower_idiv */
|
||||
NIR_PASS(_, nir, nir_shader_alu_pass, agx_nir_lower_fdiv,
|
||||
nir_metadata_control_flow, NULL);
|
||||
|
||||
NIR_PASS(_, nir, nir_opt_deref);
|
||||
NIR_PASS(_, nir, nir_lower_vars_to_ssa);
|
||||
|
||||
|
|
|
|||
|
|
@ -213,6 +213,7 @@ lower_alu_instr(nir_builder *b, nir_alu_instr *instr, UNUSED void *cb_data)
|
|||
/* Fallback on the emulation */
|
||||
if (!lowered) {
|
||||
nir_def *iminmax = max ? nir_imax(b, s0, s1) : nir_imin(b, s0, s1);
|
||||
iminmax = nir_fcanonicalize(b, iminmax);
|
||||
lowered = nir_bcsel(b, nir_feq(b, s0, s1), iminmax, fminmax);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -111,6 +111,13 @@ asahi_blit_compute_shader(struct pipe_context *ctx, struct asahi_blit_key *key)
|
|||
nir_iand(b, in_bounds, nir_ilt(b, logical_id_el_2d, dimensions_el_2d));
|
||||
}
|
||||
|
||||
unsigned bit_size = 32;
|
||||
nir_alu_type dst_type = nir_type_uint32;
|
||||
if (util_format_is_float16(key->dst_format)) {
|
||||
bit_size = 16;
|
||||
dst_type = nir_type_float16;
|
||||
}
|
||||
|
||||
nir_def *colour0, *colour1;
|
||||
nir_push_if(b, nir_ball(b, in_bounds));
|
||||
{
|
||||
|
|
@ -127,15 +134,15 @@ asahi_blit_compute_shader(struct pipe_context *ctx, struct asahi_blit_key *key)
|
|||
colour0 = nir_tex(b, coords_el_nd, .texture_index = 0, .sampler_index = 0,
|
||||
.backend_flags = AGX_TEXTURE_FLAG_NO_CLAMP,
|
||||
.dim = GLSL_SAMPLER_DIM_2D, .is_array = key->array,
|
||||
.dest_type = nir_type_uint32);
|
||||
.dest_type = dst_type);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
/* For out-of-bounds pixels, copy in the destination */
|
||||
colour1 = nir_image_load(
|
||||
b, 4, 32, nir_imm_int(b, 0), nir_pad_vec4(b, image_pos_nd), zero, zero,
|
||||
b, 4, bit_size, nir_imm_int(b, 0), nir_pad_vec4(b, image_pos_nd), zero, zero,
|
||||
.image_array = key->array, .image_dim = GLSL_SAMPLER_DIM_2D,
|
||||
.access = ACCESS_IN_BOUNDS, .dest_type = nir_type_uint32);
|
||||
.access = ACCESS_IN_BOUNDS, .dest_type = dst_type);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
nir_def *color = nir_if_phi(b, colour0, colour1);
|
||||
|
|
@ -619,11 +626,12 @@ agx_resource_copy_region(struct pipe_context *pctx, struct pipe_resource *dst,
|
|||
assert(dst->format == src->format);
|
||||
unsigned bs = util_format_get_blocksize(dst->format);
|
||||
unsigned size = bs * src_box->width;
|
||||
uint64_t dst_addr = agx_map_gpu(agx_resource(dst)) + dstx * bs;
|
||||
unsigned dst_offset = dstx * bs;
|
||||
uint64_t dst_addr = agx_map_gpu(agx_resource(dst)) + dst_offset;
|
||||
uint64_t src_addr = agx_map_gpu(agx_resource(src)) + src_box->x * bs;
|
||||
|
||||
agx_batch_reads(batch, agx_resource(src));
|
||||
agx_batch_writes_range(batch, agx_resource(dst), dst_addr, size);
|
||||
agx_batch_writes_range(batch, agx_resource(dst), dst_offset, size);
|
||||
/* Use vectorized copies for as much of the buffer as possible. This requires
|
||||
* that dst, src, and size are all properly aligned. Failing to check for
|
||||
* alignment on the buffers causes subtle and hard-to-debug issues!
|
||||
|
|
|
|||
|
|
@ -3003,7 +3003,6 @@ agx_launch_internal(struct agx_batch *batch, struct agx_grid grid,
|
|||
struct agx_context *ctx = batch->ctx;
|
||||
struct agx_device *dev = agx_device(ctx->base.screen);
|
||||
|
||||
/* TODO: Ensure space if we allow multiple kernels in a batch */
|
||||
uint32_t *out = (uint32_t *)batch->cdm.current;
|
||||
|
||||
out = agx_cdm_launch(out, dev->chip, grid, wg, launch, usc);
|
||||
|
|
@ -3012,6 +3011,19 @@ agx_launch_internal(struct agx_batch *batch, struct agx_grid grid,
|
|||
batch->cdm.current = (void *)out;
|
||||
assert(batch->cdm.current <= batch->cdm.end &&
|
||||
"Failed to reserve sufficient space in encoder");
|
||||
|
||||
/* If the next dispatch might overflow, flush now. TODO: If this is ever hit
|
||||
* in practice, we can use CDM stream links.
|
||||
*/
|
||||
size_t dispatch_upper_bound =
|
||||
AGX_CDM_LAUNCH_WORD_0_LENGTH + AGX_CDM_LAUNCH_WORD_1_LENGTH +
|
||||
AGX_CDM_UNK_G14X_LENGTH + AGX_CDM_INDIRECT_LENGTH +
|
||||
AGX_CDM_GLOBAL_SIZE_LENGTH + AGX_CDM_LOCAL_SIZE_LENGTH +
|
||||
AGX_CDM_BARRIER_LENGTH;
|
||||
|
||||
if (batch->cdm.current + dispatch_upper_bound >= batch->cdm.end)
|
||||
agx_flush_batch_for_reason(ctx, batch, "CDM overfull");
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -5408,18 +5420,6 @@ agx_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
|
|||
agx_dirty_all(ctx);
|
||||
|
||||
batch->uniforms.tables[AGX_SYSVAL_TABLE_GRID] = 0;
|
||||
|
||||
/* If the next dispatch might overflow, flush now. TODO: If this is ever hit
|
||||
* in practice, we can use CDM stream links.
|
||||
*/
|
||||
size_t dispatch_upper_bound =
|
||||
AGX_CDM_LAUNCH_WORD_0_LENGTH + AGX_CDM_LAUNCH_WORD_1_LENGTH +
|
||||
AGX_CDM_UNK_G14X_LENGTH + AGX_CDM_INDIRECT_LENGTH +
|
||||
AGX_CDM_GLOBAL_SIZE_LENGTH + AGX_CDM_LOCAL_SIZE_LENGTH +
|
||||
AGX_CDM_BARRIER_LENGTH;
|
||||
|
||||
if (batch->cdm.current + dispatch_upper_bound >= batch->cdm.end)
|
||||
agx_flush_batch_for_reason(ctx, batch, "CDM overfull");
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue