diff --git a/src/gallium/drivers/crocus/crocus_blorp.c b/src/gallium/drivers/crocus/crocus_blorp.c index ba8425a518b..3abe041d75d 100644 --- a/src/gallium/drivers/crocus/crocus_blorp.c +++ b/src/gallium/drivers/crocus/crocus_blorp.c @@ -447,5 +447,6 @@ genX(crocus_init_blorp)(struct crocus_context *ice) blorp_init_elk(&ice->blorp, ice, &screen->isl_dev, screen->compiler, NULL); ice->blorp.lookup_shader = crocus_blorp_lookup_shader; ice->blorp.upload_shader = crocus_blorp_upload_shader; + ice->blorp.get_surface_address = blorp_get_surface_address; ice->blorp.exec = crocus_blorp_exec; } diff --git a/src/gallium/drivers/iris/iris_blorp.c b/src/gallium/drivers/iris/iris_blorp.c index 53329d90a04..39737142d11 100644 --- a/src/gallium/drivers/iris/iris_blorp.c +++ b/src/gallium/drivers/iris/iris_blorp.c @@ -513,6 +513,7 @@ genX(init_blorp)(struct iris_context *ice) #endif ice->blorp.lookup_shader = iris_blorp_lookup_shader; ice->blorp.upload_shader = iris_blorp_upload_shader; + ice->blorp.get_surface_address = blorp_get_surface_address; ice->blorp.exec = iris_blorp_exec; ice->blorp.enable_tbimr = screen->driconf.enable_tbimr; } diff --git a/src/intel/blorp/blorp.c b/src/intel/blorp/blorp.c index 29405390110..741036ad34a 100644 --- a/src/intel/blorp/blorp.c +++ b/src/intel/blorp/blorp.c @@ -253,6 +253,26 @@ blorp_surface_info_init(struct blorp_batch *batch, info->surf.phys_level0_sa.w += surf->tile_x_sa; info->surf.phys_level0_sa.h += surf->tile_y_sa; } + + if (blorp->isl_dev->requires_padding && !is_dest && + (batch->flags & BLORP_BATCH_SRC_UNPADDED)) { + blorp_assert_is_buffer(info->surf, info->view); + + /* Infers the page boundaries for a buffer to image copy based on the + * surface address and dimensions, following Vulkan semantics to + * determine the extent of the final row. 
+ */ + uint64_t size_B = + (uint64_t) info->surf.phys_level0_sa.w * + (isl_format_get_layout(info->view.format)->bpb / 8) + + (uint64_t) (info->surf.phys_level0_sa.h - 1) * + info->surf.row_pitch_B; + + uint64_t mask = blorp->isl_dev->info->mem_alignment - 1; + uint64_t address = batch->blorp->get_surface_address(batch, info->addr); + info->page_base = address & ~mask; + info->page_limit = (address + size_B + mask) & ~mask; + } } diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h index 8109c12c27f..b07006777f5 100644 --- a/src/intel/blorp/blorp.h +++ b/src/intel/blorp/blorp.h @@ -79,6 +79,20 @@ enum blorp_dynamic_state { BLORP_DYNAMIC_STATE_COUNT, }; +struct blorp_address { + void *buffer; + int64_t offset; + unsigned reloc_flags; + uint32_t mocs; + + /** + * True if this buffer is intended to live in device-local memory. + * This is only a performance hint; it's OK to set it to true even + * if eviction has temporarily forced the buffer to system memory. + */ + bool local_hint; +}; + struct blorp_context { void *driver_ctx; @@ -105,6 +119,8 @@ struct blorp_context { const void *prog_data, uint32_t prog_data_size, uint32_t *kernel_out, void *prog_data_out); + uint64_t (*get_surface_address)(struct blorp_batch *batch, + struct blorp_address addr); void (*exec)(struct blorp_batch *batch, const struct blorp_params *params); struct blorp_config config; @@ -155,6 +171,10 @@ enum blorp_batch_flags { * Mostly for debug */ BLORP_BATCH_DISABLE_VF_DISTRIBUTION = BITFIELD_BIT(6), + + /** Source buffer is unpadded and needs careful accesses + */ + BLORP_BATCH_SRC_UNPADDED = BITFIELD_BIT(7), }; struct blorp_batch { @@ -186,20 +206,6 @@ blorp_batch_isl_copy_usage(const struct blorp_batch *batch, bool is_dest, return usage; } -struct blorp_address { - void *buffer; - int64_t offset; - unsigned reloc_flags; - uint32_t mocs; - - /** - * True if this buffer is intended to live in device-local memory. 
- * This is only a performance hint; it's OK to set it to true even - * if eviction has temporarily forced the buffer to system memory. - */ - bool local_hint; -}; - static inline bool blorp_address_is_null(struct blorp_address address) { diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c index e81b7411a92..a25566a7154 100644 --- a/src/intel/blorp/blorp_blit.c +++ b/src/intel/blorp/blorp_blit.c @@ -45,6 +45,8 @@ struct blorp_blit_vars { nir_variable *v_src_offset; nir_variable *v_dst_offset; nir_variable *v_src_inv_size; + nir_variable *v_src_buffer_first_row; + nir_variable *v_src_buffer_row_pitch; }; static void @@ -60,6 +62,8 @@ blorp_blit_vars_init(nir_builder *b, struct blorp_blit_vars *v) LOAD_INPUT(src_offset, glsl_vector_type(GLSL_TYPE_UINT, 2)) LOAD_INPUT(dst_offset, glsl_vector_type(GLSL_TYPE_UINT, 2)) LOAD_INPUT(src_inv_size, glsl_vector_type(GLSL_TYPE_FLOAT, 2)) + LOAD_INPUT(src_buffer_first_row, glsl_uint_type()) + LOAD_INPUT(src_buffer_row_pitch, glsl_uint_type()) #undef LOAD_INPUT } @@ -224,6 +228,47 @@ blorp_nir_txf(nir_builder *b, struct blorp_blit_vars *v, return &tex->def; } +/* Same as blorp_nir_txf, except the last few rows may be loaded from a texel + * buffer bound to BLORP_TEXBUF_BT_INDEX instead to avoid page faults due to + * an unaligned source. 
+ */ +static nir_def * +blorp_nir_txf_buf(nir_builder *b, struct blorp_blit_vars *v, + nir_def *pos, nir_alu_type dst_type, + const struct intel_device_info *devinfo) +{ + nir_def *buf_start = nir_load_var(b, v->v_src_buffer_first_row); + + /* Branch on the row instead of indexing the binding table per pixel: non-uniform texture access is expensive */ + nir_push_if(b, nir_ilt(b, nir_channel(b, pos, 1), buf_start)); + + nir_def *tex = blorp_nir_txf(b, v, pos, dst_type, devinfo); + + nir_push_else(b, NULL); + + /* Rows at or beyond src_buffer_first_row come from the texel buffer: index = (y - first_row) * src_buffer_row_pitch + x */ + pos = nir_vec2(b, + nir_iadd(b, + nir_imul(b, + nir_isub(b, + nir_channel(b, pos, 1), + buf_start), + nir_load_var(b, v->v_src_buffer_row_pitch)), + nir_channel(b, pos, 0)), + nir_imm_int(b, 0)); + + nir_tex_instr *buf = + blorp_create_nir_tex_instr(b, v, nir_texop_txf, pos, 1, dst_type, devinfo); + + buf->texture_index = BLORP_TEXBUF_BT_INDEX; + buf->sampler_dim = GLSL_SAMPLER_DIM_BUF; + + nir_builder_instr_insert(b, &buf->instr); + + nir_pop_if(b, NULL); + return nir_if_phi(b, tex, &buf->def); +} + static nir_def * blorp_nir_txf_ms(nir_builder *b, struct blorp_blit_vars *v, nir_def *pos, nir_alu_type dst_type, @@ -1322,6 +1367,7 @@ blorp_build_nir_shader(struct blorp_context *blorp, case BLORP_FILTER_NONE: case BLORP_FILTER_NEAREST: case BLORP_FILTER_SAMPLE_0: + assert(!key->need_src_buffer || key->src_samples == 1); /* We're going to use texelFetch, so we need integers */ if (src_pos->num_components == 2) { src_pos = nir_f2i32(&b, src_pos); @@ -1364,7 +1410,9 @@ blorp_build_nir_shader(struct blorp_context *blorp, * the texturing unit, will cause data to be read from the correct * memory location. So we can fetch the texel now. 
*/ - if (key->src_samples == 1) { + if (key->need_src_buffer) { + color = blorp_nir_txf_buf(&b, &v, src_pos, key->texture_data_type, devinfo); + } else if (key->src_samples == 1) { color = blorp_nir_txf(&b, &v, src_pos, key->texture_data_type, devinfo); } else { color = blorp_nir_txf_ms(&b, &v, src_pos, key->texture_data_type, devinfo); @@ -1373,6 +1421,7 @@ blorp_build_nir_shader(struct blorp_context *blorp, case BLORP_FILTER_BILINEAR: assert(!key->src_tiled_w); + assert(!key->need_src_buffer); assert(key->tex_samples == key->src_samples); assert(key->tex_layout == key->src_layout); @@ -1389,6 +1438,7 @@ blorp_build_nir_shader(struct blorp_context *blorp, case BLORP_FILTER_MIN_SAMPLE: case BLORP_FILTER_MAX_SAMPLE: assert(!key->src_tiled_w); + assert(!key->need_src_buffer); assert(key->tex_samples == key->src_samples); assert(key->tex_layout == key->src_layout); @@ -2011,6 +2061,48 @@ surf_fake_rgb_with_red(const struct isl_device *isl_dev, info->surf.format = info->view.format = red_format; } +/** + * Converts the overfetching part of a linear 2D surface to a 1D buffer, this + * is part of a workaround for performing buffer-to-image-copies when source + * straddles an extra page due to a misaligned sampler cache. 
+ */ +static inline void +blorp_surf_convert_overfetch_to_buffer(struct blorp_batch *batch, + struct blorp_surface_info *info) +{ + const struct isl_device *isl_dev = batch->blorp->isl_dev; + + blorp_assert_is_buffer(info->surf, info->view); + assert(isl_format_block_is_1x1x1(info->view.format)); + + uint64_t address = batch->blorp->get_surface_address(batch, info->addr); + uint64_t max_size_B = info->page_limit - address; + uint64_t overfetch_B = + isl_surf_get_sampler_overfetch_size_B(isl_dev, &info->surf, &info->view); + + if (overfetch_B > max_size_B) { + uint32_t rows = (uint32_t) DIV_ROUND_UP(overfetch_B - max_size_B, + info->surf.row_pitch_B); + + /* Clamp so the height subtractions below cannot underflow */ + rows = MIN2(rows, info->surf.logical_level0_px.h); + + info->buffer = true; + info->buffer_rows = rows; + info->surf.logical_level0_px.h -= rows; + info->surf.phys_level0_sa.h -= rows; + info->surf.size_B -= (uint64_t) rows * info->surf.row_pitch_B; + + if (info->surf.logical_level0_px.h == 0) { + info->surf.size_B = 0; + return; + } + + assert(isl_surf_get_sampler_overfetch_size_B(isl_dev, + &info->surf, &info->view) <= max_size_B); + } +} + enum blit_shrink_status { BLIT_NO_SHRINK = 0, BLIT_SRC_WIDTH_SHRINK = (1 << 0), @@ -2359,6 +2451,21 @@ try_blorp_blit(struct blorp_batch *batch, key->use_kill = true; } + if (batch->blorp->isl_dev->requires_padding && + (batch->flags & BLORP_BATCH_SRC_UNPADDED)) { + params->src.view.usage |= ISL_SURF_USAGE_NO_ARRAY_OVERFETCH_BIT; + blorp_surf_convert_overfetch_to_buffer(batch, &params->src); + } + + key->need_src_buffer = params->src.buffer; + if (key->need_src_buffer) { + params->wm_inputs.blit.src_buffer_first_row = + params->src.surf.logical_level0_px.h; + params->wm_inputs.blit.src_buffer_row_pitch = + params->src.surf.row_pitch_B / + (isl_format_get_layout(params->src.view.format)->bpb / 8); + } + if (compute) { if (!blorp_get_blit_kernel_cs(batch, params, key)) return 0; @@ -2434,9 +2541,11 @@ shrink_surface_params(const 
struct isl_device *dev, struct blorp_surface_info *info, double *x0, double *x1, double *y0, double *y1) { - uint64_t offset_B; + uint64_t start_offset_B; + uint64_t end_offset_B; uint32_t x_offset_sa, y_offset_sa, size; struct isl_extent2d px_size_sa; + struct isl_extent4d surf_size_sa; int adjust; blorp_surf_convert_to_single_slice(dev, info); @@ -2449,19 +2558,28 @@ shrink_surface_params(const struct isl_device *dev, */ x_offset_sa = (uint32_t)*x0 * px_size_sa.w + info->tile_x_sa; y_offset_sa = (uint32_t)*y0 * px_size_sa.h + info->tile_y_sa; + surf_size_sa = (struct isl_extent4d) { + .w = (uint32_t)ceil(*x1) * px_size_sa.w + info->tile_x_sa, + .h = (uint32_t)ceil(*y1) * px_size_sa.h + info->tile_y_sa, + .d = 1, + .a = 1, + }; + uint32_t tile_z_sa, tile_a; - isl_tiling_get_intratile_offset_sa(info->surf.tiling, info->surf.dim, - info->surf.msaa_layout, - info->surf.format, info->surf.samples, - info->surf.row_pitch_B, - info->surf.array_pitch_el_rows, - x_offset_sa, y_offset_sa, 0, 0, - &offset_B, - &info->tile_x_sa, &info->tile_y_sa, - &tile_z_sa, &tile_a); + isl_tiling_get_intratile_range_sa(info->surf.tiling, info->surf.dim, + info->surf.msaa_layout, + info->surf.format, info->surf.samples, + info->surf.row_pitch_B, + info->surf.array_pitch_el_rows, + x_offset_sa, y_offset_sa, 0, 0, + surf_size_sa, + &start_offset_B, + &end_offset_B, + &info->tile_x_sa, &info->tile_y_sa, + &tile_z_sa, &tile_a); assert(tile_z_sa == 0 && tile_a == 0); - info->addr.offset += offset_B; + info->addr.offset += start_offset_B; adjust = (int)info->tile_x_sa / px_size_sa.w - (int)*x0; *x0 += adjust; @@ -2481,6 +2599,7 @@ shrink_surface_params(const struct isl_device *dev, info->surf.logical_level0_px.height = size; info->surf.phys_level0_sa.height = size * px_size_sa.h; + info->surf.size_B = end_offset_B - start_offset_B; info->surf.usage |= ISL_SURF_USAGE_NO_OVERFETCH_PADDING_BIT; /* Stomp the 64B alignment because we set NO_OVERFETCH_PADDING_BIT */ @@ -3085,7 +3204,8 @@ 
blorp_copy_get_formats(const struct isl_device *isl_dev, static int get_max_format_scale(const struct isl_device *isl_dev, const struct blorp_surface_info *info, - uint32_t x, uint32_t width, uint32_t height) + uint32_t x, uint32_t width, uint32_t height, + bool unpadded) { const bool full_width = u_minify(info->surf.logical_level0_px.width, info->view.base_level) == width; @@ -3168,9 +3288,10 @@ get_max_format_scale(const struct isl_device *isl_dev, continue; } - if (!(info->view.usage & ISL_SURF_USAGE_TEXTURE_BIT)) { - /* All surface types except for textures need their row pitch aligned - * to the pixel block size. + if (!(info->view.usage & ISL_SURF_USAGE_TEXTURE_BIT) || + (isl_dev->requires_padding && unpadded)) { + /* All surface types except for padded textures need their row pitch + * aligned to the pixel block size. */ if (info->surf.row_pitch_B * 8 % max_bpb) continue; @@ -3336,9 +3457,11 @@ blorp_copy(struct blorp_batch *batch, dst_width = src_width * src_fmtl->bpb / dst_fmtl->bpb; int max_fmt_scale_src = get_max_format_scale(isl_dev, &params.src, src_x, - src_width, src_height); + src_width, src_height, + (batch->flags & BLORP_BATCH_SRC_UNPADDED) != 0); int max_fmt_scale_dst = get_max_format_scale(isl_dev, &params.dst, dst_x, - dst_width, dst_height); + dst_width, dst_height, + false); int copy_fmt_bpb = MIN2(src_fmtl->bpb * max_fmt_scale_src, dst_fmtl->bpb * max_fmt_scale_dst); diff --git a/src/intel/blorp/blorp_genX_exec_brw.h b/src/intel/blorp/blorp_genX_exec_brw.h index cf216d650b6..ba50dd8efc3 100644 --- a/src/intel/blorp/blorp_genX_exec_brw.h +++ b/src/intel/blorp/blorp_genX_exec_brw.h @@ -108,10 +108,6 @@ static void blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset, struct blorp_address address, uint32_t delta); -static uint64_t -blorp_get_surface_address(struct blorp_batch *batch, - struct blorp_address address); - #if GFX_VER < 10 static struct blorp_address blorp_get_surface_base_address(struct blorp_batch *batch); @@ -1241,11 +1237,11 @@ 
blorp_emit_surface_state(struct blorp_batch *batch, .aux_surf = &surface->aux_surf, .aux_usage = aux_usage, .aux_format = surface->aux_format, .address = - blorp_get_surface_address(batch, surface->addr), + batch->blorp->get_surface_address(batch, surface->addr), .aux_address = !use_aux_address ? 0 : - blorp_get_surface_address(batch, surface->aux_addr), + batch->blorp->get_surface_address(batch, surface->aux_addr), .clear_address = !use_clear_address ? 0 : - blorp_get_surface_address(batch, op_clear_addr), + batch->blorp->get_surface_address(batch, op_clear_addr), .mocs = surface->addr.mocs, .clear_color = surface->clear_color, .use_clear_address = use_clear_address); @@ -1287,6 +1283,45 @@ blorp_emit_surface_state(struct blorp_batch *batch, blorp_flush_range(batch, state, GENX(RENDER_SURFACE_STATE_length) * 4); } +/** + * Emits the remaining rows of the 2D linear surface as a texel buffer, this + * is part of a workaround for performing buffer to image copies when the + * surface is straddling an extra page due to a misaligned sampler cache. 
+ */ +static void +blorp_emit_buffer_surface_state(struct blorp_batch *batch, + const struct blorp_surface_info *surface, + void *state, uint32_t state_offset) +{ + blorp_assert_is_buffer(surface->surf, surface->view); + assert(isl_format_block_is_1x1x1(surface->view.format)); + + const struct isl_device *isl_dev = batch->blorp->isl_dev; + + struct blorp_address buffer_addr = surface->addr; + buffer_addr.offset += + (uint64_t) surface->surf.row_pitch_B * surface->surf.logical_level0_px.h; + + uint32_t element_size_B = + isl_format_get_layout(surface->view.format)->bpb / 8; + uint64_t surface_size_B = + (uint64_t) surface->surf.row_pitch_B * (surface->buffer_rows - 1) + + (uint64_t) surface->surf.logical_level0_px.w * element_size_B; + + isl_buffer_fill_state(isl_dev, state, + .address = + batch->blorp->get_surface_address(batch, buffer_addr), + .size_B = surface_size_B, + .stride_B = element_size_B, + .format = surface->view.format, + .swizzle = surface->view.swizzle, + .mocs = surface->addr.mocs, + .usage = surface->surf.usage | surface->view.usage); + + blorp_surface_reloc(batch, state_offset + isl_dev->ss.addr_offset, + buffer_addr, 0); +} + static void blorp_emit_null_surface_state(struct blorp_batch *batch, const struct blorp_surface_info *surface, @@ -1295,8 +1330,8 @@ blorp_emit_null_surface_state(struct blorp_batch *batch, struct GENX(RENDER_SURFACE_STATE) ss = { .SurfaceType = SURFTYPE_NULL, .SurfaceFormat = ISL_FORMAT_R8G8B8A8_UNORM, - .Width = surface->surf.logical_level0_px.width - 1, - .Height = surface->surf.logical_level0_px.height - 1, + .Width = MAX2(surface->surf.logical_level0_px.width, 1) - 1, + .Height = MAX2(surface->surf.logical_level0_px.height, 1) - 1, .MIPCountLOD = surface->view.base_level, .MinimumArrayElement = surface->view.base_array_layer, .Depth = surface->view.array_len - 1, @@ -1329,7 +1364,7 @@ blorp_setup_binding_table(struct blorp_batch *batch, if (params->use_pre_baked_binding_table) { bind_offset = params->pre_baked_binding_table_offset; } else { 
- unsigned num_surfaces = 1 + params->src.enabled; + unsigned num_surfaces = 1 + params->src.enabled + params->src.buffer; if (!blorp_alloc_binding_table(batch, num_surfaces, isl_dev->ss.size, isl_dev->ss.align, &bind_offset, surface_offsets, surface_maps)) @@ -1350,11 +1385,23 @@ blorp_setup_binding_table(struct blorp_batch *batch, } if (params->src.enabled) { - blorp_emit_surface_state(batch, &params->src, - params->fast_clear_op, - surface_maps[BLORP_TEXTURE_BT_INDEX], - surface_offsets[BLORP_TEXTURE_BT_INDEX], - 0, false); + if (params->src.surf.size_B != 0) { + blorp_emit_surface_state(batch, &params->src, + params->fast_clear_op, + surface_maps[BLORP_TEXTURE_BT_INDEX], + surface_offsets[BLORP_TEXTURE_BT_INDEX], + 0, false); + } else { + /* The entire surface got converted to a buffer, bind a null surface */ + blorp_emit_null_surface_state(batch, &params->src, + surface_maps[BLORP_TEXTURE_BT_INDEX]); + } + + if (params->src.buffer) { + blorp_emit_buffer_surface_state(batch, &params->src, + surface_maps[BLORP_TEXBUF_BT_INDEX], + surface_offsets[BLORP_TEXBUF_BT_INDEX]); + } } } diff --git a/src/intel/blorp/blorp_genX_exec_elk.h b/src/intel/blorp/blorp_genX_exec_elk.h index dd1a49163ea..fcc6f5aa147 100644 --- a/src/intel/blorp/blorp_genX_exec_elk.h +++ b/src/intel/blorp/blorp_genX_exec_elk.h @@ -107,10 +107,6 @@ static void blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset, struct blorp_address address, uint32_t delta); -static uint64_t -blorp_get_surface_address(struct blorp_batch *batch, - struct blorp_address address); - #if GFX_VER >= 7 static struct blorp_address blorp_get_surface_base_address(struct blorp_batch *batch); @@ -1445,6 +1441,7 @@ blorp_emit_surface_state(struct blorp_batch *batch, uint8_t color_write_disable, bool is_render_target) { + assert(!surface->buffer); const struct isl_device *isl_dev = batch->blorp->isl_dev; struct isl_surf surf = surface->surf; @@ -1483,9 +1480,9 @@ blorp_emit_surface_state(struct blorp_batch *batch, .surf = &surf, .view = 
&surface->view, .aux_surf = &surface->aux_surf, .aux_usage = aux_usage, .address = - blorp_get_surface_address(batch, surface->addr), + batch->blorp->get_surface_address(batch, surface->addr), .aux_address = !use_aux_address ? 0 : - blorp_get_surface_address(batch, surface->aux_addr), + batch->blorp->get_surface_address(batch, surface->aux_addr), .mocs = surface->addr.mocs, .clear_color = surface->clear_color, .write_disables = write_disable_mask); diff --git a/src/intel/blorp/blorp_priv.h b/src/intel/blorp/blorp_priv.h index 5155f995444..cc77cd770bd 100644 --- a/src/intel/blorp/blorp_priv.h +++ b/src/intel/blorp/blorp_priv.h @@ -75,6 +75,7 @@ struct blorp_compiler { enum { BLORP_RENDERBUFFER_BT_INDEX, BLORP_TEXTURE_BT_INDEX, + BLORP_TEXBUF_BT_INDEX, BLORP_NUM_BT_ENTRIES }; @@ -84,9 +85,16 @@ struct blorp_surface_info { bool enabled; + /* Should we unpack the last few rows using a texel buffer? */ + bool buffer; + uint32_t buffer_rows; + struct isl_surf surf; struct blorp_address addr; + /* Inferred page boundaries of the surface address */ + uint64_t page_base, page_limit; + struct isl_surf aux_surf; struct blorp_address aux_addr; enum isl_aux_usage aux_usage; @@ -182,6 +190,9 @@ struct blorp_wm_inputs_blit /* (1/width, 1/height) for the source surface */ float src_inv_size[2]; + uint32_t src_buffer_first_row; + uint32_t src_buffer_row_pitch; + /* Minimum layer setting works for all the textures types but texture_3d * for which the setting has no effect. Use the z-coordinate instead. */ @@ -204,7 +215,7 @@ struct blorp_wm_inputs /* Note: Pad out to an integral number of registers when extending, but * make sure subgroup_id is the last 32-bit item. */ - uint32_t pad[4]; + uint32_t pad[2]; uint32_t subgroup_id; }; @@ -434,6 +445,12 @@ struct blorp_blit_prog_key */ bool need_src_offset; + /* True if this blit operation is unpacking the last few rows of the 2D image + * from a 1D buffer. 
This is part of a workaround for performing buffer-to-image + * copies when the source is straddling an extra page due to a misaligned cache. + */ + bool need_src_buffer; + /* True if this blit operation may involve intratile offsets on the * destination. In this case, we need to add the offset to gl_FragCoord. */ @@ -580,6 +597,20 @@ blorp_op_type_is_clear(enum blorp_op op) } } +/* Asserts that the surface is a linear, single-sampled, single-level, non-array 2D surface whose row pitch is a multiple of the view format's element size */ +#define blorp_assert_is_buffer(surf, view) \ + do { \ + assert((surf).dim == ISL_SURF_DIM_2D); \ + assert((surf).tiling == ISL_TILING_LINEAR); \ + assert((surf).logical_level0_px.d == 1); \ + assert((surf).logical_level0_px.array_len == 1); \ + assert((surf).samples == 1); \ + assert((surf).levels == 1); \ + UNUSED const struct isl_format_layout *fmtl = \ + isl_format_get_layout((view).format); \ + assert((surf).row_pitch_B % (fmtl->bpb / 8) == 0); \ + } while (false) + /** \} */ #ifdef __cplusplus diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index b8f1d2f4ef4..61aca743509 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -110,6 +110,17 @@ get_fp64_nir(struct blorp_context *context) return device->fp64_nir; } +static uint64_t +blorp_get_surface_address(struct blorp_batch *blorp_batch, + struct blorp_address address) +{ + struct anv_address anv_addr = { + .bo = address.buffer, + .offset = address.offset, + }; + return anv_address_physical(anv_addr); +} + void anv_device_init_blorp(struct anv_device *device) { @@ -126,6 +137,7 @@ anv_device_init_blorp(struct anv_device *device) device->blorp.context.lookup_shader = lookup_blorp_shader; device->blorp.context.upload_shader = upload_blorp_shader; device->blorp.context.enable_tbimr = device->physical->instance->enable_tbimr; + device->blorp.context.get_surface_address = blorp_get_surface_address; device->blorp.context.exec = anv_genX(device->info, blorp_exec); device->blorp.context.upload_dynamic_state = upload_dynamic_state; @@ 
-853,7 +865,7 @@ void anv_CmdCopyBufferToImage2( anv_cmd_require_rcs(cmd_buffer, blorp_execute_on_companion) { struct blorp_batch batch; - anv_blorp_batch_init(cmd_buffer, &batch, 0); + anv_blorp_batch_init(cmd_buffer, &batch, BLORP_BATCH_SRC_UNPADDED); for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) { const VkBufferImageCopy2 *region = &pCopyBufferToImageInfo->pRegions[r]; @@ -1183,9 +1195,10 @@ anv_cmd_copy_addr(struct anv_cmd_buffer *cmd_buffer, struct blorp_batch batch; anv_blorp_batch_init(cmd_buffer, &batch, - cmd_buffer->state.current_pipeline == - cmd_buffer->device->physical->gpgpu_pipeline_value ? - BLORP_BATCH_USE_COMPUTE : 0); + BLORP_BATCH_SRC_UNPADDED | + (cmd_buffer->state.current_pipeline == + cmd_buffer->device->physical->gpgpu_pipeline_value ? + BLORP_BATCH_USE_COMPUTE : 0)); copy_memory(device, &batch, src_addr, dst_addr, size); @@ -1203,9 +1216,10 @@ void anv_CmdCopyBuffer2( struct blorp_batch batch; anv_blorp_batch_init(cmd_buffer, &batch, - cmd_buffer->state.current_pipeline == - cmd_buffer->device->physical->gpgpu_pipeline_value ? - BLORP_BATCH_USE_COMPUTE : 0); + BLORP_BATCH_SRC_UNPADDED | + (cmd_buffer->state.current_pipeline == + cmd_buffer->device->physical->gpgpu_pipeline_value ? + BLORP_BATCH_USE_COMPUTE : 0)); for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) { const VkBufferCopy2 *region = &pCopyBufferInfo->pRegions[r]; @@ -1230,9 +1244,10 @@ anv_cmd_buffer_update_addr( { struct blorp_batch batch; anv_blorp_batch_init(cmd_buffer, &batch, - cmd_buffer->state.current_pipeline == - cmd_buffer->device->physical->gpgpu_pipeline_value ? - BLORP_BATCH_USE_COMPUTE : 0); + BLORP_BATCH_SRC_UNPADDED | + (cmd_buffer->state.current_pipeline == + cmd_buffer->device->physical->gpgpu_pipeline_value ? + BLORP_BATCH_USE_COMPUTE : 0)); /* We can't quite grab a full block because the state stream needs a * little data at the top to build its linked list. 
diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c index f6f212479f5..b0f59e6edad 100644 --- a/src/intel/vulkan/genX_blorp_exec.c +++ b/src/intel/vulkan/genX_blorp_exec.c @@ -95,17 +95,6 @@ blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset, anv_batch_set_error(&cmd_buffer->batch, result); } -static uint64_t -blorp_get_surface_address(struct blorp_batch *blorp_batch, - struct blorp_address address) -{ - struct anv_address anv_addr = { - .bo = address.buffer, - .offset = address.offset, - }; - return anv_address_physical(anv_addr); -} - #if GFX_VER == 9 static struct blorp_address blorp_get_surface_base_address(struct blorp_batch *batch) diff --git a/src/intel/vulkan_hasvk/anv_blorp.c b/src/intel/vulkan_hasvk/anv_blorp.c index 13ab2f65e8e..e79897b7e9e 100644 --- a/src/intel/vulkan_hasvk/anv_blorp.c +++ b/src/intel/vulkan_hasvk/anv_blorp.c @@ -84,6 +84,14 @@ upload_blorp_shader(struct blorp_batch *batch, uint32_t stage, return true; } +static uint64_t +blorp_get_surface_address(struct blorp_batch *blorp_batch, + struct blorp_address address) +{ + /* We'll let blorp_surface_reloc write the address. 
*/ + return 0; +} + void anv_device_init_blorp(struct anv_device *device) { @@ -93,6 +101,7 @@ anv_device_init_blorp(struct anv_device *device) device->physical->compiler, &config); device->blorp.lookup_shader = lookup_blorp_shader; device->blorp.upload_shader = upload_blorp_shader; + device->blorp.get_surface_address = blorp_get_surface_address; switch (device->info->verx10) { case 70: device->blorp.exec = gfx7_blorp_exec; diff --git a/src/intel/vulkan_hasvk/genX_blorp_exec.c b/src/intel/vulkan_hasvk/genX_blorp_exec.c index 020236b54f0..0a8f431e01f 100644 --- a/src/intel/vulkan_hasvk/genX_blorp_exec.c +++ b/src/intel/vulkan_hasvk/genX_blorp_exec.c @@ -100,14 +100,6 @@ blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset, write_reloc(cmd_buffer->device, dest, address_u64, false); } -static uint64_t -blorp_get_surface_address(struct blorp_batch *blorp_batch, - struct blorp_address address) -{ - /* We'll let blorp_surface_reloc write the address. */ - return 0; -} - static struct blorp_address blorp_get_surface_base_address(struct blorp_batch *batch) {