diff --git a/src/intel/compiler/brw_mesh.cpp b/src/intel/compiler/brw_mesh.cpp index a2cdc73be03..24c5ad77065 100644 --- a/src/intel/compiler/brw_mesh.cpp +++ b/src/intel/compiler/brw_mesh.cpp @@ -927,7 +927,6 @@ emit_urb_direct_vec4_write(const fs_builder &bld, unsigned urb_global_offset, const fs_reg &src, fs_reg urb_handle, - unsigned src_comp_offset, unsigned dst_comp_offset, unsigned comps, unsigned mask) @@ -942,7 +941,7 @@ emit_urb_direct_vec4_write(const fs_builder &bld, payload_srcs[length++] = reg_undef; for (unsigned c = 0; c < comps; c++) - payload_srcs[length++] = quarter(offset(src, bld, c + src_comp_offset), q); + payload_srcs[length++] = quarter(offset(src, bld, c), q); fs_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; @@ -969,39 +968,23 @@ emit_urb_direct_writes(const fs_builder &bld, nir_intrinsic_instr *instr, assert(nir_src_is_const(*offset_nir_src)); const unsigned comps = nir_src_num_components(instr->src[0]); - assert(comps <= 8); + assert(comps <= 4); - const unsigned mask = nir_intrinsic_write_mask(instr); const unsigned offset_in_dwords = nir_intrinsic_base(instr) + nir_src_as_uint(*offset_nir_src) + component_from_intrinsic(instr); /* URB writes are vec4 aligned but the intrinsic offsets are in dwords. - * With a max of 4 components, an intrinsic can require up to two writes. - * - * First URB write will be shifted by comp_shift. If there are other - * components left, then dispatch a second write. In addition to that, - * take mask into account to decide whether each write will be actually - * needed. + * We can write up to 8 dwords, so a single vec4 write is enough. 
*/ - const unsigned comp_shift = offset_in_dwords % 4; - const unsigned first_comps = MIN2(comps, 8 - comp_shift); - const unsigned second_comps = comps - first_comps; - const unsigned first_mask = (mask << comp_shift) & 0xFF; - const unsigned second_mask = (mask >> (8 - comp_shift)) & 0xFF; + const unsigned comp_shift = offset_in_dwords % 4; + const unsigned mask = nir_intrinsic_write_mask(instr) << comp_shift; unsigned urb_global_offset = offset_in_dwords / 4; adjust_handle_and_offset(bld, urb_handle, urb_global_offset); - if (first_mask > 0) - emit_urb_direct_vec4_write(bld, urb_global_offset, src, urb_handle, 0, comp_shift, first_comps, first_mask); - - if (second_mask > 0) { - urb_global_offset += 2; - adjust_handle_and_offset(bld, urb_handle, urb_global_offset); - - emit_urb_direct_vec4_write(bld, urb_global_offset, src, urb_handle, first_comps, 0, second_comps, second_mask); - } + emit_urb_direct_vec4_write(bld, urb_global_offset, src, urb_handle, + comp_shift, comps, mask); } static void @@ -1010,7 +993,6 @@ emit_urb_indirect_vec4_write(const fs_builder &bld, unsigned base, const fs_reg &src, fs_reg urb_handle, - unsigned src_comp_offset, unsigned dst_comp_offset, unsigned comps, unsigned mask) @@ -1030,7 +1012,7 @@ emit_urb_indirect_vec4_write(const fs_builder &bld, payload_srcs[length++] = reg_undef; for (unsigned c = 0; c < comps; c++) - payload_srcs[length++] = quarter(offset(src, bld, c + src_comp_offset), q); + payload_srcs[length++] = quarter(offset(src, bld, c), q); fs_reg srcs[URB_LOGICAL_NUM_SRCS]; srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle; @@ -1055,29 +1037,16 @@ emit_urb_indirect_writes_mod(const fs_builder &bld, nir_intrinsic_instr *instr, assert(nir_src_bit_size(instr->src[0]) == 32); const unsigned comps = nir_src_num_components(instr->src[0]); - assert(comps <= 8); + assert(comps <= 4); - const unsigned mask = nir_intrinsic_write_mask(instr); const unsigned base_in_dwords = nir_intrinsic_base(instr) + component_from_intrinsic(instr); - const 
unsigned comp_shift = mod; - const unsigned first_comps = MIN2(comps, 8 - comp_shift); - const unsigned second_comps = comps - first_comps; - const unsigned first_mask = (mask << comp_shift) & 0xFF; - const unsigned second_mask = (mask >> (8 - comp_shift)) & 0xFF; + const unsigned comp_shift = mod; + const unsigned mask = nir_intrinsic_write_mask(instr) << comp_shift; - if (first_mask > 0) { - emit_urb_indirect_vec4_write(bld, offset_src, base_in_dwords, src, - urb_handle, 0, comp_shift, first_comps, - first_mask); - } - - if (second_mask > 0) { - emit_urb_indirect_vec4_write(bld, offset_src, base_in_dwords + 8, src, - urb_handle, first_comps, 0, second_comps, - second_mask); - } + emit_urb_indirect_vec4_write(bld, offset_src, base_in_dwords, src, + urb_handle, comp_shift, comps, mask); } static void @@ -1268,8 +1237,7 @@ fs_visitor::emit_task_mesh_store(const fs_builder &bld, nir_intrinsic_instr *ins if (offset_nir_src->is_ssa) { /* Try to calculate the value of (offset + base) % 4. If we can do - * this, then we can do indirect writes using only up to 2 URB - * writes (1 if modulo + num_comps is <= 4). + * this, then we can do indirect writes using only 1 URB write. */ use_mod = nir_mod_analysis(nir_get_ssa_scalar(offset_nir_src->ssa, 0), nir_type_uint, 4, &mod); if (use_mod) {