diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 4b1c0887a0d..c89ef6621ae 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1773,7 +1773,7 @@ calculate_urb_setup(const struct intel_device_info *devinfo, nir->info.inputs_read & ~nir->info.per_primitive_inputs; /* Figure out where each of the incoming setup attributes lands. */ - if (mue_map) { + if (key->mesh_input != BRW_NEVER) { /* Per-Primitive Attributes are laid out by Hardware before the regular * attributes, so order them like this to make easy later to map setup * into real HW registers. @@ -1782,9 +1782,6 @@ calculate_urb_setup(const struct intel_device_info *devinfo, uint64_t per_prim_inputs_read = nir->info.inputs_read & nir->info.per_primitive_inputs; - unsigned per_prim_start_dw = mue_map->per_primitive_start_dw; - unsigned per_prim_size_dw = mue_map->per_primitive_pitch_dw; - /* In Mesh, PRIMITIVE_SHADING_RATE, VIEWPORT and LAYER slots * are always at the beginning, because they come from MUE * Primitive Header, not Per-Primitive Attributes. @@ -1792,47 +1789,68 @@ calculate_urb_setup(const struct intel_device_info *devinfo, const uint64_t primitive_header_bits = VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER | VARYING_BIT_PRIMITIVE_SHADING_RATE; - bool reads_header = (per_prim_inputs_read & primitive_header_bits) != 0; - if (reads_header || mue_map->user_data_in_primitive_header) { - /* Primitive Shading Rate, Layer and Viewport live in the same - * 4-dwords slot (psr is dword 0, layer is dword 1, and viewport - * is dword 2). - */ - if (per_prim_inputs_read & VARYING_BIT_PRIMITIVE_SHADING_RATE) - prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_SHADING_RATE] = 0; + if (mue_map) { + unsigned per_prim_start_dw = mue_map->per_primitive_start_dw; + unsigned per_prim_size_dw = mue_map->per_primitive_pitch_dw; - if (per_prim_inputs_read & VARYING_BIT_LAYER) - prog_data->urb_setup[VARYING_SLOT_LAYER] = 0; + bool reads_header = (per_prim_inputs_read & primitive_header_bits) != 0; - if (per_prim_inputs_read & VARYING_BIT_VIEWPORT) - prog_data->urb_setup[VARYING_SLOT_VIEWPORT] = 0; + if (reads_header || mue_map->user_data_in_primitive_header) { + /* Primitive Shading Rate, Layer and Viewport live in the same + * 4-dwords slot (psr is dword 0, layer is dword 1, and viewport + * is dword 2). + */ + if (per_prim_inputs_read & VARYING_BIT_PRIMITIVE_SHADING_RATE) + prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_SHADING_RATE] = 0; - per_prim_inputs_read &= ~primitive_header_bits; + if (per_prim_inputs_read & VARYING_BIT_LAYER) + prog_data->urb_setup[VARYING_SLOT_LAYER] = 0; + + if (per_prim_inputs_read & VARYING_BIT_VIEWPORT) + prog_data->urb_setup[VARYING_SLOT_VIEWPORT] = 0; + + per_prim_inputs_read &= ~primitive_header_bits; + } else { + /* If fs doesn't need primitive header, then it won't be made + * available through SBE_MESH, so we have to skip them when + * calculating offset from start of per-prim data. + */ + per_prim_start_dw += mue_map->per_primitive_header_size_dw; + per_prim_size_dw -= mue_map->per_primitive_header_size_dw; + } + + u_foreach_bit64(i, per_prim_inputs_read) { + int start = mue_map->start_dw[i]; + + assert(start >= 0); + assert(mue_map->len_dw[i] > 0); + + assert(unsigned(start) >= per_prim_start_dw); + unsigned pos_dw = unsigned(start) - per_prim_start_dw; + + prog_data->urb_setup[i] = urb_next + pos_dw / 4; + prog_data->urb_setup_channel[i] = pos_dw % 4; + } + + urb_next = per_prim_size_dw / 4; } else { - /* If fs doesn't need primitive header, then it won't be made - * available through SBE_MESH, so we have to skip them when - * calculating offset from start of per-prim data. + /* With no MUE map, we never read the primitive header, and + * per-primitive attributes won't be packed either, so just lay + * them in varying order. */ - per_prim_start_dw += mue_map->per_primitive_header_size_dw; - per_prim_size_dw -= mue_map->per_primitive_header_size_dw; + per_prim_inputs_read &= ~primitive_header_bits; + + for (unsigned i = 0; i < VARYING_SLOT_MAX; i++) { + if (per_prim_inputs_read & BITFIELD64_BIT(i)) { + prog_data->urb_setup[i] = urb_next++; + } + } + + /* The actual setup attributes later must be aligned to a full GRF. */ + urb_next = ALIGN(urb_next, 2); } - u_foreach_bit64(i, per_prim_inputs_read) { - int start = mue_map->start_dw[i]; - - assert(start >= 0); - assert(mue_map->len_dw[i] > 0); - - assert(unsigned(start) >= per_prim_start_dw); - unsigned pos_dw = unsigned(start) - per_prim_start_dw; - - prog_data->urb_setup[i] = urb_next + pos_dw / 4; - prog_data->urb_setup_channel[i] = pos_dw % 4; - } - - urb_next = per_prim_size_dw / 4; - prog_data->num_per_primitive_inputs = urb_next; } @@ -1842,52 +1860,68 @@ calculate_urb_setup(const struct intel_device_info *devinfo, uint64_t unique_fs_attrs = inputs_read & BRW_FS_VARYING_INPUT_MASK; if (inputs_read & clip_dist_bits) { - assert(mue_map->per_vertex_header_size_dw > 8); + assert(!mue_map || mue_map->per_vertex_header_size_dw > 8); unique_fs_attrs &= ~clip_dist_bits; } - unsigned per_vertex_start_dw = mue_map->per_vertex_start_dw; - unsigned per_vertex_size_dw = mue_map->per_vertex_pitch_dw; + if (mue_map) { + unsigned per_vertex_start_dw = mue_map->per_vertex_start_dw; + unsigned per_vertex_size_dw = mue_map->per_vertex_pitch_dw; - /* Per-Vertex header is available to fragment shader only if there's - * user data there. - */ - if (!mue_map->user_data_in_vertex_header) { - per_vertex_start_dw += 8; - per_vertex_size_dw -= 8; + /* Per-Vertex header is available to fragment shader only if there's + * user data there. + */ + if (!mue_map->user_data_in_vertex_header) { + per_vertex_start_dw += 8; + per_vertex_size_dw -= 8; + } + + /* In Mesh, CLIP_DIST slots are always at the beginning, because + * they come from MUE Vertex Header, not Per-Vertex Attributes. + */ + if (inputs_read & clip_dist_bits) { + prog_data->urb_setup[VARYING_SLOT_CLIP_DIST0] = urb_next; + prog_data->urb_setup[VARYING_SLOT_CLIP_DIST1] = urb_next + 1; + } else if (mue_map && mue_map->per_vertex_header_size_dw > 8) { + /* Clip distances are in MUE, but we are not reading them in FS. */ + per_vertex_start_dw += 8; + per_vertex_size_dw -= 8; + } + + /* Per-Vertex attributes are laid out ordered. Because we always link + * Mesh and Fragment shaders, the which slots are written and read by + * each of them will match. */ + u_foreach_bit64(i, unique_fs_attrs) { + int start = mue_map->start_dw[i]; + + assert(start >= 0); + assert(mue_map->len_dw[i] > 0); + + assert(unsigned(start) >= per_vertex_start_dw); + unsigned pos_dw = unsigned(start) - per_vertex_start_dw; + + prog_data->urb_setup[i] = urb_next + pos_dw / 4; + prog_data->urb_setup_channel[i] = pos_dw % 4; + } + + urb_next += per_vertex_size_dw / 4; + } else { + /* If we don't have an MUE map, just lay down the inputs the FS reads + * in varying order, as we do for the legacy pipeline. + */ + if (inputs_read & clip_dist_bits) { + prog_data->urb_setup[VARYING_SLOT_CLIP_DIST0] = urb_next++; + prog_data->urb_setup[VARYING_SLOT_CLIP_DIST1] = urb_next++; + } + + for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) { + if (unique_fs_attrs & BITFIELD64_BIT(i)) + prog_data->urb_setup[i] = urb_next++; + } } - - /* In Mesh, CLIP_DIST slots are always at the beginning, because - * they come from MUE Vertex Header, not Per-Vertex Attributes. - */ - if (inputs_read & clip_dist_bits) { - prog_data->urb_setup[VARYING_SLOT_CLIP_DIST0] = urb_next; - prog_data->urb_setup[VARYING_SLOT_CLIP_DIST1] = urb_next + 1; - } else if (mue_map->per_vertex_header_size_dw > 8) { - /* Clip distances are in MUE, but we are not reading them in FS. */ - per_vertex_start_dw += 8; - per_vertex_size_dw -= 8; - } - - /* Per-Vertex attributes are laid out ordered. Because we always link - * Mesh and Fragment shaders, the which slots are written and read by - * each of them will match. */ - - u_foreach_bit64(i, unique_fs_attrs) { - int start = mue_map->start_dw[i]; - - assert(start >= 0); - assert(mue_map->len_dw[i] > 0); - - assert(unsigned(start) >= per_vertex_start_dw); - unsigned pos_dw = unsigned(start) - per_vertex_start_dw; - - prog_data->urb_setup[i] = urb_next + pos_dw / 4; - prog_data->urb_setup_channel[i] = pos_dw % 4; - } - - urb_next += per_vertex_size_dw / 4; } else if (devinfo->ver >= 6) { + assert(!nir->info.per_primitive_inputs); + uint64_t vue_header_bits = VARYING_BIT_PSIZ | VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT;