mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 02:20:11 +01:00
radv/bvh: Adjust sah cost based on depth
Adds a cost field to radv_ir_node and uses it to model the cost of tree depth. This improves framerates by 2% if my benchmarking is correct. Reviewed-by: Adam Jackson <ajax@redhat.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19756>
This commit is contained in:
parent
2ba55ec504
commit
6f45c98b58
3 changed files with 38 additions and 5 deletions
|
|
@ -98,6 +98,8 @@ struct radv_accel_struct_header {
|
|||
|
||||
struct radv_ir_node {
|
||||
radv_aabb aabb;
|
||||
/* Generic normalized cost of not merging this node. */
|
||||
float cost;
|
||||
};
|
||||
|
||||
#define FINAL_TREE_PRESENT 0
|
||||
|
|
|
|||
|
|
@ -208,6 +208,7 @@ build_instance(inout radv_aabb bounds, VOID_REF src_ptr, VOID_REF dst_ptr, uint3
|
|||
DEREF(node).instance_id = global_id;
|
||||
|
||||
DEREF(node).base.aabb = bounds;
|
||||
DEREF(node).base.cost = 0.0;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -268,6 +269,7 @@ main(void)
|
|||
}
|
||||
|
||||
DEREF(node).base.aabb = bounds;
|
||||
DEREF(node).base.cost = 0.0;
|
||||
|
||||
DEREF(node).triangle_id = global_id;
|
||||
DEREF(node).geometry_id_and_flags = args.geometry_id;
|
||||
|
|
@ -289,6 +291,7 @@ main(void)
|
|||
}
|
||||
|
||||
DEREF(node).base.aabb = bounds;
|
||||
DEREF(node).base.cost = 0.0;
|
||||
DEREF(node).primitive_id = global_id;
|
||||
DEREF(node).geometry_id_and_flags = args.geometry_id;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -101,6 +101,9 @@ prefix_scan(uvec4 ballot, REF(ploc_prefix_scan_partition) partitions, uint32_t t
|
|||
return aggregate_sums[gl_SubgroupID] + subgroupBallotExclusiveBitCount(ballot);
|
||||
}
|
||||
|
||||
/* Relative cost of increasing the BVH depth. Deep BVHs will require more backtracking. */
|
||||
#define BVH_LEVEL_COST 0.2
|
||||
|
||||
uint32_t
|
||||
push_node(uint32_t children[2])
|
||||
{
|
||||
|
|
@ -113,6 +116,8 @@ push_node(uint32_t children[2])
|
|||
total_bounds.min = vec3(INFINITY);
|
||||
total_bounds.max = vec3(-INFINITY);
|
||||
|
||||
float cost = 0.0;
|
||||
|
||||
for (uint i = 0; i < 2; ++i) {
|
||||
if (children[i] != RADV_BVH_INVALID_NODE) {
|
||||
VOID_REF node = OFFSET(args.bvh, ir_id_to_offset(children[i]));
|
||||
|
|
@ -121,12 +126,15 @@ push_node(uint32_t children[2])
|
|||
|
||||
total_bounds.min = min(total_bounds.min, bounds.min);
|
||||
total_bounds.max = max(total_bounds.max, bounds.max);
|
||||
|
||||
cost += DEREF(child).cost;
|
||||
}
|
||||
|
||||
DEREF(dst_node).children[i] = children[i];
|
||||
}
|
||||
|
||||
DEREF(dst_node).base.aabb = total_bounds;
|
||||
DEREF(dst_node).base.cost = cost * 0.5 + BVH_LEVEL_COST;
|
||||
DEREF(dst_node).in_final_tree = FINAL_TREE_UNKNOWN;
|
||||
return dst_id;
|
||||
}
|
||||
|
|
@ -152,6 +160,7 @@ decode_neighbour_offset(uint32_t encoded_offset)
|
|||
#define NUM_PLOC_LDS_ITEMS PLOC_WORKGROUP_SIZE + 4 * PLOC_NEIGHBOURHOOD
|
||||
|
||||
shared radv_aabb shared_bounds[NUM_PLOC_LDS_ITEMS];
|
||||
shared float shared_costs[NUM_PLOC_LDS_ITEMS];
|
||||
shared uint32_t nearest_neighbour_indices[NUM_PLOC_LDS_ITEMS];
|
||||
|
||||
uint32_t
|
||||
|
|
@ -177,18 +186,37 @@ load_bounds(VOID_REF ids, uint32_t iter, uint32_t task_index, uint32_t lds_base,
|
|||
REF(radv_ir_node) node = REF(radv_ir_node)(addr);
|
||||
|
||||
shared_bounds[i - lds_base] = DEREF(node).aabb;
|
||||
shared_costs[i - lds_base] = DEREF(node).cost;
|
||||
}
|
||||
}
|
||||
|
||||
float
|
||||
combined_node_area(uint32_t lds_base, uint32_t i, uint32_t j)
|
||||
combined_node_cost(uint32_t lds_base, uint32_t i, uint32_t j)
|
||||
{
|
||||
radv_aabb combined_bounds;
|
||||
combined_bounds.min = min(shared_bounds[i - lds_base].min, shared_bounds[j - lds_base].min);
|
||||
combined_bounds.max = max(shared_bounds[i - lds_base].max, shared_bounds[j - lds_base].max);
|
||||
vec3 size = combined_bounds.max - combined_bounds.min;
|
||||
float area = aabb_surface_area(combined_bounds);
|
||||
|
||||
return size.x * size.y + size.y * size.z + size.z * size.x;
|
||||
/* p_i and p_j are the probabilities that i or j are hit by a ray:
|
||||
* Assuming that the current node is hit (p = 1) and the probability of hitting a node
|
||||
* is proportional to its surface area, p = area * c with p = 1 for the current node.
|
||||
* -> c = 1 / area
|
||||
*
|
||||
* We can use those probabilities to limit the impact of child cost to be proportional to
|
||||
* its hit probability. (Child cost is the cost of not merging a node which increases with
|
||||
* tree depth for internal nodes)
|
||||
*
|
||||
* Dividing area by both relative costs will make it more likely that we merge nodes with
|
||||
* a high child cost.
|
||||
*/
|
||||
float p_i = aabb_surface_area(shared_bounds[i - lds_base]) / area;
|
||||
float p_j = aabb_surface_area(shared_bounds[j - lds_base]) / area;
|
||||
|
||||
float combined_cost =
|
||||
(1.0 + shared_costs[i - lds_base] * p_i) * (1.0 + shared_costs[j - lds_base] * p_j);
|
||||
|
||||
return area / combined_cost;
|
||||
}
|
||||
|
||||
shared uint32_t shared_aggregate_sum;
|
||||
|
|
@ -240,7 +268,7 @@ main(void)
|
|||
for (uint32_t j = max(i + 1, base_index - neigbourhood_overlap); j <= i + right_bound;
|
||||
++j) {
|
||||
|
||||
float sah = combined_node_area(lds_base, i, j);
|
||||
float sah = combined_node_cost(lds_base, i, j);
|
||||
uint32_t i_encoded_offset = encode_neighbour_offset(sah, i, j);
|
||||
uint32_t j_encoded_offset = encode_neighbour_offset(sah, j, i);
|
||||
min_offset = min(min_offset, i_encoded_offset);
|
||||
|
|
@ -276,7 +304,7 @@ main(void)
|
|||
if (preferred_pair != i) {
|
||||
uint32_t encoded_min_sah =
|
||||
nearest_neighbour_indices[i - lds_base] & (~PLOC_OFFSET_MASK);
|
||||
float sah = combined_node_area(lds_base, i, preferred_pair);
|
||||
float sah = combined_node_cost(lds_base, i, preferred_pair);
|
||||
uint32_t encoded_sah = floatBitsToUint(sah) & (~PLOC_OFFSET_MASK);
|
||||
uint32_t encoded_offset = encode_neighbour_offset(sah, i, preferred_pair);
|
||||
if (encoded_sah <= encoded_min_sah) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue