mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 13:58:04 +02:00
glthread: generate packed versions of gl*Pointer/Offset calls
The pointer/offset parameter is often NULL or a small number with VBOs.
The idea is:
- If the pointer/offset parameter is NULL/0, use a different cmd structure
and unmarshal function that doesn't contain the pointer/offset parameter
to save 8 bytes per call.
- If the cmd structure has a hole and the pointer/offset parameter is
a small number that fits into the hole, use a different cmd structure
and unmarshal function that stores the value within the hole using
a smaller type to save 8 bytes per call.
This commit implements both ideas. The generator will keep producing optimal
code even if the packing of other parameters changes later.
This decreases the size of the glthread batches for one frame by 21%
in Viewperf2020/Catia1.
Example of generated code for glVertexPointer: the regular version, and the
packed version that omits the pointer parameter when it is NULL. The arrows
mark comments that point out the differences.
/* VertexPointer: marshalled asynchronously */
struct marshal_cmd_VertexPointer
{
struct marshal_cmd_base cmd_base;
GLpacked16i size;
GLenum16 type;
GLclamped16i stride;
const GLvoid * pointer;
};
struct marshal_cmd_VertexPointer_packed
{
struct marshal_cmd_base cmd_base;
GLpacked16i size;
GLenum16 type;
GLclamped16i stride; // <------- no "pointer"
};
uint32_t _mesa_unmarshal_VertexPointer(struct gl_context *ctx, const struct marshal_cmd_VertexPointer *restrict cmd)
{
GLpacked16i size = cmd->size;
GLenum16 type = cmd->type;
GLclamped16i stride = cmd->stride;
const GLvoid * pointer = cmd->pointer;
CALL_VertexPointer(ctx->Dispatch.Current, (size, type, stride, pointer));
return align(sizeof(struct marshal_cmd_VertexPointer), 8) / 8;
}
uint32_t _mesa_unmarshal_VertexPointer_packed(struct gl_context *ctx, const struct marshal_cmd_VertexPointer_packed *restrict cmd)
{
GLpacked16i size = cmd->size;
GLenum16 type = cmd->type;
GLclamped16i stride = cmd->stride;
const GLvoid * pointer = (const GLvoid *)(uintptr_t)0; // <------- using NULL
CALL_VertexPointer(ctx->Dispatch.Current, (size, type, stride, pointer));
return align(sizeof(struct marshal_cmd_VertexPointer_packed), 8) / 8;
}
static void GLAPIENTRY
_mesa_marshal_VertexPointer(GLint size, GLenum type, GLsizei stride, const GLvoid *pointer)
{
GET_CURRENT_CONTEXT(ctx);
if (!pointer) { // <------- the condition
int cmd_size = sizeof(struct marshal_cmd_VertexPointer_packed);
struct marshal_cmd_VertexPointer_packed *cmd = _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_VertexPointer_packed, cmd_size);
cmd->size = size < 0 ? UINT16_MAX : MIN2(size, UINT16_MAX);
cmd->type = MIN2(type, 0xffff); /* clamped to 0xffff (invalid enum) */
cmd->stride = CLAMP(stride, INT16_MIN, INT16_MAX);
} else {
int cmd_size = sizeof(struct marshal_cmd_VertexPointer);
struct marshal_cmd_VertexPointer *cmd = _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_VertexPointer, cmd_size);
cmd->size = size < 0 ? UINT16_MAX : MIN2(size, UINT16_MAX);
cmd->type = MIN2(type, 0xffff); /* clamped to 0xffff (invalid enum) */
cmd->stride = CLAMP(stride, INT16_MIN, INT16_MAX);
cmd->pointer = pointer;
}
_mesa_glthread_AttribPointer(ctx, VERT_ATTRIB_POS, MESA_PACK_VFORMAT(type, size, 0, 0, 0), stride, pointer);
}
Example of generated code for glNormalPointer, where the packed version stores the pointer in a smaller type:
/* NormalPointer: marshalled asynchronously */
struct marshal_cmd_NormalPointer
{
struct marshal_cmd_base cmd_base;
GLenum16 type;
GLclamped16i stride;
const GLvoid * pointer;
};
struct marshal_cmd_NormalPointer_packed
{
struct marshal_cmd_base cmd_base;
GLenum16 type;
GLclamped16i stride;
GLushort pointer; // <-------- truncated "pointer"
};
uint32_t _mesa_unmarshal_NormalPointer(struct gl_context *ctx, const struct marshal_cmd_NormalPointer *restrict cmd)
{
GLenum16 type = cmd->type;
GLclamped16i stride = cmd->stride;
const GLvoid * pointer = cmd->pointer;
CALL_NormalPointer(ctx->Dispatch.Current, (type, stride, pointer));
return align(sizeof(struct marshal_cmd_NormalPointer), 8) / 8;
}
uint32_t _mesa_unmarshal_NormalPointer_packed(struct gl_context *ctx, const struct marshal_cmd_NormalPointer_packed *restrict cmd)
{
GLenum16 type = cmd->type;
GLclamped16i stride = cmd->stride;
const GLvoid * pointer = (const GLvoid *)(uintptr_t)cmd->pointer; // <-------- upcasting
CALL_NormalPointer(ctx->Dispatch.Current, (type, stride, pointer));
return align(sizeof(struct marshal_cmd_NormalPointer_packed), 8) / 8;
}
static void GLAPIENTRY
_mesa_marshal_NormalPointer(GLenum type, GLsizei stride, const GLvoid *pointer)
{
GET_CURRENT_CONTEXT(ctx);
if (((uintptr_t)pointer & 0xffff) == (uintptr_t)pointer) { // <-------- the condition
int cmd_size = sizeof(struct marshal_cmd_NormalPointer_packed);
struct marshal_cmd_NormalPointer_packed *cmd = _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_NormalPointer_packed, cmd_size);
cmd->type = MIN2(type, 0xffff); /* clamped to 0xffff (invalid enum) */
cmd->stride = CLAMP(stride, INT16_MIN, INT16_MAX);
cmd->pointer = (uintptr_t)pointer; /* truncated */ // <-------- the truncation
} else {
int cmd_size = sizeof(struct marshal_cmd_NormalPointer);
struct marshal_cmd_NormalPointer *cmd = _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_NormalPointer, cmd_size);
cmd->type = MIN2(type, 0xffff); /* clamped to 0xffff (invalid enum) */
cmd->stride = CLAMP(stride, INT16_MIN, INT16_MAX);
cmd->pointer = pointer;
}
_mesa_glthread_AttribPointer(ctx, VERT_ATTRIB_NORMAL, MESA_PACK_VFORMAT(type, 3, 1, 0, 0), stride, pointer);
}
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27350>
This commit is contained in:
parent
24f14f8daa
commit
c9abb7ff6e
4 changed files with 141 additions and 23 deletions
|
|
@ -103,19 +103,23 @@ class PrintCode(gl_XML.gl_print_base):
|
|||
out('')
|
||||
out('')
|
||||
|
||||
def print_unmarshal_func(self, func):
|
||||
out('uint32_t')
|
||||
out(('_mesa_unmarshal_{0}(struct gl_context *ctx, '
|
||||
'const struct marshal_cmd_{0} *restrict cmd)').format(func.name))
|
||||
def print_unmarshal_func(self, func, is_packed=False):
|
||||
func.print_unmarshal_prototype(is_packed=is_packed)
|
||||
out('{')
|
||||
with indent():
|
||||
for p in func.fixed_params:
|
||||
type = func.get_marshal_type(p)
|
||||
|
||||
if p.count:
|
||||
p_decl = '{0} *{1} = cmd->{1};'.format(
|
||||
p.get_base_type_string(), p.name)
|
||||
elif is_packed and func.packed_param_name == p.name:
|
||||
if func.packed_param_size == 0:
|
||||
p_decl = '{0} {1} = ({0})(uintptr_t)0;'.format(type, p.name)
|
||||
else:
|
||||
p_decl = '{0} {1} = ({0})(uintptr_t)cmd->{1};'.format(type, p.name)
|
||||
else:
|
||||
p_decl = '{0} {1} = cmd->{1};'.format(
|
||||
func.get_marshal_type(p), p.name)
|
||||
p_decl = '{0} {1} = cmd->{1};'.format(type, p.name)
|
||||
|
||||
if not p_decl.startswith('const ') and p.count:
|
||||
# Declare all local function variables as const, even if
|
||||
|
|
@ -150,12 +154,14 @@ class PrintCode(gl_XML.gl_print_base):
|
|||
if func.variable_params:
|
||||
out('return cmd->num_slots;')
|
||||
else:
|
||||
struct = 'struct marshal_cmd_{0}'.format(func.name)
|
||||
out('return align(sizeof({0}), 8) / 8;'.format(struct))
|
||||
out('return align(sizeof({0}), 8) / 8;'.format(func.get_marshal_struct_name(is_packed)))
|
||||
out('}')
|
||||
|
||||
def print_marshal_async_code(self, func):
|
||||
struct = 'struct marshal_cmd_{0}'.format(func.name)
|
||||
if not is_packed and func.packed_fixed_params:
|
||||
self.print_unmarshal_func(func, is_packed=True)
|
||||
|
||||
def print_marshal_async_code(self, func, is_packed=False):
|
||||
struct = func.get_marshal_struct_name(is_packed)
|
||||
|
||||
if func.marshal_sync:
|
||||
out('int cmd_size = sizeof({0});'.format(struct))
|
||||
|
|
@ -205,17 +211,24 @@ class PrintCode(gl_XML.gl_print_base):
|
|||
out('assert(cmd_size >= 0 && cmd_size <= MARSHAL_MAX_CMD_SIZE);')
|
||||
|
||||
# Add the call into the batch.
|
||||
out('{0} *cmd = _mesa_glthread_allocate_command(ctx, '
|
||||
'DISPATCH_CMD_{1}, cmd_size);'.format(struct, func.name))
|
||||
dispatch_cmd = 'DISPATCH_CMD_{0}{1}'.format(func.name, '_packed' if is_packed else '')
|
||||
if func.get_fixed_params(is_packed) or func.variable_params:
|
||||
out('{0} *cmd = _mesa_glthread_allocate_command(ctx, {1}, cmd_size);'
|
||||
.format(struct, dispatch_cmd))
|
||||
else:
|
||||
out('_mesa_glthread_allocate_command(ctx, {0}, cmd_size);'.format(dispatch_cmd))
|
||||
|
||||
if func.variable_params:
|
||||
out('cmd->num_slots = align(cmd_size, 8) / 8;')
|
||||
|
||||
for p in func.fixed_params:
|
||||
for p in func.get_fixed_params(is_packed):
|
||||
type = func.get_marshal_type(p)
|
||||
|
||||
if p.count:
|
||||
out('memcpy(cmd->{0}, {0}, {1});'.format(
|
||||
p.name, p.size_string()))
|
||||
elif is_packed and p.name == func.packed_param_name:
|
||||
out('cmd->{0} = (uintptr_t){0}; /* truncated */'.format(p.name))
|
||||
elif type == 'GLenum8':
|
||||
out('cmd->{0} = MIN2({0}, 0xff); /* clamped to 0xff (invalid enum) */'.format(p.name))
|
||||
elif type == 'GLenum16':
|
||||
|
|
@ -226,6 +239,7 @@ class PrintCode(gl_XML.gl_print_base):
|
|||
out('cmd->{0} = {0} < 0 ? UINT16_MAX : MIN2({0}, UINT16_MAX);'.format(p.name))
|
||||
else:
|
||||
out('cmd->{0} = {0};'.format(p.name))
|
||||
|
||||
if func.variable_params:
|
||||
out('char *variable_data = (char *) (cmd + 1);')
|
||||
i = 1
|
||||
|
|
@ -244,9 +258,6 @@ class PrintCode(gl_XML.gl_print_base):
|
|||
out('variable_data += {0}_size;'.format(p.name))
|
||||
i += 1
|
||||
|
||||
if not func.fixed_params and not func.variable_params:
|
||||
out('(void) cmd;')
|
||||
|
||||
def print_async_body(self, func):
|
||||
out('/* {0}: marshalled asynchronously */'.format(func.name))
|
||||
func.print_struct()
|
||||
|
|
@ -261,7 +272,22 @@ class PrintCode(gl_XML.gl_print_base):
|
|||
if func.marshal_call_before:
|
||||
out(func.marshal_call_before);
|
||||
|
||||
self.print_marshal_async_code(func)
|
||||
if func.packed_fixed_params:
|
||||
if func.packed_param_size > 0:
|
||||
out('if (((uintptr_t){0} & 0x{1}) == (uintptr_t){0}) {{'
|
||||
.format(func.packed_param_name,
|
||||
'ff' * func.packed_param_size))
|
||||
else:
|
||||
out('if (!{0}) {{'.format(func.packed_param_name))
|
||||
|
||||
with indent():
|
||||
self.print_marshal_async_code(func, is_packed=True)
|
||||
out('} else {')
|
||||
with indent():
|
||||
self.print_marshal_async_code(func)
|
||||
out('}')
|
||||
else:
|
||||
self.print_marshal_async_code(func)
|
||||
|
||||
if func.marshal_call_after:
|
||||
out(func.marshal_call_after)
|
||||
|
|
|
|||
|
|
@ -61,6 +61,8 @@ class PrintCode(gl_XML.gl_print_base):
|
|||
if flavor in ('skip', 'sync'):
|
||||
continue
|
||||
print(' DISPATCH_CMD_{0},'.format(func.name))
|
||||
if func.packed_fixed_params:
|
||||
print(' DISPATCH_CMD_{0}_packed,'.format(func.name))
|
||||
print(' NUM_DISPATCH_CMD,')
|
||||
print('};')
|
||||
print('')
|
||||
|
|
@ -71,8 +73,9 @@ class PrintCode(gl_XML.gl_print_base):
|
|||
flavor = func.marshal_flavor()
|
||||
|
||||
if flavor in ('custom', 'async'):
|
||||
print(('uint32_t _mesa_unmarshal_{0}(struct gl_context *ctx, '
|
||||
'const struct marshal_cmd_{0} *restrict cmd);').format(func.name))
|
||||
func.print_unmarshal_prototype(suffix=';')
|
||||
if func.packed_fixed_params:
|
||||
func.print_unmarshal_prototype(suffix=';', is_packed=True)
|
||||
|
||||
if flavor in ('custom', 'async', 'sync') and not func.marshal_is_static():
|
||||
print('{0} GLAPIENTRY _mesa_marshal_{1}({2});'.format(func.return_type, func.name, func.get_parameter_string()))
|
||||
|
|
|
|||
|
|
@ -68,6 +68,8 @@ class PrintCode(gl_XML.gl_print_base):
|
|||
if func.marshal_flavor() in ('skip', 'sync'):
|
||||
continue
|
||||
out('[DISPATCH_CMD_{0}] = (_mesa_unmarshal_func)_mesa_unmarshal_{0},'.format(func.name))
|
||||
if func.packed_fixed_params:
|
||||
out('[DISPATCH_CMD_{0}_packed] = (_mesa_unmarshal_func)_mesa_unmarshal_{0}_packed,'.format(func.name))
|
||||
out('};')
|
||||
|
||||
# Print the string table of function names.
|
||||
|
|
@ -78,6 +80,8 @@ class PrintCode(gl_XML.gl_print_base):
|
|||
if func.marshal_flavor() in ('skip', 'sync'):
|
||||
continue
|
||||
out('[DISPATCH_CMD_{0}] = "{0}",'.format(func.name))
|
||||
if func.packed_fixed_params:
|
||||
out('[DISPATCH_CMD_{0}_packed] = "{0}_packed",'.format(func.name))
|
||||
out('};')
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,11 @@
|
|||
|
||||
import gl_XML
|
||||
import sys
|
||||
import copy
|
||||
import typeexpr
|
||||
|
||||
def pot_align(base, pot_alignment):
|
||||
return (base + pot_alignment - 1) & ~(pot_alignment - 1);
|
||||
|
||||
|
||||
class marshal_item_factory(gl_XML.gl_item_factory):
|
||||
|
|
@ -163,6 +168,74 @@ class marshal_function(gl_XML.gl_function):
|
|||
# from smallest to biggest.
|
||||
self.fixed_params = sorted(self.fixed_params, key=lambda p: self.get_type_size(p))
|
||||
|
||||
# Compute the marshal structure size and the largest hole
|
||||
self.struct_size = 2 # sizeof(struct marshal_cmd_base)
|
||||
largest_hole = 0
|
||||
|
||||
for p in self.fixed_params:
|
||||
type_size = self.get_type_size(p)
|
||||
aligned_size = pot_align(self.struct_size, type_size)
|
||||
largest_hole = max(aligned_size - self.struct_size, largest_hole)
|
||||
self.struct_size = aligned_size
|
||||
self.struct_size = self.struct_size + type_size
|
||||
|
||||
# Round down largest_hole to a power of two.
|
||||
largest_hole = int(2 ** (largest_hole.bit_length() - 1))
|
||||
|
||||
# Align the structure to 8 bytes.
|
||||
aligned_size = pot_align(self.struct_size, 8)
|
||||
padding_hole = aligned_size - self.struct_size
|
||||
self.struct_size = aligned_size
|
||||
|
||||
# Determine whether to generate a packed version of gl*Pointer calls.
|
||||
# If there is a hole in the cmd structure, the pointer/offset parameter
|
||||
# can be truncated and stored in the hole to save 8 bytes per call.
|
||||
# The version of the structure is determined at runtime based on
|
||||
# whether the truncation doesn't change the value. This is common with
|
||||
# VBOs because the pointer/offset is usually small.
|
||||
#
|
||||
# If there is no hole, the packed version completely removes
|
||||
# the pointer/offset parameter and is used when the value is NULL/0
|
||||
# to remove 8 bytes per call. This is common with VBOs.
|
||||
self.packed_param_name = None
|
||||
|
||||
if (self.is_vertex_pointer_call and
|
||||
# 32-bit CPUs only benefit if we remove the whole 8-byte slot,
|
||||
# which means there must be exactly 4-byte padding after the 4-byte
|
||||
# pointer/offset parameter.
|
||||
(self.context.pointer_size != 4 or padding_hole == 4)):
|
||||
for pname in ['pointer', 'offset']:
|
||||
if pname in [p.name for p in self.fixed_params]:
|
||||
self.packed_param_name = pname
|
||||
|
||||
assert self.packed_param_name
|
||||
assert not self.variable_params
|
||||
assert not self.marshal_sync
|
||||
|
||||
# Prepare the parameters for the packed version by replacing the type
|
||||
# of the packed variable or removing it completely.
|
||||
self.packed_fixed_params = []
|
||||
if self.packed_param_name:
|
||||
for p in self.fixed_params:
|
||||
if p.name == self.packed_param_name:
|
||||
if largest_hole > 0:
|
||||
# Select the truncated type.
|
||||
type = ['GLubyte', 'GLushort', 'GLuint'][largest_hole.bit_length() - 1]
|
||||
|
||||
# Clone the parameter and change its type
|
||||
new_param = copy.deepcopy(p)
|
||||
new_param.type_expr = typeexpr.type_expression(type, self.context)
|
||||
self.packed_fixed_params.append(new_param)
|
||||
else:
|
||||
self.packed_fixed_params.append(p)
|
||||
self.packed_param_size = largest_hole
|
||||
# Sort the parameters by size to move the truncated type into the hole.
|
||||
self.packed_fixed_params = sorted(self.packed_fixed_params, key=lambda p: self.get_type_size(p))
|
||||
|
||||
|
||||
def get_fixed_params(self, is_packed):
|
||||
return self.packed_fixed_params if is_packed else self.fixed_params
|
||||
|
||||
def marshal_flavor(self):
|
||||
"""Find out how this function should be marshalled between
|
||||
client and server threads."""
|
||||
|
|
@ -195,15 +268,15 @@ class marshal_function(gl_XML.gl_function):
|
|||
self.name[0:8] != 'Internal' and
|
||||
self.exec_flavor != 'beginend')
|
||||
|
||||
def print_struct(self, is_header=False):
|
||||
def print_struct(self, is_header=False, is_packed=False):
|
||||
if (self.marshal_struct == 'public') == is_header:
|
||||
print('struct marshal_cmd_{0}'.format(self.name))
|
||||
print(self.get_marshal_struct_name(is_packed))
|
||||
print('{')
|
||||
print(' struct marshal_cmd_base cmd_base;')
|
||||
if self.variable_params:
|
||||
print(' uint16_t num_slots;')
|
||||
|
||||
for p in self.fixed_params:
|
||||
for p in self.get_fixed_params(is_packed):
|
||||
if p.count:
|
||||
print(' {0} {1}[{2}];'.format(
|
||||
p.get_base_type_string(), p.name, p.count))
|
||||
|
|
@ -228,4 +301,16 @@ class marshal_function(gl_XML.gl_function):
|
|||
p.name, p.counter))
|
||||
print('};')
|
||||
elif self.marshal_flavor() in ('custom', 'async'):
|
||||
print('struct marshal_cmd_{0};'.format(self.name))
|
||||
print('{0};'.format(self.get_marshal_struct_name(is_packed)))
|
||||
|
||||
if not is_packed and self.packed_fixed_params:
|
||||
self.print_struct(is_header, True)
|
||||
|
||||
def get_marshal_struct_name(self, is_packed=False):
|
||||
return 'struct marshal_cmd_{0}{1}'.format(self.name, '_packed' if is_packed else '')
|
||||
|
||||
def print_unmarshal_prototype(self, is_packed=False, suffix=''):
|
||||
print(('uint32_t _mesa_unmarshal_{0}{1}(struct gl_context *ctx, '
|
||||
'const {2} *restrict cmd){3}')
|
||||
.format(self.name, '_packed' if is_packed else '',
|
||||
self.get_marshal_struct_name(is_packed), suffix))
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue