From e8721fefcde8ae008836ce2786c977c4001846e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak@amd.com>
Date: Mon, 29 Jan 2024 19:23:41 -0500
Subject: [PATCH] glthread: don't check cmd_size for small variable-sized calls

This removes the size checking, syncing, and direct execution if
the variable-sized call is always small. Don't use safe_mul in that case
either. Only calls already using marshal_count are affected. Example:

Before:

static void GLAPIENTRY
_mesa_marshal_PointParameterfv(GLenum pname, const GLfloat *params)
{
   GET_CURRENT_CONTEXT(ctx);
   int params_size = safe_mul(_mesa_point_param_enum_to_count(pname), 1 * sizeof(GLfloat));
   int cmd_size = sizeof(struct marshal_cmd_PointParameterfv) + params_size;
   if (unlikely(params_size < 0 || (params_size > 0 && !params) || (unsigned)cmd_size > MARSHAL_MAX_CMD_SIZE)) {
      _mesa_glthread_finish_before(ctx, "PointParameterfv");
      CALL_PointParameterfv(ctx->Dispatch.Current, (pname, params));
      return;
   }
   struct marshal_cmd_PointParameterfv *cmd = _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_PointParameterfv, cmd_size);
   cmd->num_slots = align(cmd_size, 8) / 8;
   cmd->pname = MIN2(pname, 0xffff); /* clamped to 0xffff (invalid enum) */
   char *variable_data = (char *) (cmd + 1);
   memcpy(variable_data, params, params_size);
}

After:

static void GLAPIENTRY
_mesa_marshal_PointParameterfv(GLenum pname, const GLfloat *params)
{
   GET_CURRENT_CONTEXT(ctx);
   int params_size = _mesa_point_param_enum_to_count(pname) * 1 * sizeof(GLfloat);
   int cmd_size = sizeof(struct marshal_cmd_PointParameterfv) + params_size;
   assert(cmd_size >= 0 && cmd_size <= MARSHAL_MAX_CMD_SIZE);
   struct marshal_cmd_PointParameterfv *cmd = _mesa_glthread_allocate_command(ctx, DISPATCH_CMD_PointParameterfv, cmd_size);
   cmd->num_slots = align(cmd_size, 8) / 8;
   cmd->pname = MIN2(pname, 0xffff); /* clamped to 0xffff (invalid enum) */
   char *variable_data = (char *) (cmd + 1);
   memcpy(variable_data, params, params_size);
}

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27350>
---
 src/mapi/glapi/gen/gl_API.dtd     | 13 +++++++++++--
 src/mapi/glapi/gen/gl_API.xml     |  2 +-
 src/mapi/glapi/gen/gl_XML.py      | 16 ++++++++++++----
 src/mapi/glapi/gen/gl_marshal.py  | 10 ++++++++--
 src/mapi/glapi/gen/marshal_XML.py |  5 +++--
 5 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/src/mapi/glapi/gen/gl_API.dtd b/src/mapi/glapi/gen/gl_API.dtd
index 0358dc9609d..45df8e9fd7f 100644
--- a/src/mapi/glapi/gen/gl_API.dtd
+++ b/src/mapi/glapi/gen/gl_API.dtd
@@ -42,6 +42,7 @@
                    marshal             NMTOKEN #IMPLIED
                    marshal_sync        CDATA #IMPLIED>
                    marshal_count       CDATA #IMPLIED>
+                   marshal_large_count CDATA #IMPLIED>
                    marshal_call_before CDATA #IMPLIED>
                    marshal_call_after  CDATA #IMPLIED>
                    marshal_struct      CDATA #IMPLIED>
@@ -135,8 +136,16 @@ param:
         generated but a custom implementation will be present in marshal.c.
      marshal_sync - an expression that, if it evaluates true, causes glthread
         to sync and execute the call directly.
-     marshal_count - same as count, but variable_param is ignored. Used by
-        glthread.
+     marshal_count - alternative to using "count" for pointer parameters.
+        It contains an expression computing the size of the array pointed to
+        by the pointer parameter. The maximum size must be small enough that
+        it always fits into a glthread batch, so that glthread never has to
+        sync and execute directly.
+     marshal_large_count - alternative to using "count" for pointer parameters.
+        It contains an expression computing the size of the array pointed to
+        by the pointer parameter. It can be an arbitrary size, which may cause
+        glthread to flush, sync, and execute directly if the size is greater
+        than the maximum call size that fits in a glthread batch.
      marshal_call_before - insert the string at the beginning of the marshal
         function
      marshal_call_after - insert the string at the end of the marshal function
diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index 7dbcced8caa..f3b31996825 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -1132,7 +1132,7 @@
         <param name="n" type="GLsizei" counter="true"/>
         <param name="type" type="GLenum"/>
         <param name="lists" type="const GLvoid *" variable_param="type" count="n"
-               marshal_count="(n * _mesa_calllists_enum_to_count(type))"/>
+               marshal_large_count="(n * _mesa_calllists_enum_to_count(type))"/>
         <glx rop="2" large="true"/>
     </function>
 
diff --git a/src/mapi/glapi/gen/gl_XML.py b/src/mapi/glapi/gen/gl_XML.py
index f7802bb48a5..4dcaaeb463e 100644
--- a/src/mapi/glapi/gen/gl_XML.py
+++ b/src/mapi/glapi/gen/gl_XML.py
@@ -430,6 +430,7 @@ class gl_parameter(object):
             self.counter = c
 
         self.marshal_count = element.get("marshal_count")
+        self.marshal_large_count = element.get("marshal_large_count")
         self.count_scale = int(element.get( "count_scale", "1" ))
 
         elements = (count * self.count_scale)
@@ -492,7 +493,8 @@ class gl_parameter(object):
 
 
     def is_variable_length(self):
-        return len(self.count_parameter_list) or self.counter or self.marshal_count
+        return (len(self.count_parameter_list) or self.counter or
+                self.marshal_count or self.marshal_large_count)
 
 
     def is_64_bit(self):
@@ -575,11 +577,14 @@ class gl_parameter(object):
 
         base_size_str += "sizeof(%s)" % ( self.get_base_type_string() )
 
-        if self.counter or self.count_parameter_list or (self.marshal_count and marshal):
+        if (self.counter or self.count_parameter_list or
+            (marshal and (self.marshal_count or self.marshal_large_count))):
             list = [ "compsize" ]
 
-            if self.marshal_count and marshal:
+            if marshal and self.marshal_count:
                 list = [ self.marshal_count ]
+            elif marshal and self.marshal_large_count:
+                list = [ self.marshal_large_count ]
             elif self.counter and self.count_parameter_list:
                 list.append( self.counter )
             elif self.counter:
@@ -588,7 +593,9 @@ class gl_parameter(object):
             if self.size() > 1:
                 list.append( base_size_str )
 
-            if len(list) > 1 and use_parens :
+            # Don't use safe_mul if marshal_count is used, which indicates
+            # a small size.
+            if len(list) > 1 and use_parens and not self.marshal_count:
                 return "safe_mul(%s)" % ", ".join(list)
             else:
                 return " * ".join(list)
@@ -658,6 +665,7 @@ class gl_function( gl_item ):
         # marshal isn't allowed with alias
         assert not alias or not element.get('marshal')
         assert not alias or not element.get('marshal_count')
+        assert not alias or not element.get('marshal_large_count')
         assert not alias or not element.get('marshal_sync')
         assert not alias or not element.get('marshal_call_before')
         assert not alias or not element.get('marshal_call_after')
diff --git a/src/mapi/glapi/gen/gl_marshal.py b/src/mapi/glapi/gen/gl_marshal.py
index f82194603fa..cf51608557b 100644
--- a/src/mapi/glapi/gen/gl_marshal.py
+++ b/src/mapi/glapi/gen/gl_marshal.py
@@ -183,10 +183,14 @@ class PrintCode(gl_XML.gl_print_base):
             # which case the command alloc or the memcpy would blow up before we
             # get to the validation in Mesa core.
             list = []
+            assert_size = False
             for p in func.parameters:
                 if p.is_variable_length():
-                    list.append('{0}_size < 0'.format(p.name))
-                    list.append('({0}_size > 0 && !{0})'.format(p.name))
+                    if p.marshal_count:
+                        assert_size = True
+                    else:
+                        list.append('{0}_size < 0'.format(p.name))
+                        list.append('({0}_size > 0 && !{0})'.format(p.name))
 
             if len(list) != 0:
                 list.append('(unsigned)cmd_size > MARSHAL_MAX_CMD_SIZE')
@@ -197,6 +201,8 @@ class PrintCode(gl_XML.gl_print_base):
                     self.print_call(func)
                     out('return;')
                 out('}')
+            elif assert_size:
+                out('assert(cmd_size >= 0 && cmd_size <= MARSHAL_MAX_CMD_SIZE);')
 
         # Add the call into the batch.
         out('{0} *cmd = _mesa_glthread_allocate_command(ctx, '
diff --git a/src/mapi/glapi/gen/marshal_XML.py b/src/mapi/glapi/gen/marshal_XML.py
index 3ef91dd8a40..2a00fa61b52 100644
--- a/src/mapi/glapi/gen/marshal_XML.py
+++ b/src/mapi/glapi/gen/marshal_XML.py
@@ -181,9 +181,10 @@ class marshal_function(gl_XML.gl_function):
         for p in self.parameters:
             if p.is_output:
                 return 'sync'
-            if (p.is_pointer() and not (p.count or p.counter or p.marshal_count)):
+            if (p.is_pointer() and not
+                (p.count or p.counter or p.marshal_count or p.marshal_large_count)):
                 return 'sync'
-            if p.count_parameter_list and not p.marshal_count:
+            if p.count_parameter_list and not (p.marshal_count or p.marshal_large_count):
                 # Parameter size is determined by enums; haven't
                 # written logic to handle this yet.  TODO: fix.
                 return 'sync'