diff --git a/src/freedreno/.gitlab-ci/reference/compute-a540.log b/src/freedreno/.gitlab-ci/reference/compute-a540.log new file mode 100644 index 00000000000..a25347e6a43 --- /dev/null +++ b/src/freedreno/.gitlab-ci/reference/compute-a540.log @@ -0,0 +1,994 @@ +test: 50 +cmd: get_display:469: >>> eglInitialize(display, &egl_major, &egl_minor) +cmd: eglInitialize(display, &egl_major, &egl_minor) +gpu_id: 540 +cmd: get_display:469: <<< eglInitialize(display, &egl_major, &egl_minor): succeeded +cmd: get_display:472: Using display 0x1 with EGL version 1.4 +cmd: get_display:474: EGL Version "1.4 Android META-EGL" +cmd: get_display:475: EGL Vendor "Android" +cmd: get_display:476: EGL Extensions "EGL_KHR_get_all_proc_addresses EGL_ANDROID_presentation_time EGL_KHR_swap_buffers_with_damage EGL_ANDROID_get_native_client_buffer EGL_ANDROID_front_buffer_auto_refresh EGL_ANDROID_get_frame_timestamps EGL_EXT_surface_SMPTE2086_metadata EGL_EXT_surface_CTA861_3_metadata EGL_EXT_gl_colorspace_scrgb EGL_EXT_gl_colorspace_scrgb_linear EGL_EXT_gl_colorspace_display_p3_linear EGL_EXT_gl_colorspace_display_p3 EGL_KHR_image EGL_KHR_image_base EGL_EXT_image_gl_colorspace EGL_KHR_lock_surface EGL_KHR_gl_colorspace EGL_KHR_gl_texture_2D_image EGL_KHR_gl_texture_3D_image EGL_KHR_gl_texture_cubemap_image EGL_KHR_gl_renderbuffer_image EGL_KHR_reusable_sync EGL_KHR_fence_sync EGL_KHR_create_context EGL_KHR_surfaceless_context EGL_EXT_create_context_robustness EGL_ANDROID_image_native_buffer EGL_KHR_wait_sync EGL_ANDROID_recordable EGL_KHR_partial_update EGL_EXT_pixel_format_float EGL_KHR_create_context_no_error EGL_KHR_mutable_render_buffer EGL_EXT_yuv_surface EGL_EXT_protected_content EGL_IMG_context_priority EGL_KHR_no_config_context " +cmd: setup:425: >>> eglChooseConfig(display, config_attribute_list, &config, 1, &num_config) +cmd: eglChooseConfig(display, config_attribute_list, &config, 1, &num_config) +cmd: setup:425: <<< eglChooseConfig(display, config_attribute_list, &config, 1, &num_config): succeeded +cmd: setup:426: num_config: 1 +cmd: setup:429: >>> context = eglCreateContext(display, config, EGL_NO_CONTEXT, context_attribute_list) +cmd: context = eglCreateContext(display, config, EGL_NO_CONTEXT, context_attribute_list) +cmd: setup:429: <<< context = eglCreateContext(display, config, EGL_NO_CONTEXT, context_attribute_list): succeeded +cmd: setup:430: >>> surface = eglCreatePbufferSurface(display, config, pbuffer_attribute_list) +cmd: surface = eglCreatePbufferSurface(display, config, pbuffer_attribute_list) +cmd: setup:430: <<< surface = eglCreatePbufferSurface(display, config, pbuffer_attribute_list): succeeded +cmd: setup:432: >>> eglQuerySurface(display, surface, EGL_WIDTH, &width) +cmd: eglQuerySurface(display, surface, EGL_WIDTH, &width) +cmd: setup:432: <<< eglQuerySurface(display, surface, EGL_WIDTH, &width): succeeded +cmd: setup:433: >>> eglQuerySurface(display, surface, EGL_HEIGHT, &height) +cmd: eglQuerySurface(display, surface, EGL_HEIGHT, &height) +cmd: setup:433: <<< eglQuerySurface(display, surface, EGL_HEIGHT, &height): succeeded +cmd: setup:435: PBuffer: 256x256 +cmd: setup:438: >>> eglMakeCurrent(display, surface, surface, context) +cmd: eglMakeCurrent(display, surface, surface, context) +cmd: setup:438: <<< eglMakeCurrent(display, surface, surface, context): succeeded +cmd: setup:439: >>> glFlush() +cmd: glFlush() +cmd: setup:439: <<< glFlush(): succeeded +cmd: get_compute_program:731: compute shader: +#version 310 es +precision highp float; +precision highp int; + +layout(local_size_x=5, local_size_y=6, local_size_z=7) in; + +layout(binding = 1) buffer buffer_Out { + uint Out; +}; + +shared uint a[64]; + +void main(void) { + a[0] = 0u; + a[uint(gl_LocalInvocationID.x)] = 1u; + + Out = a[0]; +} + +fragment shader: +#version 310 es +precision highp float; +precision highp int; + +layout(local_size_x=5, local_size_y=6, local_size_z=7) in; + +layout(binding = 1) buffer buffer_Out { + uint Out; +}; + +shared uint a[64]; + +void main(void) { + a[0] = 0u; + a[uint(gl_LocalInvocationID.x)] = 1u; + + Out = a[0]; +} + +cmd: get_shader:673: compute shader: +#version 310 es +precision highp float; +precision highp int; + +layout(local_size_x=5, local_size_y=6, local_size_z=7) in; + +layout(binding = 1) buffer buffer_Out { + uint Out; +}; + +shared uint a[64]; + +void main(void) { + a[0] = 0u; + a[uint(gl_LocalInvocationID.x)] = 1u; + + Out = a[0]; +} + +cmd: get_shader:675: >>> shader = glCreateShader(stage) +cmd: shader = glCreateShader(stage) +cmd: get_shader:675: <<< shader = glCreateShader(stage): succeeded +cmd: get_shader:677: >>> glShaderSource(shader, 1, &source, NULL) +cmd: glShaderSource(shader, 1, &source, NULL) +cmd: get_shader:677: <<< glShaderSource(shader, 1, &source, NULL): succeeded +cmd: get_shader:678: >>> glCompileShader(shader) +cmd: glCompileShader(shader) +cmd: get_shader:678: <<< glCompileShader(shader): succeeded +cmd: get_shader:680: >>> glGetShaderiv(shader, GL_COMPILE_STATUS, &ret) +cmd: glGetShaderiv(shader, GL_COMPILE_STATUS, &ret) +cmd: get_shader:680: <<< glGetShaderiv(shader, GL_COMPILE_STATUS, &ret): succeeded +cmd: get_shader:681: ret=1 +cmd: get_shader:696: compute shader compilation succeeded! +cmd: get_compute_program:738: >>> program = glCreateProgram() +cmd: program = glCreateProgram() +cmd: get_compute_program:738: <<< program = glCreateProgram(): succeeded +cmd: get_compute_program:739: >>> glAttachShader(program, shader) +cmd: glAttachShader(program, shader) +cmd: get_compute_program:739: <<< glAttachShader(program, shader): succeeded +cmd: link_program:811: >>> glLinkProgram(program) +cmd: glLinkProgram(program) +cmd: link_program:811: <<< glLinkProgram(program): succeeded +cmd: link_program:813: >>> glGetProgramiv(program, GL_LINK_STATUS, &ret) +cmd: glGetProgramiv(program, GL_LINK_STATUS, &ret) +cmd: link_program:813: <<< glGetProgramiv(program, GL_LINK_STATUS, &ret): succeeded +cmd: link_program:828: program linking succeeded! +cmd: link_program:830: >>> glUseProgram(program) +cmd: glUseProgram(program) +cmd: link_program:830: <<< glUseProgram(program): succeeded +cmd: link_program:836: >>> glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH_OES, &len) +cmd: glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH_OES, &len) +cmd: link_program:836: <<< glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH_OES, &len): succeeded +cmd: link_program:838: >>> glGetProgramBinaryOES(program, len, &ret, &binary_format, binary) +cmd: glGetProgramBinaryOES(program, len, &ret, &binary_format, binary) +cmd: link_program:838: <<< glGetProgramBinaryOES(program, len, &ret, &binary_format, binary): succeeded +cmd: link_program:839: program dump: len=3336, actual len=3336 +cmd: test_compiler:341: >>> glFlush() +cmd: glFlush() +cmd: test_compiler:341: <<< glFlush(): succeeded +cmd: setup_ssbo:266: SSBO: buffer_Out at 0 +cmd: test_compiler:379: >>> glDispatchCompute(1, 2, 3) +cmd: glDispatchCompute(1, 2, 3) +cmd: test_compiler:379: <<< glDispatchCompute(1, 2, 3): succeeded +cmd: test_compiler:384: >>> eglSwapBuffers(display, surface) +cmd: eglSwapBuffers(display, surface) +############################################################ +cmdstream[0]: 207 dwords + opcode: CP_SET_RENDER_MODE (6c) (9 dwords) + { MODE = BYPASS } + { ADDR_0_LO = 0x15000 } + { ADDR_0_HI = 0x5 } + { 0 } + { 4 = 0x3 } + { ADDR_1_LEN = 15 } + { ADDR_1_LO = 0x1f010 } + { ADDR_1_HI = 0x5 } +0000000500015000: 0000: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 +* + addr: 0x000000050001f010 + len: 0xf + write GRAS_LRZ_CNTL (e100) + GRAS_LRZ_CNTL: { 0x8 } +000000050001f010: 0000: 48e10001 00000008 + write CP_SCRATCH[0].REG (0b78) + CP_SCRATCH[0].REG: 1 +000000050001f018: 0000: 400b7801 00000001 + opcode: CP_WAIT_MEM_WRITES (12) (1 dwords) +000000050001f020: 0000: 70928000 + opcode: (null) (74) (6 dwords) +000000050001f024: 0000: 70f48005 c0000b78 00012c40 00000005 00015000 00000005 + opcode: CP_MEM_WRITE (3d) (4 dwords) + { ADDR_LO = 0x11000 } + { ADDR_HI = 0x5 } + gpuaddr:0000000500011000 +000000050001f048: 0000: 00000001 +000000050001f03c: 0000: 703d8003 00011000 00000005 00000001 +000000050001f010: 0000: 48e10001 00000008 400b7801 00000001 70928000 70f48005 c0000b78 00012c40 +000000050001f030: 0020: 00000005 00015000 00000005 703d8003 00011000 00000005 00000001 +0000000500190000: 0000: 70ec0008 00000001 00015000 00000005 00000000 00000003 0000000f 0001f010 +0000000500190020: 0020: 00000005 + opcode: CP_PREEMPT_ENABLE_LOCAL (6a) (2 dwords) +0000000500190024: 0000: 70ea0001 00000001 + write UCHE_CACHE_INVALIDATE_MIN_LO (0e91) + UCHE_CACHE_INVALIDATE_MIN_LO: 0 + UCHE_CACHE_INVALIDATE_MIN_HI: 0 + UCHE_CACHE_INVALIDATE_MAX_LO: 0 + UCHE_CACHE_INVALIDATE_MAX_HI: 0 + UCHE_CACHE_INVALIDATE: 0x12 +000000050019002c: 0000: 480e9185 00000000 00000000 00000000 00000000 00000012 + opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) +0000000500190044: 0000: 70268000 + write HLSQ_UPDATE_CNTL (e78a) + HLSQ_UPDATE_CNTL: 0xfffff +0000000500190048: 0000: 40e78a01 000fffff + opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords) + { 0 = 0 } + { ADDR_0_LO = 0x12000 } + { ADDR_0_HI = 0x5 } +0000000500190050: 0000: 70d08003 00000000 00012000 00000005 + opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords) + { 0 = 0x10 } + { ADDR_0_LO = 0x13000 } + { ADDR_0_HI = 0x5 } +0000000500190060: 0000: 70d08003 00000010 00013000 00000005 + opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) +0000000500190070: 0000: 70268000 + write PC_RESTART_INDEX (e38c) + PC_RESTART_INDEX: 0xffffffff +0000000500190074: 0000: 48e38c01 ffffffff + write PC_RASTER_CNTL (e388) + PC_RASTER_CNTL: { POLYMODE_FRONT_PTYPE = PC_DRAW_TRIANGLES | POLYMODE_BACK_PTYPE = PC_DRAW_TRIANGLES } +000000050019007c: 0000: 40e38801 00000012 + write GRAS_SU_POINT_MINMAX (e091) + GRAS_SU_POINT_MINMAX: { MIN = 1.000000 | MAX = 1023.000000 } + GRAS_SU_POINT_SIZE: 0.500000 +0000000500190084: 0000: 48e09102 3ff00010 00000008 + write GRAS_SU_CONSERVATIVE_RAS_CNTL (e099) + GRAS_SU_CONSERVATIVE_RAS_CNTL: 0 +0000000500190090: 0000: 40e09901 00000000 + write GRAS_SC_SCREEN_SCISSOR_CNTL (e0a4) + GRAS_SC_SCREEN_SCISSOR_CNTL: 0 +0000000500190098: 0000: 48e0a401 00000000 + write SP_VS_CONFIG_MAX_CONST (e58a) + SP_VS_CONFIG_MAX_CONST: 0 +00000005001900a0: 0000: 48e58a01 00000000 + write SP_FS_CONFIG_MAX_CONST (e58b) + SP_FS_CONFIG_MAX_CONST: 0 +00000005001900a8: 0000: 40e58b01 00000000 + write UNKNOWN_E292 (e292) + UNKNOWN_E292: 0 + UNKNOWN_E293: 0 +00000005001900b0: 0000: 40e29202 00000000 00000000 + write RB_MODE_CNTL (0cc6) + RB_MODE_CNTL: 0x44 +00000005001900bc: 0000: 480cc601 00000044 + write RB_DBG_ECO_CNTL (0cc4) + RB_DBG_ECO_CNTL: 0x100000 +00000005001900c4: 0000: 400cc401 00100000 + write VFD_MODE_CNTL (0e42) + VFD_MODE_CNTL: 0 +00000005001900cc: 0000: 400e4201 00000000 + write PC_MODE_CNTL (0d02) + PC_MODE_CNTL: 0x1f +00000005001900d4: 0000: 480d0201 0000001f + write SP_MODE_CNTL (0ec2) + SP_MODE_CNTL: 0x1e +00000005001900dc: 0000: 480ec201 0000001e + write SP_DBG_ECO_CNTL (0ec0) + SP_DBG_ECO_CNTL: 0x800 +00000005001900e4: 0000: 400ec001 00000800 + write TPL1_MODE_CNTL (0f02) + TPL1_MODE_CNTL: 0x544 +00000005001900ec: 0000: 400f0201 00000544 + write HLSQ_TIMEOUT_THRESHOLD_0 (0e00) + HLSQ_TIMEOUT_THRESHOLD_0: 0x80 + HLSQ_TIMEOUT_THRESHOLD_1: 0 +00000005001900f4: 0000: 400e0002 00000080 00000000 + write VPC_DBG_ECO_CNTL (0e60) + VPC_DBG_ECO_CNTL: { ALLFLATOPTDIS } +0000000500190100: 0000: 400e6001 00000400 + write HLSQ_MODE_CNTL (0e06) + HLSQ_MODE_CNTL: 0x1 +0000000500190108: 0000: 400e0601 00000001 + write VPC_MODE_CNTL (0e62) + VPC_MODE_CNTL: { 0 } +0000000500190110: 0000: 480e6201 00000000 + opcode: CP_MEM_TO_REG (42) (4 dwords) + { REG = 0xc10 | CNT = 16 } + { SRC = 0x14000 } + { SRC_HI = 0x5 } + base register: 0xc10 + gpuaddr:0000000500014000 +0000000500014000: 0000: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 +* +0000000500190118: 0000: 70c28003 00800c10 00014000 00000005 + opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) +0000000500190128: 0000: 70268000 + write PC_TESSFACTOR_ADDR_LO (0d08) + PC_TESSFACTOR_ADDR_LO: 0 + PC_TESSFACTOR_ADDR_HI: 0 +000000050019012c: 0000: 480d0802 00000000 00000000 + opcode: CP_SET_DRAW_STATE (43) (4 dwords) + { COUNT = 0 | DISABLE_ALL_GROUPS | GROUP_ID = 0 } + { ADDR_LO = 0 } + { ADDR_HI = 0 } +0000000500190138: 0000: 70438003 00040000 00000000 00000000 + write UNKNOWN_E7C0 (e7c0) + UNKNOWN_E7C0: 0 + 0xe7c1: 00000000 + 0xe7c2: 00000000 + HLSQ_VS_CONSTLEN: 0 + HLSQ_VS_INSTRLEN: 0 + UNKNOWN_E7C5: 0 + 0xe7c6: 00000000 + 0xe7c7: 00000000 + HLSQ_HS_CONSTLEN: 0 + HLSQ_HS_INSTRLEN: 0 + UNKNOWN_E7CA: 0 + 0xe7cb: 00000000 + 0xe7cc: 00000000 + HLSQ_DS_CONSTLEN: 0 + HLSQ_DS_INSTRLEN: 0 + UNKNOWN_E7CF: 0 + 0xe7d0: 00000000 + 0xe7d1: 00000000 + HLSQ_GS_CONSTLEN: 0 + HLSQ_GS_INSTRLEN: 0 + UNKNOWN_E7D4: 0 + 0xe7d5: 00000000 + 0xe7d6: 00000000 + HLSQ_FS_CONSTLEN: 0 + HLSQ_FS_INSTRLEN: 0 + UNKNOWN_E7D9: 0 + 0xe7da: 00000000 + 0xe7db: 00000000 + HLSQ_CS_CONSTLEN: 0 + HLSQ_CS_INSTRLEN: 0 +0000000500190148: 0000: 48e7c09e 00000000 00000000 00000000 00000000 00000000 00000000 00000000 +* + write RB_CCU_CNTL (0cc7) + RB_CCU_CNTL: 0x7c13c080 +00000005001901c4: 0000: 400cc701 7c13c080 + opcode: CP_PREEMPT_ENABLE_LOCAL (6a) (2 dwords) +00000005001901cc: 0000: 70ea0001 00000001 + opcode: CP_COMPUTE_CHECKPOINT (6e) (8 dwords) + { ADDR_0_LO = 0x15000 } + { ADDR_0_HI = 0x5 } + { 2 = 0x18 } + { 3 = 0x3 } + { ADDR_1_LEN = 15 } + { ADDR_1_LO = 0x1f010 } + { ADDR_1_HI = 0x5 } + addr: 0x000000050001f010 + len: 0xf + write GRAS_LRZ_CNTL (e100) + GRAS_LRZ_CNTL: { 0x8 } +000000050001f010: 0000: 48e10001 00000008 + write CP_SCRATCH[0].REG (0b78) + CP_SCRATCH[0].REG: 1 +000000050001f018: 0000: 400b7801 00000001 + opcode: CP_WAIT_MEM_WRITES (12) (1 dwords) +000000050001f020: 0000: 70928000 + opcode: (null) (74) (6 dwords) +000000050001f024: 0000: 70f48005 c0000b78 00012c40 00000005 00015000 00000005 + opcode: CP_MEM_WRITE (3d) (4 dwords) + { ADDR_LO = 0x11000 } + { ADDR_HI = 0x5 } + gpuaddr:0000000500011000 +000000050001f048: 0000: 00000001 +000000050001f03c: 0000: 703d8003 00011000 00000005 00000001 +000000050001f010: 0000: 48e10001 00000008 400b7801 00000001 70928000 70f48005 c0000b78 00012c40 +000000050001f030: 0020: 00000005 00015000 00000005 703d8003 00011000 00000005 00000001 +00000005001901d4: 0000: 706e0007 00015000 00000005 00000018 00000003 0000000f 0001f010 00000005 + opcode: CP_SET_DRAW_STATE (43) (4 dwords) + { COUNT = 0 | DISABLE_ALL_GROUPS | GROUP_ID = 0 } + { ADDR_LO = 0 } + { ADDR_HI = 0 } +00000005001901f4: 0000: 70438003 00040000 00000000 00000000 + write RB_CNTL (e140) + RB_CNTL: { WIDTH = 0 | HEIGHT = 0 | BYPASS } +0000000500190204: 0000: 40e14001 00020000 + write GRAS_LRZ_CNTL (e100) + GRAS_LRZ_CNTL: { 0 } +000000050019020c: 0000: 48e10001 00000000 + opcode: CP_EVENT_WRITE (46) (2 dwords) + { EVENT = LRZ_FLUSH } + event LRZ_FLUSH +0000000500190214: 0000: 70460001 00000026 + opcode: CP_SKIP_IB2_ENABLE_GLOBAL (1d) (2 dwords) +000000050019021c: 0000: 709d0001 00000000 + opcode: CP_EVENT_WRITE (46) (2 dwords) + { EVENT = PC_CCU_INVALIDATE_COLOR } + event PC_CCU_INVALIDATE_COLOR +0000000500190224: 0000: 70460001 00000019 + opcode: CP_EVENT_WRITE (46) (2 dwords) + { EVENT = PC_CCU_INVALIDATE_DEPTH } + event PC_CCU_INVALIDATE_DEPTH +000000050019022c: 0000: 70460001 00000018 + write PC_POWER_CNTL (e3b0) + PC_POWER_CNTL: 0x3 +0000000500190234: 0000: 48e3b001 00000003 + write VFD_POWER_CNTL (e4f0) + VFD_POWER_CNTL: 0x3 +000000050019023c: 0000: 48e4f001 00000003 + opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) +0000000500190244: 0000: 70268000 + write RB_CCU_CNTL (0cc7) + RB_CCU_CNTL: 0x10000000 +0000000500190248: 0000: 400cc701 10000000 + write GRAS_SC_WINDOW_SCISSOR_TL (e0ea) + GRAS_SC_WINDOW_SCISSOR_TL: { X = 0 | Y = 0 } + GRAS_SC_WINDOW_SCISSOR_BR: { X = 255 | Y = 255 } +0000000500190250: 0000: 48e0ea02 00000000 00ff00ff + write RB_RESOLVE_CNTL_1 (e211) + RB_RESOLVE_CNTL_1: { X = 0 | Y = 0 } + RB_RESOLVE_CNTL_2: { X = 255 | Y = 255 } +000000050019025c: 0000: 48e21102 00000000 00ff00ff + write RB_WINDOW_OFFSET (e1d0) + RB_WINDOW_OFFSET: { X = 0 | Y = 0 } +0000000500190268: 0000: 40e1d001 00000000 + write HLSQ_UPDATE_CNTL (e78a) + HLSQ_UPDATE_CNTL: 0x1f00000 +0000000500190270: 0000: 40e78a01 01f00000 + opcode: CP_INDIRECT_BUFFER (3f) (4 dwords) + ibaddr:000000050001b000 + ibsize:0000004b + opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords) + { 0 = 0 } + { ADDR_0_LO = 0x12000 } + { ADDR_0_HI = 0x5 } +000000050001b000: 0000: 70d08003 00000000 00012000 00000005 + opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords) + { 0 = 0x10 } + { ADDR_0_LO = 0x13000 } + { ADDR_0_HI = 0x5 } +000000050001b010: 0000: 70d08003 00000010 00013000 00000005 + write SP_SP_CNTL (e580) + SP_SP_CNTL: 0 +000000050001b020: 0000: 48e58001 00000000 + write HLSQ_CONTROL_0_REG (e784) + HLSQ_CONTROL_0_REG: { FSTHREADSIZE = FOUR_QUADS | CSTHREADSIZE = TWO_QUADS | 0x880 } +000000050001b028: 0000: 48e78401 00000881 + write SP_CS_CTRL_REG0 (e5f0) + SP_CS_CTRL_REG0: { BUFFER | THREADSIZE = TWO_QUADS | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 2 | BRANCHSTACK = 0 | 0x2 } +000000050001b030: 0000: 40e5f001 00000806 + write HLSQ_CS_CONFIG (e790) + HLSQ_CS_CONFIG: { ENABLED | CONSTOBJECTOFFSET = 0 | SHADEROBJOFFSET = 0 } +000000050001b038: 0000: 48e79001 00000001 + write HLSQ_CS_CNTL (e796) + HLSQ_CS_CNTL: { SSBO_ENABLE | INSTRLEN = 1 } +000000050001b040: 0000: 48e79601 00000003 + write SP_CS_CONFIG (e589) + SP_CS_CONFIG: { ENABLED | CONSTOBJECTOFFSET = 0 | SHADEROBJOFFSET = 0 } +000000050001b048: 0000: 48e58901 00000001 + write HLSQ_CS_CONSTLEN (e7dc) + HLSQ_CS_CONSTLEN: 48 +000000050001b050: 0000: 40e7dc01 00000030 + write HLSQ_CS_INSTRLEN (e7dd) + HLSQ_CS_INSTRLEN: 1 +000000050001b058: 0000: 48e7dd01 00000001 + write HLSQ_CS_NDRANGE_0 (e7b0) + HLSQ_CS_NDRANGE_0: { KERNELDIM = 3 | LOCALSIZEX = 4 | LOCALSIZEY = 5 | LOCALSIZEZ = 6 } +000000050001b060: 0000: 40e7b001 01805013 + write UNKNOWN_E5F2 (e5f2) + UNKNOWN_E5F2: 0 + SP_CS_OBJ_START_LO: 0x18000 + SP_CS_OBJ_START_HI: 0x5 base=500018000, offset=0, size=8192 +0000000500018000: 0000: 00000000 20554001 20020000 46d00000 00000001 20554002 00000000 00000000 +0000000500018020: 0020: 00000001 20154003 00010000 42300000 00000001 20154004 01800002 c1060300 +0000000500018040: 0040: 00000000 00000400 01800004 c1060100 01804001 c0460000 00000000 00001000 +0000000500018060: 0060: 01000601 c7260003 00000000 03000000 00000000 00000000 00000000 00000000 +* + :1:0000:0000[20554001x_00000000x] mov.s32s32 r0.y, 0 + :2:0001:0001[46d00000x_20020000x] shl.b r0.x, r0.x, 2 + :1:0002:0002[20554002x_00000001x] mov.s32s32 r0.z, 1 + :0:0003:0003[00000000x_00000000x] nop + :1:0004:0004[20154003x_00000001x] mov.s32s32 r0.w, r0.y + :2:0005:0005[42300000x_00010000x] add.s r0.x, r0.x, r0.y + :1:0006:0006[20154004x_00000001x] mov.s32s32 r1.x, r0.y + :6:0007:0007[c1060300x_01800002x] stl.u32 l[r0.y], r0.y, 1 + :0:0008:0008[00000400x_00000000x] (rpt4)nop + :6:0009:0013[c1060100x_01800004x] stl.u32 l[r0.x], r0.z, 1 + :6:0010:0014[c0460000x_01804001x] ldl.u32 r0.x, l[r0.y], 1 + :0:0011:0015[00001000x_00000000x] (ss)nop + :6:0012:0016[c7260003x_01000601x] stgb.untyped.4d.u32.1 g[0], r0.x, r0.y, r0.w + :0:0013:0017[03000000x_00000000x] end + :0:0014:0018[00000000x_00000000x] nop + :0:0015:0019[00000000x_00000000x] nop + :0:0016:0020[00000000x_00000000x] nop + :0:0017:0021[00000000x_00000000x] nop + Stats: + - shaderdb: 22 instr, 11 nops, 11 non-nops, 4 mov, 0 cov + - shaderdb: 0 last-baryf, 0 half, 1 full, 0 constlen + - shaderdb: 12 cat0, 4 cat1, 2 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7 + - shaderdb: 0 sstall, 1 (ss), 0 (sy) +000000050001b068: 0000: 48e5f283 00000000 00018000 00000005 + write 0xe5f9 (e5f9) + 0xe5f9: 0000001f +000000050001b078: 0000: 40e5f901 0000001f + write HLSQ_CS_CNTL_0 (e7b7) + HLSQ_CS_CNTL_0: { WGIDCONSTID = r51.w | UNK0 = r48.x | UNK1 = r63.x | LOCALIDREGID = r0.x } + HLSQ_CS_CNTL_1: 0x1f +000000050001b080: 0000: 48e7b702 00fcc0cf 0000001f + write HLSQ_CS_KERNEL_GROUP_X (e7b9) + HLSQ_CS_KERNEL_GROUP_X: 0x1 + HLSQ_CS_KERNEL_GROUP_Y: 0x1 + HLSQ_CS_KERNEL_GROUP_Z: 0x1 +000000050001b08c: 0000: 40e7b983 00000001 00000001 00000001 + opcode: CP_LOAD_STATE4 (30) (4 dwords) + { DST_OFF = 0 | STATE_SRC = SS4_INDIRECT | STATE_BLOCK = SB4_CS_SHADER | NUM_UNIT = 1 } + { STATE_TYPE = ST4_SHADER | EXT_SRC_ADDR = 0x18000 } + { EXT_SRC_ADDR_HI = 0x5 } + :1:0000:0000[20554001x_00000000x] mov.s32s32 r0.y, 0 + :2:0001:0001[46d00000x_20020000x] shl.b r0.x, r0.x, 2 + :1:0002:0002[20554002x_00000001x] mov.s32s32 r0.z, 1 + :0:0003:0003[00000000x_00000000x] nop + :1:0004:0004[20154003x_00000001x] mov.s32s32 r0.w, r0.y + :2:0005:0005[42300000x_00010000x] add.s r0.x, r0.x, r0.y + :1:0006:0006[20154004x_00000001x] mov.s32s32 r1.x, r0.y + :6:0007:0007[c1060300x_01800002x] stl.u32 l[r0.y], r0.y, 1 + :0:0008:0008[00000400x_00000000x] (rpt4)nop + :6:0009:0013[c1060100x_01800004x] stl.u32 l[r0.x], r0.z, 1 + :6:0010:0014[c0460000x_01804001x] ldl.u32 r0.x, l[r0.y], 1 + :0:0011:0015[00001000x_00000000x] (ss)nop + :6:0012:0016[c7260003x_01000601x] stgb.untyped.4d.u32.1 g[0], r0.x, r0.y, r0.w + :0:0013:0017[03000000x_00000000x] end + :0:0014:0018[00000000x_00000000x] nop + :0:0015:0019[00000000x_00000000x] nop + Stats: + - shaderdb: 20 instr, 9 nops, 11 non-nops, 4 mov, 0 cov + - shaderdb: 0 last-baryf, 0 half, 1 full, 0 constlen + - shaderdb: 10 cat0, 4 cat1, 2 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7 + - shaderdb: 0 sstall, 1 (ss), 0 (sy) +000000050001b09c: 0000: 70b08003 00760000 00018000 00000005 + write TPL1_VS_TEX_COUNT (e700) + TPL1_VS_TEX_COUNT: 0 +000000050001b0ac: 0000: 48e70001 00000000 + write TPL1_HS_TEX_COUNT (e701) + TPL1_HS_TEX_COUNT: 0 +000000050001b0b4: 0000: 40e70101 00000000 + write TPL1_DS_TEX_COUNT (e702) + TPL1_DS_TEX_COUNT: 0 +000000050001b0bc: 0000: 40e70201 00000000 + write TPL1_GS_TEX_COUNT (e703) + TPL1_GS_TEX_COUNT: 0 +000000050001b0c4: 0000: 48e70301 00000000 + write TPL1_FS_TEX_COUNT (e750) + TPL1_FS_TEX_COUNT: 0 +000000050001b0cc: 0000: 48e75001 00000000 + write TPL1_CS_TEX_COUNT (e751) + TPL1_CS_TEX_COUNT: 0 +000000050001b0d4: 0000: 40e75101 00000000 + opcode: CP_LOAD_STATE4 (30) (8 dwords) + { DST_OFF = 0 | STATE_SRC = SS4_DIRECT | STATE_BLOCK = SB4_CS_SSBO | NUM_UNIT = 1 } + { STATE_TYPE = ST4_SHADER | EXT_SRC_ADDR = 0 } + { EXT_SRC_ADDR_HI = 0 } + { BASE_LO = 0 } + { PITCH = 0 } + { ARRAY_PITCH = 0 } + { CPP = 0 } +000000050001b0ec: 0000: 00000000 00000000 00000000 00000000 +000000050001b0dc: 0000: 70b00007 007c0000 00000000 00000000 00000000 00000000 00000000 00000000 + opcode: CP_LOAD_STATE4 (30) (6 dwords) + { DST_OFF = 0 | STATE_SRC = SS4_DIRECT | STATE_BLOCK = SB4_CS_SSBO | NUM_UNIT = 1 } + { STATE_TYPE = ST4_CONSTANTS | EXT_SRC_ADDR = 0 } + { EXT_SRC_ADDR_HI = 0 } + { FMT = 0 | WIDTH = 0 } + { HEIGHT = 0 | DEPTH = 0 } +000000050001b10c: 0000: 00000000 00000000 +000000050001b0fc: 0000: 70b08005 007c0000 00000001 00000000 00000000 00000000 + opcode: CP_LOAD_STATE4 (30) (6 dwords) + { DST_OFF = 0 | STATE_SRC = SS4_DIRECT | STATE_BLOCK = SB4_CS_SSBO | NUM_UNIT = 1 } + { STATE_TYPE = ST4_UBO | EXT_SRC_ADDR = 0 } + { EXT_SRC_ADDR_HI = 0 } + { BASE_LO = 0 } + { BASE_HI = 0 } +000000050001b124: 0000: 00000000 00000000 +000000050001b114: 0000: 70b08005 007c0000 00000002 00000000 00000000 00000000 +0000000500190278: 0000: 70bf8003 0001b000 00000005 0000004b + write VPC_SO_OVERRIDE (e2a2) + VPC_SO_OVERRIDE: { SO_DISABLE } +0000000500190288: 0000: 40e2a201 00000001 + opcode: CP_SET_VISIBILITY_OVERRIDE (64) (2 dwords) +0000000500190290: 0000: 70640001 00000001 + opcode: CP_INDIRECT_BUFFER (3f) (4 dwords) + ibaddr:0000000500170000 + ibsize:0000004f + write HLSQ_CONTROL_0_REG (e784) + HLSQ_CONTROL_0_REG: { FSTHREADSIZE = FOUR_QUADS | CSTHREADSIZE = TWO_QUADS | 0x880 } +0000000500170000: 0000: 48e78401 00000881 + write SP_CS_CTRL_REG0 (e5f0) + SP_CS_CTRL_REG0: { BUFFER | THREADSIZE = TWO_QUADS | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 2 | BRANCHSTACK = 0 | 0x2 } +0000000500170008: 0000: 40e5f001 00000806 + write HLSQ_CS_CONFIG (e790) + HLSQ_CS_CONFIG: { ENABLED | CONSTOBJECTOFFSET = 0 | SHADEROBJOFFSET = 0 } +0000000500170010: 0000: 48e79001 00000001 + write HLSQ_CS_CNTL (e796) + HLSQ_CS_CNTL: { SSBO_ENABLE | INSTRLEN = 1 } +0000000500170018: 0000: 48e79601 00000003 + write SP_CS_CONFIG (e589) + SP_CS_CONFIG: { ENABLED | CONSTOBJECTOFFSET = 0 | SHADEROBJOFFSET = 0 } +0000000500170020: 0000: 48e58901 00000001 + write HLSQ_CS_CONSTLEN (e7dc) + HLSQ_CS_CONSTLEN: 48 +0000000500170028: 0000: 40e7dc01 00000030 + write HLSQ_CS_INSTRLEN (e7dd) + HLSQ_CS_INSTRLEN: 1 +0000000500170030: 0000: 48e7dd01 00000001 + write HLSQ_CS_NDRANGE_0 (e7b0) + HLSQ_CS_NDRANGE_0: { KERNELDIM = 3 | LOCALSIZEX = 4 | LOCALSIZEY = 5 | LOCALSIZEZ = 6 } +0000000500170038: 0000: 40e7b001 01805013 + write UNKNOWN_E5F2 (e5f2) + UNKNOWN_E5F2: 0 + SP_CS_OBJ_START_LO: 0x18000 base=500018000, offset=0, size=8192 + SP_CS_OBJ_START_HI: 0x5 base=500018000, offset=0, size=8192 +0000000500018000: 0000: 00000000 20554001 20020000 46d00000 00000001 20554002 00000000 00000000 +0000000500018020: 0020: 00000001 20154003 00010000 42300000 00000001 20154004 01800002 c1060300 +0000000500018040: 0040: 00000000 00000400 01800004 c1060100 01804001 c0460000 00000000 00001000 +0000000500018060: 0060: 01000601 c7260003 00000000 03000000 00000000 00000000 00000000 00000000 +* + :1:0000:0000[20554001x_00000000x] mov.s32s32 r0.y, 0 + :2:0001:0001[46d00000x_20020000x] shl.b r0.x, r0.x, 2 + :1:0002:0002[20554002x_00000001x] mov.s32s32 r0.z, 1 + :0:0003:0003[00000000x_00000000x] nop + :1:0004:0004[20154003x_00000001x] mov.s32s32 r0.w, r0.y + :2:0005:0005[42300000x_00010000x] add.s r0.x, r0.x, r0.y + :1:0006:0006[20154004x_00000001x] mov.s32s32 r1.x, r0.y + :6:0007:0007[c1060300x_01800002x] stl.u32 l[r0.y], r0.y, 1 + :0:0008:0008[00000400x_00000000x] (rpt4)nop + :6:0009:0013[c1060100x_01800004x] stl.u32 l[r0.x], r0.z, 1 + :6:0010:0014[c0460000x_01804001x] ldl.u32 r0.x, l[r0.y], 1 + :0:0011:0015[00001000x_00000000x] (ss)nop + :6:0012:0016[c7260003x_01000601x] stgb.untyped.4d.u32.1 g[0], r0.x, r0.y, r0.w + :0:0013:0017[03000000x_00000000x] end + :0:0014:0018[00000000x_00000000x] nop + :0:0015:0019[00000000x_00000000x] nop + :0:0016:0020[00000000x_00000000x] nop + :0:0017:0021[00000000x_00000000x] nop + Stats: + - shaderdb: 22 instr, 11 nops, 11 non-nops, 4 mov, 0 cov + - shaderdb: 0 last-baryf, 0 half, 1 full, 0 constlen + - shaderdb: 12 cat0, 4 cat1, 2 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7 + - shaderdb: 0 sstall, 1 (ss), 0 (sy) +0000000500170040: 0000: 48e5f283 00000000 00018000 00000005 + write 0xe5f9 (e5f9) + 0xe5f9: 0000001f +0000000500170050: 0000: 40e5f901 0000001f + write HLSQ_CS_CNTL_0 (e7b7) + HLSQ_CS_CNTL_0: { WGIDCONSTID = r51.w | UNK0 = r48.x | UNK1 = r63.x | LOCALIDREGID = r0.x } + HLSQ_CS_CNTL_1: 0x1f +0000000500170058: 0000: 48e7b702 00fcc0cf 0000001f + write HLSQ_CS_KERNEL_GROUP_X (e7b9) + HLSQ_CS_KERNEL_GROUP_X: 0x1 + HLSQ_CS_KERNEL_GROUP_Y: 0x1 + HLSQ_CS_KERNEL_GROUP_Z: 0x1 +0000000500170064: 0000: 40e7b983 00000001 00000001 00000001 + opcode: CP_LOAD_STATE4 (30) (4 dwords) + { DST_OFF = 0 | STATE_SRC = SS4_INDIRECT | STATE_BLOCK = SB4_CS_SHADER | NUM_UNIT = 1 } + { STATE_TYPE = ST4_SHADER | EXT_SRC_ADDR = 0x18000 } + { EXT_SRC_ADDR_HI = 0x5 } + :1:0000:0000[20554001x_00000000x] mov.s32s32 r0.y, 0 + :2:0001:0001[46d00000x_20020000x] shl.b r0.x, r0.x, 2 + :1:0002:0002[20554002x_00000001x] mov.s32s32 r0.z, 1 + :0:0003:0003[00000000x_00000000x] nop + :1:0004:0004[20154003x_00000001x] mov.s32s32 r0.w, r0.y + :2:0005:0005[42300000x_00010000x] add.s r0.x, r0.x, r0.y + :1:0006:0006[20154004x_00000001x] mov.s32s32 r1.x, r0.y + :6:0007:0007[c1060300x_01800002x] stl.u32 l[r0.y], r0.y, 1 + :0:0008:0008[00000400x_00000000x] (rpt4)nop + :6:0009:0013[c1060100x_01800004x] stl.u32 l[r0.x], r0.z, 1 + :6:0010:0014[c0460000x_01804001x] ldl.u32 r0.x, l[r0.y], 1 + :0:0011:0015[00001000x_00000000x] (ss)nop + :6:0012:0016[c7260003x_01000601x] stgb.untyped.4d.u32.1 g[0], r0.x, r0.y, r0.w + :0:0013:0017[03000000x_00000000x] end + :0:0014:0018[00000000x_00000000x] nop + :0:0015:0019[00000000x_00000000x] nop + Stats: + - shaderdb: 20 instr, 9 nops, 11 non-nops, 4 mov, 0 cov + - shaderdb: 0 last-baryf, 0 half, 1 full, 0 constlen + - shaderdb: 10 cat0, 4 cat1, 2 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7 + - shaderdb: 0 sstall, 1 (ss), 0 (sy) +0000000500170074: 0000: 70b08003 00760000 00018000 00000005 + opcode: CP_LOAD_STATE4 (30) (8 dwords) + { DST_OFF = 0 | STATE_SRC = SS4_DIRECT | STATE_BLOCK = SB4_CS_SSBO | NUM_UNIT = 1 } + { STATE_TYPE = ST4_SHADER | EXT_SRC_ADDR = 0 } + { EXT_SRC_ADDR_HI = 0 } + { BASE_LO = 0 } + { PITCH = 0 } + { ARRAY_PITCH = 0 } + { CPP = 0 } +0000000500170094: 0000: 00000000 00000000 00000000 00000000 +0000000500170084: 0000: 70b00007 007c0000 00000000 00000000 00000000 00000000 00000000 00000000 + opcode: CP_LOAD_STATE4 (30) (6 dwords) + { DST_OFF = 0 | STATE_SRC = SS4_DIRECT | STATE_BLOCK = SB4_CS_SSBO | NUM_UNIT = 1 } + { STATE_TYPE = ST4_CONSTANTS | EXT_SRC_ADDR = 0 } + { EXT_SRC_ADDR_HI = 0 } + { FMT = 0 | WIDTH = 0 } + { HEIGHT = 0 | DEPTH = 0 } +00000005001700b4: 0000: 00000000 00000000 +00000005001700a4: 0000: 70b08005 007c0000 00000001 00000000 00000000 00000000 + opcode: CP_LOAD_STATE4 (30) (6 dwords) + { DST_OFF = 0 | STATE_SRC = SS4_DIRECT | STATE_BLOCK = SB4_CS_SSBO | NUM_UNIT = 1 } + { STATE_TYPE = ST4_UBO | EXT_SRC_ADDR = 0 } + { EXT_SRC_ADDR_HI = 0 } + { BASE_LO = 0 } + { BASE_HI = 0 } +00000005001700cc: 0000: 00000000 00000000 +00000005001700bc: 0000: 70b08005 007c0000 00000002 00000000 00000000 00000000 + write TPL1_VS_TEX_COUNT (e700) + TPL1_VS_TEX_COUNT: 0 +00000005001700d4: 0000: 48e70001 00000000 + write TPL1_HS_TEX_COUNT (e701) + TPL1_HS_TEX_COUNT: 0 +00000005001700dc: 0000: 40e70101 00000000 + write TPL1_DS_TEX_COUNT (e702) + TPL1_DS_TEX_COUNT: 0 +00000005001700e4: 0000: 40e70201 00000000 + write TPL1_GS_TEX_COUNT (e703) + TPL1_GS_TEX_COUNT: 0 +00000005001700ec: 0000: 48e70301 00000000 + write TPL1_FS_TEX_COUNT (e750) + TPL1_FS_TEX_COUNT: 0 +00000005001700f4: 0000: 48e75001 00000000 + write TPL1_CS_TEX_COUNT (e751) + TPL1_CS_TEX_COUNT: 0 +00000005001700fc: 0000: 40e75101 00000000 + write HLSQ_CS_NDRANGE_1 (e7b1) + HLSQ_CS_NDRANGE_1: { GLOBALSIZE_X = 5 } + HLSQ_CS_NDRANGE_2: { GLOBALOFF_X = 0 } + HLSQ_CS_NDRANGE_3: { GLOBALSIZE_Y = 12 } + HLSQ_CS_NDRANGE_4: { GLOBALOFF_Y = 0 } + HLSQ_CS_NDRANGE_5: { GLOBALSIZE_Z = 21 } + HLSQ_CS_NDRANGE_6: { GLOBALOFF_Z = 0 } +0000000500170104: 0000: 48e7b186 00000005 00000000 0000000c 00000000 00000015 00000000 + opcode: CP_EXEC_CS (33) (5 dwords) + { 0 = 0 } + { NGROUPS_X = 1 } + { NGROUPS_Y = 2 } + { NGROUPS_Z = 3 } + draw[0] register values +!+ 00000001 CP_SCRATCH[0].REG: 1 +!+ 00100000 RB_DBG_ECO_CNTL: 0x100000 +!+ 00000044 RB_MODE_CNTL: 0x44 +!+ 10000000 RB_CCU_CNTL: 0x10000000 +!+ 0000001f PC_MODE_CNTL: 0x1f + + 00000000 PC_TESSFACTOR_ADDR_LO: 0 + + 00000000 PC_TESSFACTOR_ADDR_HI: 0 +!+ 00000080 HLSQ_TIMEOUT_THRESHOLD_0: 0x80 + + 00000000 HLSQ_TIMEOUT_THRESHOLD_1: 0 +!+ 00000001 HLSQ_MODE_CNTL: 0x1 + + 00000000 VFD_MODE_CNTL: 0 +!+ 00000400 VPC_DBG_ECO_CNTL: { ALLFLATOPTDIS } + + 00000000 VPC_MODE_CNTL: { 0 } + + 00000000 UCHE_CACHE_INVALIDATE_MIN_LO: 0 + + 00000000 UCHE_CACHE_INVALIDATE_MIN_HI: 0 + + 00000000 UCHE_CACHE_INVALIDATE_MAX_LO: 0 + + 00000000 UCHE_CACHE_INVALIDATE_MAX_HI: 0 +!+ 00000012 UCHE_CACHE_INVALIDATE: 0x12 +!+ 00000800 SP_DBG_ECO_CNTL: 0x800 +!+ 0000001e SP_MODE_CNTL: 0x1e +!+ 00000544 TPL1_MODE_CNTL: 0x544 +!+ 3ff00010 GRAS_SU_POINT_MINMAX: { MIN = 1.000000 | MAX = 1023.000000 } +!+ 00000008 GRAS_SU_POINT_SIZE: 0.500000 + + 00000000 GRAS_SU_CONSERVATIVE_RAS_CNTL: 0 + + 00000000 GRAS_SC_SCREEN_SCISSOR_CNTL: 0 + + 00000000 GRAS_SC_WINDOW_SCISSOR_TL: { X = 0 | Y = 0 } +!+ 00ff00ff GRAS_SC_WINDOW_SCISSOR_BR: { X = 255 | Y = 255 } + + 00000000 GRAS_LRZ_CNTL: { 0 } +!+ 00020000 RB_CNTL: { WIDTH = 0 | HEIGHT = 0 | BYPASS } + + 00000000 RB_WINDOW_OFFSET: { X = 0 | Y = 0 } + + 00000000 RB_RESOLVE_CNTL_1: { X = 0 | Y = 0 } +!+ 00ff00ff RB_RESOLVE_CNTL_2: { X = 255 | Y = 255 } + + 00000000 UNKNOWN_E292: 0 + + 00000000 UNKNOWN_E293: 0 +!+ 00000001 VPC_SO_OVERRIDE: { SO_DISABLE } +!+ 00000012 PC_RASTER_CNTL: { POLYMODE_FRONT_PTYPE = PC_DRAW_TRIANGLES | POLYMODE_BACK_PTYPE = PC_DRAW_TRIANGLES } +!+ ffffffff PC_RESTART_INDEX: 0xffffffff +!+ 00000003 PC_POWER_CNTL: 0x3 +!+ 00000003 VFD_POWER_CNTL: 0x3 + + 00000000 SP_SP_CNTL: 0 +!+ 00000001 SP_CS_CONFIG: { ENABLED | CONSTOBJECTOFFSET = 0 | SHADEROBJOFFSET = 0 } + + 00000000 SP_VS_CONFIG_MAX_CONST: 0 + + 00000000 SP_FS_CONFIG_MAX_CONST: 0 +!+ 00000806 SP_CS_CTRL_REG0: { BUFFER | THREADSIZE = TWO_QUADS | HALFREGFOOTPRINT = 0 | FULLREGFOOTPRINT = 2 | BRANCHSTACK = 0 | 0x2 } + + 00000000 UNKNOWN_E5F2: 0 +!+ 00018000 SP_CS_OBJ_START_LO: 0x18000 base=500018000, offset=0, size=8192 +!+ 00000005 SP_CS_OBJ_START_HI: 0x5 base=500018000, offset=0, size=8192 +0000000500018000: 0000: 00000000 20554001 20020000 46d00000 00000001 20554002 00000000 00000000 +0000000500018020: 0020: 00000001 20154003 00010000 42300000 00000001 20154004 01800002 c1060300 +0000000500018040: 0040: 00000000 00000400 01800004 c1060100 01804001 c0460000 00000000 00001000 +0000000500018060: 0060: 01000601 c7260003 00000000 03000000 00000000 00000000 00000000 00000000 +* + :1:0000:0000[20554001x_00000000x] mov.s32s32 r0.y, 0 + :2:0001:0001[46d00000x_20020000x] shl.b r0.x, r0.x, 2 + :1:0002:0002[20554002x_00000001x] mov.s32s32 r0.z, 1 + :0:0003:0003[00000000x_00000000x] nop + :1:0004:0004[20154003x_00000001x] mov.s32s32 r0.w, r0.y + :2:0005:0005[42300000x_00010000x] add.s r0.x, r0.x, r0.y + :1:0006:0006[20154004x_00000001x] mov.s32s32 r1.x, r0.y + :6:0007:0007[c1060300x_01800002x] stl.u32 l[r0.y], r0.y, 1 + :0:0008:0008[00000400x_00000000x] (rpt4)nop + :6:0009:0013[c1060100x_01800004x] stl.u32 l[r0.x], r0.z, 1 + :6:0010:0014[c0460000x_01804001x] ldl.u32 r0.x, l[r0.y], 1 + :0:0011:0015[00001000x_00000000x] (ss)nop + :6:0012:0016[c7260003x_01000601x] stgb.untyped.4d.u32.1 g[0], r0.x, r0.y, r0.w + :0:0013:0017[03000000x_00000000x] end + :0:0014:0018[00000000x_00000000x] nop + :0:0015:0019[00000000x_00000000x] nop + :0:0016:0020[00000000x_00000000x] nop + :0:0017:0021[00000000x_00000000x] nop + Stats: + - shaderdb: 22 instr, 11 nops, 11 non-nops, 4 mov, 0 cov + - shaderdb: 0 last-baryf, 0 half, 1 full, 0 constlen + - shaderdb: 12 cat0, 4 cat1, 2 cat2, 0 cat3, 0 cat4, 0 cat5, 4 cat6, 0 cat7 + - shaderdb: 0 sstall, 1 (ss), 0 (sy) +!+ 0000001f 0xe5f9: 0000001f + + 00000000 TPL1_VS_TEX_COUNT: 0 + + 00000000 TPL1_HS_TEX_COUNT: 0 + + 00000000 TPL1_DS_TEX_COUNT: 0 + + 00000000 TPL1_GS_TEX_COUNT: 0 + + 00000000 TPL1_FS_TEX_COUNT: 0 + + 00000000 TPL1_CS_TEX_COUNT: 0 +!+ 00000881 HLSQ_CONTROL_0_REG: { FSTHREADSIZE = FOUR_QUADS | CSTHREADSIZE = TWO_QUADS | 0x880 } +!+ 01f00000 HLSQ_UPDATE_CNTL: 0x1f00000 +!+ 00000001 HLSQ_CS_CONFIG: { ENABLED | CONSTOBJECTOFFSET = 0 | SHADEROBJOFFSET = 0 } +!+ 00000003 HLSQ_CS_CNTL: { SSBO_ENABLE | INSTRLEN = 1 } +!+ 01805013 HLSQ_CS_NDRANGE_0: { KERNELDIM = 3 | LOCALSIZEX = 4 | LOCALSIZEY = 5 | LOCALSIZEZ = 6 } +!+ 00000005 HLSQ_CS_NDRANGE_1: { GLOBALSIZE_X = 5 } + + 00000000 HLSQ_CS_NDRANGE_2: { GLOBALOFF_X = 0 } +!+ 0000000c HLSQ_CS_NDRANGE_3: { GLOBALSIZE_Y = 12 } + + 00000000 HLSQ_CS_NDRANGE_4: { GLOBALOFF_Y = 0 } +!+ 00000015 HLSQ_CS_NDRANGE_5: { GLOBALSIZE_Z = 21 } + + 00000000 HLSQ_CS_NDRANGE_6: { GLOBALOFF_Z = 0 } +!+ 00fcc0cf HLSQ_CS_CNTL_0: { WGIDCONSTID = r51.w | UNK0 = r48.x | UNK1 = r63.x | LOCALIDREGID = r0.x } +!+ 0000001f HLSQ_CS_CNTL_1: 0x1f +!+ 00000001 HLSQ_CS_KERNEL_GROUP_X: 0x1 +!+ 00000001 HLSQ_CS_KERNEL_GROUP_Y: 0x1 +!+ 00000001 HLSQ_CS_KERNEL_GROUP_Z: 0x1 + + 00000000 UNKNOWN_E7C0: 0 + + 00000000 0xe7c1: 00000000 + + 00000000 0xe7c2: 00000000 + + 00000000 HLSQ_VS_CONSTLEN: 0 + + 00000000 HLSQ_VS_INSTRLEN: 0 + + 00000000 UNKNOWN_E7C5: 0 + + 00000000 0xe7c6: 00000000 + + 00000000 0xe7c7: 00000000 + + 00000000 HLSQ_HS_CONSTLEN: 0 + + 00000000 HLSQ_HS_INSTRLEN: 0 + + 00000000 UNKNOWN_E7CA: 0 + + 00000000 0xe7cb: 00000000 + + 00000000 0xe7cc: 00000000 + + 00000000 HLSQ_DS_CONSTLEN: 0 + + 00000000 HLSQ_DS_INSTRLEN: 0 + + 00000000 UNKNOWN_E7CF: 0 + + 00000000 0xe7d0: 00000000 + + 00000000 0xe7d1: 00000000 + + 00000000 HLSQ_GS_CONSTLEN: 0 + + 00000000 HLSQ_GS_INSTRLEN: 0 + + 00000000 UNKNOWN_E7D4: 0 + + 00000000 0xe7d5: 00000000 + + 00000000 0xe7d6: 00000000 + + 00000000 HLSQ_FS_CONSTLEN: 0 + + 00000000 HLSQ_FS_INSTRLEN: 0 + + 00000000 UNKNOWN_E7D9: 0 + + 00000000 0xe7da: 00000000 + + 00000000 0xe7db: 00000000 +!+ 00000030 HLSQ_CS_CONSTLEN: 48 +!+ 00000001 HLSQ_CS_INSTRLEN: 1 +0000000500170120: 0000: 70b30004 00000000 00000001 00000002 00000003 + write VPC_CNTL_0 (e280) + VPC_CNTL_0: { STRIDE_IN_VPC = 0 } +0000000500170134: 0000: 40e28001 00000000 +0000000500190298: 0000: 70bf8003 00170000 00000005 0000004f + opcode: CP_SET_DRAW_STATE (43) (4 dwords) + { COUNT = 0 | DISABLE_ALL_GROUPS | GROUP_ID = 0 } + { ADDR_LO = 0 } + { ADDR_HI = 0 } +00000005001902a8: 0000: 70438003 00040000 00000000 00000000 + opcode: CP_SKIP_IB2_ENABLE_LOCAL (23) (2 dwords) +00000005001902b8: 0000: 70230001 00000000 + opcode: CP_SKIP_IB2_ENABLE_GLOBAL (1d) (2 dwords) +00000005001902c0: 0000: 709d0001 00000000 + write GRAS_LRZ_CNTL (e100) + GRAS_LRZ_CNTL: { 0x8 } +00000005001902c8: 0000: 48e10001 00000008 + opcode: CP_EVENT_WRITE (46) (2 dwords) + { EVENT = LRZ_FLUSH } + event LRZ_FLUSH +00000005001902d0: 0000: 70460001 00000026 + opcode: CP_EVENT_WRITE (46) (5 dwords) + { EVENT = CACHE_FLUSH_TS } + { ADDR_0_LO = 0x10000 } + { ADDR_0_HI = 0x5 } + { 3 = 0 } + event CACHE_FLUSH_TS +00000005001902d8: 0000: 70460004 00000004 00010000 00000005 00000000 + opcode: CP_SET_RENDER_MODE (6c) (9 dwords) + { MODE = BYPASS } + { ADDR_0_LO = 0x15000 } + { ADDR_0_HI = 0x5 } + { 0 } + { 4 = 0x3 } + { ADDR_1_LEN = 15 } + { ADDR_1_LO = 0x1f010 } + { ADDR_1_HI = 0x5 } +0000000500015000: 0000: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 +* + addr: 0x000000050001f010 + len: 0xf + write GRAS_LRZ_CNTL (e100) + GRAS_LRZ_CNTL: { 0x8 } +000000050001f010: 0000: 48e10001 00000008 + write CP_SCRATCH[0].REG (0b78) + CP_SCRATCH[0].REG: 1 +000000050001f018: 0000: 400b7801 00000001 + opcode: CP_WAIT_MEM_WRITES (12) (1 dwords) +000000050001f020: 0000: 70928000 + opcode: (null) (74) (6 dwords) +000000050001f024: 0000: 70f48005 c0000b78 00012c40 00000005 00015000 00000005 + opcode: CP_MEM_WRITE (3d) (4 dwords) + { ADDR_LO = 0x11000 } + { ADDR_HI = 0x5 } + gpuaddr:0000000500011000 +000000050001f048: 0000: 00000001 +000000050001f03c: 0000: 703d8003 00011000 00000005 00000001 +000000050001f010: 0000: 48e10001 00000008 400b7801 00000001 70928000 70f48005 c0000b78 00012c40 +000000050001f030: 0020: 00000005 00015000 00000005 703d8003 00011000 00000005 00000001 +00000005001902ec: 0000: 70ec0008 00000001 00015000 00000005 00000000 00000003 0000000f 0001f010 +000000050019030c: 0020: 00000005 + opcode: CP_PREEMPT_ENABLE_LOCAL (6a) (2 dwords) +0000000500190310: 0000: 70ea0001 00000001 + opcode: CP_SET_RENDER_MODE (6c) (9 dwords) + { MODE = BYPASS } + { ADDR_0_LO = 0x15000 } + { ADDR_0_HI = 0x5 } + { 0 } + { 4 = 0x3 } + { ADDR_1_LEN = 15 } + { ADDR_1_LO = 0x1f010 } + { ADDR_1_HI = 0x5 } +0000000500015000: 0000: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 +* + addr: 0x000000050001f010 + len: 0xf + write GRAS_LRZ_CNTL (e100) + GRAS_LRZ_CNTL: { 0x8 } +000000050001f010: 0000: 48e10001 00000008 + write CP_SCRATCH[0].REG (0b78) + CP_SCRATCH[0].REG: 1 +000000050001f018: 0000: 400b7801 00000001 + opcode: CP_WAIT_MEM_WRITES (12) (1 dwords) +000000050001f020: 0000: 70928000 + opcode: (null) (74) (6 dwords) +000000050001f024: 0000: 70f48005 c0000b78 00012c40 00000005 00015000 00000005 + opcode: CP_MEM_WRITE (3d) (4 dwords) + { ADDR_LO = 0x11000 } + { ADDR_HI = 0x5 } + gpuaddr:0000000500011000 +000000050001f048: 0000: 00000001 +000000050001f03c: 0000: 703d8003 00011000 00000005 00000001 +000000050001f010: 0000: 48e10001 00000008 400b7801 00000001 70928000 70f48005 c0000b78 00012c40 +000000050001f030: 0020: 00000005 00015000 00000005 703d8003 00011000 00000005 00000001 +0000000500190318: 0000: 70ec0008 00000001 00015000 00000005 00000000 00000003 0000000f 0001f010 +0000000500190338: 0020: 00000005 +############################################################ +vertices: 0 +cmd: test_compiler:384: <<< eglSwapBuffers(display, surface): succeeded +cmd: test_compiler:385: >>> glFlush() +cmd: glFlush() +cmd: test_compiler:385: <<< glFlush(): succeeded +cmd: test_compiler:387: >>> eglDestroySurface(display, surface) +cmd: eglDestroySurface(display, surface) +cmd: test_compiler:387: <<< eglDestroySurface(display, surface): succeeded +cmd: test_compiler:388: >>> eglTerminate(display) +cmd: eglTerminate(display) +############################################################ +cmdstream[1]: 11 dwords + opcode: CP_SET_RENDER_MODE (6c) (9 dwords) + { MODE = BYPASS } + { ADDR_0_LO = 0x15000 } + { ADDR_0_HI = 0x5 } + { 0 } + { 4 = 0x3 } + { ADDR_1_LEN = 15 } + { ADDR_1_LO = 0x1f010 } + { ADDR_1_HI = 0x5 } +0000000500015000: 0000: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 +* + addr: 0x000000050001f010 + len: 0xf + write GRAS_LRZ_CNTL (e100) + GRAS_LRZ_CNTL: { 0x8 } +000000050001f010: 0000: 48e10001 00000008 + write CP_SCRATCH[0].REG (0b78) + CP_SCRATCH[0].REG: 1 +000000050001f018: 0000: 400b7801 00000001 + opcode: CP_WAIT_MEM_WRITES (12) (1 dwords) +000000050001f020: 0000: 70928000 + opcode: (null) (74) (6 dwords) +000000050001f024: 0000: 70f48005 c0000b78 00012c40 00000005 00015000 00000005 + opcode: CP_MEM_WRITE (3d) (4 dwords) + { ADDR_LO = 0x11000 } + { ADDR_HI = 0x5 } + gpuaddr:0000000500011000 +000000050001f048: 0000: 00000001 +000000050001f03c: 0000: 703d8003 00011000 00000005 00000001 +000000050001f010: 0000: 48e10001 00000008 400b7801 00000001 70928000 70f48005 c0000b78 00012c40 +000000050001f030: 0020: 00000005 00015000 00000005 703d8003 00011000 00000005 00000001 +000000050019033c: 0000: 70ec0008 00000001 00015000 00000005 00000000 00000003 0000000f 0001f010 +000000050019035c: 0020: 00000005 + opcode: CP_NOP (10) (2 dwords) +0000000500190360: 0000: 70100001 00000000 +############################################################ +vertices: 0 +############################################################ +cmdstream[2]: 2 dwords + opcode: CP_NOP (10) (2 dwords) +000000050000c000: 0000: 70100001 00000000 +############################################################ +vertices: 0 +cmd: test_compiler:388: <<< eglTerminate(display): succeeded diff --git a/src/freedreno/.gitlab-ci/traces/compute-a540.rd.gz b/src/freedreno/.gitlab-ci/traces/compute-a540.rd.gz new file mode 100644 index 00000000000..2c8e968cf0a Binary files /dev/null and b/src/freedreno/.gitlab-ci/traces/compute-a540.rd.gz differ diff --git a/src/freedreno/decode/meson.build b/src/freedreno/decode/meson.build index 5c97eaf1a34..d4f199590c5 100644 --- a/src/freedreno/decode/meson.build +++ b/src/freedreno/decode/meson.build @@ -153,6 +153,7 @@ if dep_lua.found() and dep_libarchive.found() ['fd-clouds', ['--frame', '0', '--once']], ['es2gears-a320', ['--frame', '0', '--once']], ['glxgears-a420', ['--frame', '1', '--once']], + ['compute-a540', ['--once']], ['dEQP-GLES2.functional.texture.specification.basic_teximage2d.rgba16f_2d', ['--once']], ['dEQP-VK.draw.indirect_draw.indexed.indirect_draw_count.triangle_list', ['--frame', '0', '--once']], # Test a lua script to ensure we don't break scripting API