diff --git a/docs/features.txt b/docs/features.txt index f9cd93cc0e4..06ca61d07f6 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -145,9 +145,9 @@ GL 4.1, GLSL 4.10 --- all DONE: i965/gen7+, nvc0, r600, radeonsi, llvmpipe, virg GL_ARB_ES2_compatibility DONE (freedreno, i965, nv50, softpipe, v3d, vc4, panfrost, lima) GL_ARB_get_program_binary DONE (freedreno, v3d, 0 or 1 binary formats) GL_ARB_separate_shader_objects DONE (all drivers) - GL_ARB_shader_precision DONE (i965/gen7+, all drivers that support GLSL 4.10) - GL_ARB_vertex_attrib_64bit DONE (i965/gen7+, softpipe, ) - GL_ARB_viewport_array DONE (i965, nv50, softpipe, ) + GL_ARB_shader_precision DONE (freedreno/a6xx, i965/gen7+, all drivers that support GLSL 4.10) + GL_ARB_vertex_attrib_64bit DONE (freedreno/a6xx, i965/gen7+, softpipe, ) + GL_ARB_viewport_array DONE (freedreno/a6xx, i965, nv50, softpipe, ) GL 4.2, GLSL 4.20 -- all DONE: i965/gen7+, nvc0, r600, radeonsi, llvmpipe, virgl, zink, d3d12 @@ -175,7 +175,7 @@ GL 4.3, GLSL 4.30 -- all DONE: i965/gen8+, nvc0, r600, radeonsi, llvmpipe, virgl GL_ARB_copy_image DONE (freedreno/a6xx, i965, nv50, softpipe, v3d) GL_KHR_debug DONE (all drivers) GL_ARB_explicit_uniform_location DONE (all drivers that support GLSL) - GL_ARB_fragment_layer_viewport DONE (i965, nv50, softpipe, d3d12) + GL_ARB_fragment_layer_viewport DONE (freedreno/a6xx, i965, nv50, softpipe, d3d12) GL_ARB_framebuffer_no_attachments DONE (freedreno, i965, softpipe, v3d, d3d12) GL_ARB_internalformat_query2 DONE (all drivers) GL_ARB_invalidate_subdata DONE (all drivers) @@ -346,7 +346,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve GL_OES_texture_half_float DONE (freedreno, i965, r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe, panfrost, v3d, zink, lima) GL_OES_texture_half_float_linear DONE (freedreno, i965, r300, r600, radeonsi, nv30, nv50, nvc0, softpipe, llvmpipe, panfrost, v3d, zink, lima) GL_OES_texture_view DONE (freedreno, i965/gen8+, 
r600, radeonsi, nv50, nvc0, softpipe, llvmpipe, v3d, zink) - GL_OES_viewport_array DONE (i965, nvc0, r600, radeonsi, softpipe, zink) + GL_OES_viewport_array DONE (freedreno/a6xx, i965, nvc0, r600, radeonsi, softpipe, zink) GLX_ARB_context_flush_control DONE (all drivers) GLX_ARB_robustness_application_isolation not started GLX_ARB_robustness_share_group_isolation not started diff --git a/src/freedreno/ci/deqp-freedreno-a630.toml b/src/freedreno/ci/deqp-freedreno-a630.toml index 4b31afc3342..6822fc7eb0c 100644 --- a/src/freedreno/ci/deqp-freedreno-a630.toml +++ b/src/freedreno/ci/deqp-freedreno-a630.toml @@ -40,7 +40,7 @@ caselists = [ "/deqp/mustpass/gles2-khr-master.txt", "/deqp/mustpass/gles3-khr-master.txt", "/deqp/mustpass/gles31-khr-master.txt", - "/deqp/mustpass/gl33-master.txt", + "/deqp/mustpass/gl43-master.txt", ] skips = ["install/freedreno-a630-premerge-skips.txt"] deqp_args = [ diff --git a/src/freedreno/ci/freedreno-a618-premerge-skips.txt b/src/freedreno/ci/freedreno-a618-premerge-skips.txt index 82badfe0999..e040115dba6 100644 --- a/src/freedreno/ci/freedreno-a618-premerge-skips.txt +++ b/src/freedreno/ci/freedreno-a618-premerge-skips.txt @@ -3,7 +3,7 @@ # delete lines from the test list. Be careful. # Timeouts, passes otherwise -KHR-GL33.texture_swizzle.smoke +KHR-GL43.texture_swizzle.smoke # These generally take near 15 seconds. The time is spent inside dEQP. dEQP-VK.tessellation.invariance.inner_triangle_set.quads_equal_spacing diff --git a/src/freedreno/ci/freedreno-a630-fails.txt b/src/freedreno/ci/freedreno-a630-fails.txt index fc50bcb8962..63055233573 100644 --- a/src/freedreno/ci/freedreno-a630-fails.txt +++ b/src/freedreno/ci/freedreno-a630-fails.txt @@ -1,8 +1,21 @@ -# Test bug, using GLSL 4.20 on a 3.3 context. 
-# Broken in VK-GL-CTS e5c45899ed7e90fb616ccd1d3c66e6eded120684 -KHR-GL33.cull_distance.coverage,Fail +# Shader compilation error log: 0:6(1): error: invalid stream specified 1 is larger than MAX_VERTEX_STREAMS - 1 +KHR-GL43.transform_feedback.draw_xfb_stream_instanced_test,Fail -KHR-GL33.transform_feedback.draw_xfb_stream_instanced_test,Fail +# glGetError() returned GL_INVALID_ENUM at gl4cMapBufferAlignmentTests.cpp:279 +KHR-GL43.map_buffer_alignment.functional,Fail + +KHR-GL43.gpu_shader_fp64.fp64.max_uniform_components,Fail +KHR-GL43.gpu_shader_fp64.builtin.mod_dvec2,Fail +KHR-GL43.gpu_shader_fp64.builtin.mod_dvec3,Fail +KHR-GL43.gpu_shader_fp64.builtin.mod_dvec4,Fail +KHR-GL43.shader_subroutine.control_flow_and_returned_subroutine_values_used_as_subroutine_input,Fail +KHR-GL43.shader_image_load_store.basic-allFormats-store,Fail +KHR-GL43.shader_image_load_store.basic-allTargets-store,Fail +KHR-GL43.shader_image_load_store.non-layered_binding,Fail +KHR-GL43.shading_language_420pack.binding_images,Fail +KHR-GL43.copy_image.functional,Fail +KHR-GL43.compute_shader.conditional-dispatching,Fail +KHR-GL43.vertex_attrib_binding.advanced-largeStrideAndOffsetsNewAndLegacyAPI,Fail # Lots of errors like "[279] Check failed. 
Received: [3,0,0,2] instead of: [5,0,0,2]" KHR-GLES31.core.geometry_shader.layered_framebuffer.depth_support,Fail @@ -16,6 +29,9 @@ KHR-GLES31.core.tessellation_shader.tessellation_shader_tc_barriers.barrier_guar # https://gitlab.freedesktop.org/mesa/mesa/-/issues/5582 KHR-GLES31.core.texture_cube_map_array.color_depth_attachments,Fail +# glsl parser bug will go away when we expose gl44 and/or GL_ARB_enhanced_layouts +KHR-GL33.CommonBugs.CommonBug_ParenthesisInLayoutQualifierIntegerValue,Fail + # rendering errors in ~4x4 blocks around the bottom side of the diagonal for the quad bypass-dEQP-GLES31.functional.blend_equation_advanced.msaa.colorburn,Fail bypass-dEQP-GLES31.functional.blend_equation_advanced.msaa.colordodge,Fail @@ -354,7 +370,6 @@ spec@!opengl 1.0@depth-clear-precision-check,Fail spec@!opengl 1.0@depth-clear-precision-check@depth24,Fail spec@!opengl 1.0@depth-clear-precision-check@depth24_stencil8,Fail spec@!opengl 1.0@depth-clear-precision-check@depth32,Fail -spec@arb_texture_buffer_object@texture-buffer-size-clamp,Fail # https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/3759 # deqp-vk: ../src/freedreno/vulkan/tu_pipeline.c:3894: tu_pipeline_builder_init_graphics: Assertion `subpass->color_count == 0 || !create_info->pColorBlendState || subpass->color_count == create_info->pColorBlendState->attachmentCount' failed diff --git a/src/freedreno/ci/freedreno-a630-flakes.txt b/src/freedreno/ci/freedreno-a630-flakes.txt index 3c29449643d..8af2ef4f8c7 100644 --- a/src/freedreno/ci/freedreno-a630-flakes.txt +++ b/src/freedreno/ci/freedreno-a630-flakes.txt @@ -89,7 +89,10 @@ dEQP-GLES3.functional.fbo.blit.conversion.rg8i_to_r16i dEQP-GLES3.functional.fbo.blit.conversion.rg8_to_r16f # First noticed Jun 1 2020 on an innocent branch. 
-KHR-GL33.packed_depth_stencil.verify_copy_tex_image.depth32f_stencil8 +KHR-GL43.packed_depth_stencil.verify_copy_tex_image.depth32f_stencil8 + +KHR-GL43.shader_image_size.basic-nonMS-fs-float +KHR-GL43.shader_image_size.advanced-nonMS-fs-float # Flaky with introduction of testing KHR-GLES31.core.arrays_of_arrays.InteractionFunctionCalls2 diff --git a/src/freedreno/ci/freedreno-a630-premerge-skips.txt b/src/freedreno/ci/freedreno-a630-premerge-skips.txt index 028c613d5ed..bdb9693564c 100644 --- a/src/freedreno/ci/freedreno-a630-premerge-skips.txt +++ b/src/freedreno/ci/freedreno-a630-premerge-skips.txt @@ -3,4 +3,6 @@ # delete lines from the test list. Be careful. # Timeouts, passes otherwise -KHR-GL33.texture_swizzle.smoke +KHR-GL43.texture_swizzle.smoke +KHR-GL43.gpu_shader_fp64.builtin.inverse_dmat4 +KHR-GL43.gpu_shader_fp64.fp64.varyings diff --git a/src/freedreno/ci/freedreno-a630-skips.txt b/src/freedreno/ci/freedreno-a630-skips.txt index b7d49e80510..40c448c6bf9 100644 --- a/src/freedreno/ci/freedreno-a630-skips.txt +++ b/src/freedreno/ci/freedreno-a630-skips.txt @@ -36,6 +36,7 @@ KHR-GLES31.core.shader_image_load_store.basic-allFormats-store-fs # 60s timeout KHR-GLES31.core.tessellation_shader.tessellation_control_to_tessellation_evaluation.gl_MaxPatchVertices_Position_PointSize +KHR-GL43.copy_image.functional # These take most of a minute to run spec@!opengl 3.0@clearbuffer-depth-cs-probe diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.c b/src/gallium/drivers/freedreno/a6xx/fd6_context.c index 179d7992893..cc736aac8e8 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_context.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.c @@ -187,7 +187,8 @@ setup_state_map(struct fd_context *ctx) /* NOTE: scissor enabled bit is part of rasterizer state, but * fd_rasterizer_state_bind() will mark scissor dirty if needed: */ - fd_context_add_map(ctx, FD_DIRTY_SCISSOR, BIT(FD6_GROUP_SCISSOR)); + fd_context_add_map(ctx, FD_DIRTY_SCISSOR | FD_DIRTY_PROG, 
+ BIT(FD6_GROUP_SCISSOR)); /* Stuff still emit in IB2 * @@ -195,7 +196,7 @@ setup_state_map(struct fd_context *ctx) * move it into FD6_GROUP_RASTERIZER? */ fd_context_add_map( - ctx, FD_DIRTY_STENCIL_REF | FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER, + ctx, FD_DIRTY_STENCIL_REF | FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG, BIT(FD6_GROUP_NON_GROUP)); } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index 44241c31519..c1268729ab8 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -730,15 +730,19 @@ static struct fd_ringbuffer * build_scissor(struct fd6_emit *emit) assert_dt { struct fd_context *ctx = emit->ctx; - struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); + struct pipe_scissor_state *scissors = fd_context_get_scissor(ctx); + unsigned num_viewports = emit->prog->num_viewports; struct fd_ringbuffer *ring = fd_submit_new_ringbuffer( - emit->ctx->batch->submit, 3 * 4, FD_RINGBUFFER_STREAMING); + emit->ctx->batch->submit, (1 + (2 * num_viewports)) * 4, FD_RINGBUFFER_STREAMING); - OUT_REG( - ring, - A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0, .x = scissor->minx, .y = scissor->miny), - A6XX_GRAS_SC_SCREEN_SCISSOR_BR(0, .x = scissor->maxx, .y = scissor->maxy)); + OUT_PKT4(ring, REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0), 2 * num_viewports); + for (unsigned i = 0; i < num_viewports; i++) { + OUT_RING(ring, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_X(scissors[i].minx) | + A6XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(scissors[i].miny)); + OUT_RING(ring, A6XX_GRAS_SC_SCREEN_SCISSOR_BR_X(scissors[i].maxx) | + A6XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(scissors[i].maxy)); + } return ring; } @@ -954,6 +958,7 @@ fd6_emit_non_ring(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt { struct fd_context *ctx = emit->ctx; const enum fd_dirty_3d_state dirty = emit->dirty; + unsigned num_viewports = emit->prog->num_viewports; if (dirty & FD_DIRTY_STENCIL_REF) { struct 
pipe_stencil_ref *sr = &ctx->stencil_ref; @@ -963,23 +968,27 @@ fd6_emit_non_ring(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt A6XX_RB_STENCILREF_BFREF(sr->ref_value[1])); } - if (dirty & FD_DIRTY_VIEWPORT) { - struct pipe_scissor_state *scissor = &ctx->viewport_scissor[0]; - struct pipe_viewport_state *vp = & ctx->viewport[0]; + if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_PROG)) { + for (unsigned i = 0; i < num_viewports; i++) { + struct pipe_scissor_state *scissor = &ctx->viewport_scissor[i]; + struct pipe_viewport_state *vp = & ctx->viewport[i]; - OUT_REG(ring, A6XX_GRAS_CL_VPORT_XOFFSET(0, vp->translate[0]), - A6XX_GRAS_CL_VPORT_XSCALE(0, vp->scale[0]), - A6XX_GRAS_CL_VPORT_YOFFSET(0, vp->translate[1]), - A6XX_GRAS_CL_VPORT_YSCALE(0, vp->scale[1]), - A6XX_GRAS_CL_VPORT_ZOFFSET(0, vp->translate[2]), - A6XX_GRAS_CL_VPORT_ZSCALE(0, vp->scale[2])); + OUT_REG(ring, A6XX_GRAS_CL_VPORT_XOFFSET(i, vp->translate[0]), + A6XX_GRAS_CL_VPORT_XSCALE(i, vp->scale[0]), + A6XX_GRAS_CL_VPORT_YOFFSET(i, vp->translate[1]), + A6XX_GRAS_CL_VPORT_YSCALE(i, vp->scale[1]), + A6XX_GRAS_CL_VPORT_ZOFFSET(i, vp->translate[2]), + A6XX_GRAS_CL_VPORT_ZSCALE(i, vp->scale[2])); - OUT_REG( - ring, - A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0, .x = scissor->minx, - .y = scissor->miny), - A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR(0, .x = scissor->maxx, - .y = scissor->maxy)); + OUT_REG( + ring, + A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(i, + .x = scissor->minx, + .y = scissor->miny), + A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR(i, + .x = scissor->maxx, + .y = scissor->maxy)); + } OUT_REG(ring, A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ(.horz = ctx->guardband.x, .vert = ctx->guardband.y)); @@ -988,18 +997,22 @@ fd6_emit_non_ring(struct fd_ringbuffer *ring, struct fd6_emit *emit) assert_dt /* The clamp ranges are only used when the rasterizer wants depth * clamping. 
*/ - if ((dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER)) && + if ((dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) && fd_depth_clamp_enabled(ctx)) { - struct pipe_viewport_state *vp = & ctx->viewport[0]; - float zmin, zmax; + for (unsigned i = 0; i < num_viewports; i++) { + struct pipe_viewport_state *vp = & ctx->viewport[i]; + float zmin, zmax; - util_viewport_zmin_zmax(vp, ctx->rasterizer->clip_halfz, - &zmin, &zmax); + util_viewport_zmin_zmax(vp, ctx->rasterizer->clip_halfz, + &zmin, &zmax); - OUT_REG(ring, A6XX_GRAS_CL_Z_CLAMP_MIN(0, zmin), - A6XX_GRAS_CL_Z_CLAMP_MAX(0, zmax)); + OUT_REG(ring, A6XX_GRAS_CL_Z_CLAMP_MIN(i, zmin), + A6XX_GRAS_CL_Z_CLAMP_MAX(i, zmax)); - OUT_REG(ring, A6XX_RB_Z_CLAMP_MIN(zmin), A6XX_RB_Z_CLAMP_MAX(zmax)); + /* TODO: RB_Z_CLAMP_MIN/MAX take no viewport index, so only viewport 0's range is written; what to do for multi viewport? */ + if (i == 0) + OUT_REG(ring, A6XX_RB_Z_CLAMP_MIN(zmin), A6XX_RB_Z_CLAMP_MAX(zmax)); + } } } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index 5aba5189188..4f45bc69b5c 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -400,14 +400,14 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid; uint32_t smask_in_regid, smask_regid; uint32_t stencilref_regid; - uint32_t vertex_regid, instance_regid, layer_regid, vs_primitive_regid; + uint32_t vertex_regid, instance_regid, layer_regid, view_regid, vs_primitive_regid; uint32_t hs_invocation_regid; uint32_t tess_coord_x_regid, tess_coord_y_regid, hs_rel_patch_regid, ds_rel_patch_regid, ds_primitive_regid; uint32_t ij_regid[IJ_COUNT]; uint32_t gs_header_regid; enum a6xx_threadsize fssz; - uint8_t psize_loc = ~0, pos_loc = ~0, layer_loc = ~0; + uint8_t psize_loc = ~0, pos_loc = ~0, layer_loc = ~0, view_loc =~0; uint8_t clip0_loc, clip1_loc; int i, j; @@ -417,6 +417,8 @@ setup_stateobj(struct 
fd_ringbuffer *ring, struct fd_context *ctx, const struct ir3_shader_variant *ds = state->ds; const struct ir3_shader_variant *gs = state->gs; const struct ir3_shader_variant *fs = binning_pass ? &dummy_fs : state->fs; + const struct ir3_shader_variant *last_shader = binning_pass ? state->bs : + fd6_last_shader(state); /* binning VS is wrong when GS is present, so use nonbinning VS * TODO: compile both binning VS/GS variants correctly @@ -428,11 +430,12 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, fssz = fs->info.double_threadsize ? THREAD128 : THREAD64; - pos_regid = ir3_find_output_regid(vs, VARYING_SLOT_POS); - psize_regid = ir3_find_output_regid(vs, VARYING_SLOT_PSIZ); - clip0_regid = ir3_find_output_regid(vs, VARYING_SLOT_CLIP_DIST0); - clip1_regid = ir3_find_output_regid(vs, VARYING_SLOT_CLIP_DIST1); - layer_regid = ir3_find_output_regid(vs, VARYING_SLOT_LAYER); + pos_regid = ir3_find_output_regid(last_shader, VARYING_SLOT_POS); + psize_regid = ir3_find_output_regid(last_shader, VARYING_SLOT_PSIZ); + clip0_regid = ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST0); + clip1_regid = ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST1); + layer_regid = ir3_find_output_regid(last_shader, VARYING_SLOT_LAYER); + view_regid = ir3_find_output_regid(last_shader, VARYING_SLOT_VIEWPORT); vertex_regid = ir3_find_sysval_regid(vs, SYSTEM_VALUE_VERTEX_ID); instance_regid = ir3_find_sysval_regid(vs, SYSTEM_VALUE_INSTANCE_ID); if (hs) @@ -475,7 +478,6 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, psize_regid = ir3_find_output_regid(gs, VARYING_SLOT_PSIZ); clip0_regid = ir3_find_output_regid(gs, VARYING_SLOT_CLIP_DIST0); clip1_regid = ir3_find_output_regid(gs, VARYING_SLOT_CLIP_DIST1); - layer_regid = ir3_find_output_regid(gs, VARYING_SLOT_LAYER); } else { gs_header_regid = regid(63, 0); } @@ -522,6 +524,9 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, if (!key->msaa) smask_regid = 
regid(63, 0); + if (!binning_pass) + state->num_viewports = VALIDREG(view_regid) ? PIPE_MAX_VIEWPORTS : 1; + /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. */ @@ -575,7 +580,6 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, } struct ir3_shader_linkage l = {0}; - const struct ir3_shader_variant *last_shader = fd6_last_shader(state); bool do_streamout = (last_shader->stream_output.num_outputs > 0); uint8_t clip_mask = last_shader->clip_mask, @@ -611,6 +615,11 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, ir3_link_add(&l, VARYING_SLOT_LAYER, layer_regid, 0x1, l.max_loc); } + if (VALIDREG(view_regid)) { + view_loc = l.max_loc; + ir3_link_add(&l, VARYING_SLOT_VIEWPORT, view_regid, 0x1, l.max_loc); + } + if (VALIDREG(pos_regid)) { pos_loc = l.max_loc; ir3_link_add(&l, VARYING_SLOT_POS, pos_regid, 0xf, l.max_loc); @@ -830,6 +839,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, OUT_RING(ring, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(l.max_loc) | CONDREG(psize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE) | CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) | + CONDREG(view_regid, A6XX_PC_VS_OUT_CNTL_VIEW) | A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask)); OUT_PKT4(ring, REG_A6XX_HLSQ_CONTROL_1_REG, 5); @@ -871,7 +881,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, OUT_PKT4(ring, REG_A6XX_VPC_VS_LAYER_CNTL, 1); OUT_RING(ring, A6XX_VPC_VS_LAYER_CNTL_LAYERLOC(layer_loc) | - A6XX_VPC_VS_LAYER_CNTL_VIEWLOC(0xff)); + A6XX_VPC_VS_LAYER_CNTL_VIEWLOC(view_loc)); bool need_size = fs->frag_face || fs->fragcoord_compmask != 0; bool need_size_persamp = false; @@ -979,11 +989,13 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, A6XX_VPC_GS_PACK_STRIDE_IN_VPC(l.max_loc)); OUT_PKT4(ring, REG_A6XX_VPC_GS_LAYER_CNTL, 1); - OUT_RING(ring, A6XX_VPC_GS_LAYER_CNTL_LAYERLOC(layer_loc) | 0xff00); + OUT_RING(ring, 
A6XX_VPC_GS_LAYER_CNTL_LAYERLOC(layer_loc) | + A6XX_VPC_GS_LAYER_CNTL_VIEWLOC(view_loc)); OUT_PKT4(ring, REG_A6XX_GRAS_GS_LAYER_CNTL, 1); OUT_RING(ring, - CONDREG(layer_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER)); + CONDREG(layer_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER) | + CONDREG(view_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_VIEW)); uint32_t flags_regid = ir3_find_output_regid(gs, VARYING_SLOT_GS_VERTEX_FLAGS_IR3); @@ -1000,6 +1012,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, A6XX_PC_GS_OUT_CNTL_STRIDE_IN_VPC(l.max_loc) | CONDREG(psize_regid, A6XX_PC_GS_OUT_CNTL_PSIZE) | CONDREG(layer_regid, A6XX_PC_GS_OUT_CNTL_LAYER) | + CONDREG(view_regid, A6XX_PC_GS_OUT_CNTL_VIEW) | COND(gs_reads_primid, A6XX_PC_GS_OUT_CNTL_PRIMITIVE_ID) | A6XX_PC_GS_OUT_CNTL_CLIP_MASK(clip_cull_mask)); @@ -1062,7 +1075,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_context *ctx, OUT_PKT4(ring, REG_A6XX_GRAS_VS_LAYER_CNTL, 1); OUT_RING(ring, - CONDREG(layer_regid, A6XX_GRAS_VS_LAYER_CNTL_WRITES_LAYER)); + CONDREG(layer_regid, A6XX_GRAS_VS_LAYER_CNTL_WRITES_LAYER) | + CONDREG(view_regid, A6XX_GRAS_VS_LAYER_CNTL_WRITES_VIEW)); } OUT_PKT4(ring, REG_A6XX_VPC_VS_CLIP_CNTL, 1); @@ -1215,7 +1229,8 @@ emit_interp_state(struct fd_ringbuffer *ring, const struct fd6_program_state *st vinterp[loc / 16] |= INTERP_ONE << ((loc % 16) * 2); loc++; } - } else if (fs->inputs[j].slot == VARYING_SLOT_LAYER) { + } else if (fs->inputs[j].slot == VARYING_SLOT_LAYER || + fs->inputs[j].slot == VARYING_SLOT_VIEWPORT) { const struct ir3_shader_variant *last_shader = fd6_last_shader(state); uint32_t loc = inloc; @@ -1223,7 +1238,7 @@ emit_interp_state(struct fd_ringbuffer *ring, const struct fd6_program_state *st * implicitly zero and the FS is supposed to read zero. 
*/ if (ir3_find_output(last_shader, fs->inputs[j].slot) < 0 && - (compmask & 0x1)) { + (compmask & 0x1)) { vinterp[loc / 16] |= INTERP_ZERO << ((loc % 16) * 2); } else { vinterp[loc / 16] |= INTERP_FLAT << ((loc % 16) * 2); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.h b/src/gallium/drivers/freedreno/a6xx/fd6_program.h index 9a62b50f830..8478cc2fa8c 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.h @@ -52,6 +52,12 @@ struct fd6_program_state { const struct ir3_stream_output_info *stream_output; + /** + * Number of viewports to emit state for: PIPE_MAX_VIEWPORTS when + * the last shader stage writes viewport id, otherwise 1 + */ + uint16_t num_viewports; + /** * Output components from frag shader. It is possible to have * a fragment shader that only writes a subset of the bound diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index a29c67d55bb..3bedb49bb34 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -328,7 +328,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_GLSL_FEATURE_LEVEL: case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY: if (is_a6xx(screen)) - return 400; + return 430; else if (is_ir3(screen)) return 140; else @@ -396,6 +396,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return is_a6xx(screen); case PIPE_CAP_MAX_VIEWPORTS: + if (is_a6xx(screen)) + return 16; return 1; case PIPE_CAP_MAX_VARYINGS: