diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 7dac4078559..edbbc7d5451 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -107,6 +107,7 @@ struct aa_transform_context { uint64_t tempsUsed; /**< bitmask */ int colorOutput; /**< which output is the primary color */ int maxInput, maxGeneric; /**< max input index found */ + int numImm; /**< number of immediate regsters */ int colorTemp, aaTemp; /**< temp registers */ }; @@ -147,6 +148,18 @@ aa_transform_decl(struct tgsi_transform_context *ctx, ctx->emit_declaration(ctx, decl); } +/** + * TGSI immediate declaration transform callback. + */ +static void +aa_immediate(struct tgsi_transform_context *ctx, + struct tgsi_full_immediate *imm) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *)ctx; + + ctx->emit_immediate(ctx, imm); + aactx->numImm++; +} /** * Find the lowest zero bit, or -1 if bitfield is all ones. @@ -182,6 +195,9 @@ aa_transform_prolog(struct tgsi_transform_context *ctx) /* declare new temp regs */ tgsi_transform_temp_decl(ctx, aactx->aaTemp); tgsi_transform_temp_decl(ctx, aactx->colorTemp); + + /* declare new immediate reg */ + tgsi_transform_immediate_decl(ctx, 2.0, -1.0, 0.0, 0.25); } @@ -215,6 +231,26 @@ aa_transform_epilog(struct tgsi_transform_context *ctx) inst.Src[1].Register.Negate = true; ctx->emit_instruction(ctx, &inst); + /* linelength * 2 - 1 */ + tgsi_transform_op3_swz_inst(ctx, TGSI_OPCODE_MAD, + TGSI_FILE_TEMPORARY, aactx->aaTemp, + TGSI_WRITEMASK_Y, + TGSI_FILE_INPUT, aactx->maxInput + 1, + TGSI_SWIZZLE_W, false, + TGSI_FILE_IMMEDIATE, aactx->numImm, + TGSI_SWIZZLE_X, + TGSI_FILE_IMMEDIATE, aactx->numImm, + TGSI_SWIZZLE_Y); + + /* MIN height alpha */ + tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN, + TGSI_FILE_TEMPORARY, aactx->aaTemp, + TGSI_WRITEMASK_Z, + TGSI_FILE_TEMPORARY, aactx->aaTemp, + TGSI_SWIZZLE_Z, + TGSI_FILE_TEMPORARY, aactx->aaTemp, + TGSI_SWIZZLE_Y, false); + /* MUL width / height alpha */ tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, aactx->aaTemp, @@ -292,6 +328,7 @@ generate_aaline_fs(struct aaline_stage *aaline) transform.base.epilog = aa_transform_epilog; transform.base.transform_instruction = aa_transform_inst; transform.base.transform_declaration = aa_transform_decl; + transform.base.transform_immediate = aa_immediate; aaline_fs.tokens = tgsi_transform_shader(orig_fs->tokens, newLen, &transform.base); if (!aaline_fs.tokens) @@ -390,29 +427,7 @@ aaline_line(struct draw_stage *stage, struct prim_header *header) uint i; half_length = 0.5f * sqrtf(dx * dx + dy * dy); - - if (half_length < 0.5f) { - /* - * The logic we use for "normal" sized segments is incorrect - * for very short segments (basically because we only have - * one value to interpolate, not a distance to each endpoint). - * Therefore, we calculate half_length differently, so that for - * original line length (near) 0, we get alpha 0 - otherwise - * max alpha would still be 0.5. This also prevents us from - * artifacts due to degenerated lines (the endpoints being - * identical, which would still receive anywhere from alpha - * 0-0.5 otherwise) (at least the pstipple stage may generate - * such lines due to float inaccuracies if line length is very - * close to a integer). - * Might not be fully accurate neither (because the "strength" of - * the line is going to be determined by how close to the pixel - * center those 1 or 2 fragments are) but it's probably the best - * we can do. - */ - half_length = 2.0f * half_length; - } else { - half_length = half_length + 0.5f; - } + half_length = half_length + 0.5f; t_w = half_width; t_l = 0.5f; diff --git a/src/gallium/auxiliary/nir/nir_draw_helpers.c b/src/gallium/auxiliary/nir/nir_draw_helpers.c index 098b83b194b..4588b56f461 100644 --- a/src/gallium/auxiliary/nir/nir_draw_helpers.c +++ b/src/gallium/auxiliary/nir/nir_draw_helpers.c @@ -174,10 +174,13 @@ nir_lower_aaline_block(nir_block *block, nir_ssa_def *out_input = intrin->src[1].ssa; b->cursor = nir_before_instr(instr); nir_ssa_def *lw = nir_load_var(b, state->line_width_input); + nir_ssa_def *len = nir_channel(b, lw, 3); + len = nir_fadd_imm(b, nir_fmul_imm(b, len, 2.0), -1.0); nir_ssa_def *tmp = nir_fsat(b, nir_fadd(b, nir_channels(b, lw, 0xa), nir_fneg(b, nir_fabs(b, nir_channels(b, lw, 0x5))))); - tmp = nir_fmul(b, nir_channel(b, tmp, 0), nir_channel(b, tmp, 1)); + tmp = nir_fmul(b, nir_channel(b, tmp, 0), + nir_fmin(b, nir_channel(b, tmp, 1), len)); tmp = nir_fmul(b, nir_channel(b, out_input, 3), tmp); nir_ssa_def *out = nir_vec4(b, nir_channel(b, out_input, 0), diff --git a/src/gallium/drivers/llvmpipe/ci/traces-llvmpipe.yml b/src/gallium/drivers/llvmpipe/ci/traces-llvmpipe.yml index 5fd0723f2a7..4eabbbb78c5 100644 --- a/src/gallium/drivers/llvmpipe/ci/traces-llvmpipe.yml +++ b/src/gallium/drivers/llvmpipe/ci/traces-llvmpipe.yml @@ -72,7 +72,7 @@ traces: checksum: de5452f4cbc0100d8ecb51459e47cd99 bgfx/29-debugdraw.rdc: gl-vmware-llvmpipe: - checksum: 164e5226af26b6552506542a45bc6bf5 + checksum: 015201fe000d6a323b0f7d3f218d3e47 bgfx/31-rsm.rdc: gl-vmware-llvmpipe: checksum: b59d323511488d5c098ebfa9b434c2dc @@ -120,7 +120,7 @@ traces: checksum: a55dd3d87a86b3b47121ff67861028c3 jvgs/jvgs-d27fb67-v2.trace: gl-vmware-llvmpipe: - checksum: b8c21bf76e667735d1640b215f456531 + checksum: 43b89627364b4cabbab84931aef4ce5e pathfinder/demo-v2.trace: gl-vmware-llvmpipe: checksum: a053c56658bc830249bc94317a3b3ea8