mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 00:10:10 +01:00
optimize 965 clip
1. increase clip thread number to 2 2. do cliptest for -rhw
This commit is contained in:
parent
22bc3cdd49
commit
b47c9f8c91
4 changed files with 112 additions and 14 deletions
|
|
@ -147,6 +147,13 @@ static void clip_and_emit_line( struct brw_clip_compile *c )
|
|||
brw_clip_init_planes(c);
|
||||
brw_clip_init_clipmask(c);
|
||||
|
||||
/* -ve rhw workaround */
|
||||
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
|
||||
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
|
||||
brw_imm_ud(1<<20));
|
||||
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
|
||||
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
|
||||
plane_loop = brw_DO(p, BRW_EXECUTE_1);
|
||||
{
|
||||
/* if (planemask & 1)
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ static void upload_clip_unit( struct brw_context *brw )
|
|||
/* BRW_NEW_URB_FENCE */
|
||||
clip.thread4.nr_urb_entries = brw->urb.nr_clip_entries;
|
||||
clip.thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
|
||||
clip.thread4.max_threads = 0; /* Hmm, maybe the max is 1 or 2 threads */
|
||||
clip.thread4.max_threads = 1; /* 2 threads */
|
||||
|
||||
if (INTEL_DEBUG & DEBUG_STATS)
|
||||
clip.thread4.stats_enable = 1;
|
||||
|
|
|
|||
|
|
@ -42,6 +42,20 @@
|
|||
#include "brw_util.h"
|
||||
#include "brw_clip.h"
|
||||
|
||||
static struct brw_reg get_tmp( struct brw_clip_compile *c )
|
||||
{
|
||||
struct brw_reg tmp = brw_vec4_grf(c->last_tmp, 0);
|
||||
|
||||
if (++c->last_tmp > c->prog_data.total_grf)
|
||||
c->prog_data.total_grf = c->last_tmp;
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static void release_tmps( struct brw_clip_compile *c )
|
||||
{
|
||||
c->last_tmp = c->first_tmp;
|
||||
}
|
||||
|
||||
|
||||
void brw_clip_tri_alloc_regs( struct brw_clip_compile *c,
|
||||
|
|
@ -435,15 +449,103 @@ static void maybe_do_clip_tri( struct brw_clip_compile *c )
|
|||
brw_ENDIF(p, do_clip);
|
||||
}
|
||||
|
||||
static void brw_clip_test( struct brw_clip_compile *c )
|
||||
{
|
||||
struct brw_reg t = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
|
||||
struct brw_reg t1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
|
||||
struct brw_reg t2 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
|
||||
struct brw_reg t3 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
|
||||
|
||||
struct brw_reg v0 = get_tmp(c);
|
||||
struct brw_reg v1 = get_tmp(c);
|
||||
struct brw_reg v2 = get_tmp(c);
|
||||
|
||||
struct brw_indirect vt0 = brw_indirect(0, 0);
|
||||
struct brw_indirect vt1 = brw_indirect(1, 0);
|
||||
struct brw_indirect vt2 = brw_indirect(2, 0);
|
||||
|
||||
struct brw_compile *p = &c->func;
|
||||
|
||||
brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0]));
|
||||
brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1]));
|
||||
brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2]));
|
||||
brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS]));
|
||||
brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS]));
|
||||
brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS]));
|
||||
brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f));
|
||||
|
||||
/* test nearz, xmin, ymin plane */
|
||||
brw_CMP(p, t1, BRW_CONDITIONAL_LE, negate(v0), get_element(v0, 3));
|
||||
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
brw_CMP(p, t2, BRW_CONDITIONAL_LE, negate(v1), get_element(v1, 3));
|
||||
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
brw_CMP(p, t3, BRW_CONDITIONAL_LE, negate(v2), get_element(v2, 3));
|
||||
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
brw_XOR(p, t, t1, t2);
|
||||
brw_XOR(p, t1, t2, t3);
|
||||
brw_OR(p, t, t, t1);
|
||||
|
||||
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
|
||||
get_element(t, 0), brw_imm_ud(0));
|
||||
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<5)));
|
||||
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
|
||||
get_element(t, 1), brw_imm_ud(0));
|
||||
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<3)));
|
||||
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
|
||||
get_element(t, 2), brw_imm_ud(0));
|
||||
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<1)));
|
||||
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
|
||||
/* test farz, xmax, ymax plane */
|
||||
brw_CMP(p, t1, BRW_CONDITIONAL_L, v0, get_element(v0, 3));
|
||||
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
brw_CMP(p, t2, BRW_CONDITIONAL_L, v1, get_element(v1, 3));
|
||||
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
brw_CMP(p, t3, BRW_CONDITIONAL_L, v2, get_element(v2, 3));
|
||||
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
|
||||
brw_XOR(p, t, t1, t2);
|
||||
brw_XOR(p, t1, t2, t3);
|
||||
brw_OR(p, t, t, t1);
|
||||
|
||||
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
|
||||
get_element(t, 0), brw_imm_ud(0));
|
||||
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<4)));
|
||||
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
|
||||
get_element(t, 1), brw_imm_ud(0));
|
||||
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<2)));
|
||||
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_NZ,
|
||||
get_element(t, 2), brw_imm_ud(0));
|
||||
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud((1<<0)));
|
||||
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
|
||||
release_tmps(c);
|
||||
}
|
||||
|
||||
|
||||
void brw_emit_tri_clip( struct brw_clip_compile *c )
|
||||
{
|
||||
struct brw_instruction *neg_rhw;
|
||||
struct brw_compile *p = &c->func;
|
||||
brw_clip_tri_alloc_regs(c, 3 + c->key.nr_userclip + 6);
|
||||
brw_clip_tri_init_vertices(c);
|
||||
brw_clip_init_clipmask(c);
|
||||
|
||||
/* if -ve rhw workaround bit is set,
|
||||
do cliptest */
|
||||
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
|
||||
brw_AND(p, brw_null_reg(), get_element_ud(c->reg.R0, 2),
|
||||
brw_imm_ud(1<<20));
|
||||
neg_rhw = brw_IF(p, BRW_EXECUTE_1);
|
||||
{
|
||||
brw_clip_test(c);
|
||||
}
|
||||
brw_ENDIF(p, neg_rhw);
|
||||
|
||||
/* Can't push into do_clip_tri because with polygon (or quad)
|
||||
* flatshading, need to apply the flatshade here because we don't
|
||||
* respect the PV when converting to trifan for emit:
|
||||
|
|
@ -462,6 +564,3 @@ void brw_emit_tri_clip( struct brw_clip_compile *c )
|
|||
*/
|
||||
brw_clip_kill_thread(c);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -272,6 +272,7 @@ void brw_clip_kill_thread(struct brw_clip_compile *c)
|
|||
|
||||
|
||||
|
||||
|
||||
struct brw_reg brw_clip_plane0_address( struct brw_clip_compile *c )
|
||||
{
|
||||
return brw_address(c->reg.fixed_planes);
|
||||
|
|
@ -327,8 +328,7 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c )
|
|||
|
||||
/* Shift so that lowest outcode bit is rightmost:
|
||||
*/
|
||||
brw_MOV(p, c->reg.planemask, incoming);
|
||||
brw_SHR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(26));
|
||||
brw_SHR(p, c->reg.planemask, incoming, brw_imm_ud(26));
|
||||
|
||||
if (c->key.nr_userclip) {
|
||||
struct brw_reg tmp = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UD);
|
||||
|
|
@ -342,13 +342,5 @@ void brw_clip_init_clipmask( struct brw_clip_compile *c )
|
|||
|
||||
release_tmp(c, tmp);
|
||||
}
|
||||
|
||||
/* Test for -ve rhw workaround
|
||||
*/
|
||||
brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
|
||||
brw_AND(p, vec1(brw_null_reg()), incoming, brw_imm_ud(1<<20));
|
||||
brw_OR(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(0x3f));
|
||||
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue