From ee32e9b4753eca62e360f96ce61ef7ff683e6bb7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Apr 2009 16:49:18 -0600 Subject: [PATCH] i965: implement relative addressing for VS constant buffer reads A scatter-read should be possible, but we're just using two READs for the time being. --- src/mesa/drivers/dri/i965/brw_eu.h | 1 + src/mesa/drivers/dri/i965/brw_eu_emit.c | 59 ++++++++++- src/mesa/drivers/dri/i965/brw_vs_emit.c | 130 ++++++++++++------------ 3 files changed, 123 insertions(+), 67 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 66f8eb840c1..896e67dbfe9 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -868,6 +868,7 @@ void brw_dp_READ_4( struct brw_compile *p, void brw_dp_READ_4_vs( struct brw_compile *p, struct brw_reg dest, GLboolean relAddr, + struct brw_reg addrReg, GLuint location, GLuint bind_table_index ); diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index c731a93a8d6..df2141660c0 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -1003,15 +1003,18 @@ void brw_dp_READ_4( struct brw_compile *p, /** - * Read float[4] constant from VS constant buffer. + * Read float[4] constant(s) from VS constant buffer. + * For relative addressing, two float[4] constants will be read into 'dest'. + * Otherwise, one float[4] constant will be read into the lower half of 'dest'. */ void brw_dp_READ_4_vs(struct brw_compile *p, struct brw_reg dest, GLboolean relAddr, + struct brw_reg addrReg, GLuint location, GLuint bind_table_index) { - const GLuint msg_reg_nr = 1; + GLuint msg_reg_nr = 1; /* printf("vs const read msg, location %u, msg_reg_nr %d\n", @@ -1034,7 +1037,12 @@ void brw_dp_READ_4_vs(struct brw_compile *p, b = brw_message_reg(msg_reg_nr); b = retype(b, BRW_REGISTER_TYPE_UD); /*b = get_element_ud(b, 2);*/ - brw_MOV(p, b, brw_imm_ud(location)); + if (relAddr) { + brw_ADD(p, b, addrReg, brw_imm_ud(location)); + } + else { + brw_MOV(p, b, brw_imm_ud(location)); + } brw_pop_insn_state(p); } @@ -1053,13 +1061,56 @@ void brw_dp_READ_4_vs(struct brw_compile *p, brw_set_dp_read_message(insn, bind_table_index, - 0, /* msg_control (0 means 1 Oword) */ + 0, /* msg_control (0 means 1 Oword, lower half) */ BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ 0, /* source cache = data cache */ 1, /* msg_length */ 1, /* response_length (1 Oword) */ 0); /* eot */ } + + if (relAddr) { + /* second read to get second constant */ + msg_reg_nr++; + { + /* Setup MRF[1] with location/offset into const buffer */ + struct brw_reg b; + + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + + b = brw_message_reg(msg_reg_nr); + b = retype(b, BRW_REGISTER_TYPE_UD); + addrReg = suboffset(addrReg, 1); /* upper half of addrReg */ + brw_ADD(p, b, addrReg, brw_imm_ud(location)); + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + /*insn->header.access_mode = BRW_ALIGN_16;*/ + + brw_set_dest(insn, dest); + brw_set_src0(insn, brw_null_reg()); + + brw_set_dp_read_message(insn, + bind_table_index, + 1, /* msg_control (1 means 1 Oword, upper half) */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 0, /* source cache = data cache */ + 1, /* msg_length */ + 1, /* response_length (1 Oword) */ + 0); /* eot */ + } + } } diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 19ead73d8ca..98fbdf5064d 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -709,7 +709,7 @@ get_constant(struct brw_vs_compile *c, assert(argIndex < 3); - if (c->current_const[argIndex].index != src->Index) { + if (c->current_const[argIndex].index != src->Index || src->RelAddr) { c->current_const[argIndex].index = src->Index; @@ -722,15 +722,18 @@ get_constant(struct brw_vs_compile *c, brw_dp_READ_4_vs(p, c->current_const[argIndex].reg, /* writeback dest */ src->RelAddr, /* relative indexing? */ + c->regs[PROGRAM_ADDRESS][0], /* address register */ 16 * src->Index, /* byte offset */ SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ ); } - /* replicate lower four floats into upper four floats (to get XYZWXYZW) */ const_reg = c->current_const[argIndex].reg; - const_reg = stride(const_reg, 0, 4, 0); - const_reg.subnr = 0; + if (!src->RelAddr) { + /* replicate lower four floats into upper half (to get XYZWXYZW) */ + const_reg = stride(const_reg, 0, 4, 0); + const_reg.subnr = 0; + } return const_reg; } @@ -771,52 +774,6 @@ static struct brw_reg get_reg( struct brw_vs_compile *c, } -/** - * Get brw reg corresponding to the instruction's [argIndex] src reg. - * TODO: relative addressing! - */ -static struct brw_reg -get_src_reg( struct brw_vs_compile *c, - const struct prog_instruction *inst, - GLuint argIndex ) -{ - const GLuint file = inst->SrcReg[argIndex].File; - const GLint index = inst->SrcReg[argIndex].Index; - - switch (file) { - case PROGRAM_TEMPORARY: - case PROGRAM_INPUT: - case PROGRAM_OUTPUT: - assert(c->regs[file][index].nr != 0); - return c->regs[file][index]; - case PROGRAM_STATE_VAR: - case PROGRAM_CONSTANT: - case PROGRAM_UNIFORM: - if (c->use_const_buffer) { - return get_constant(c, inst, argIndex); - } - else { - assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); - return c->regs[PROGRAM_STATE_VAR][index]; - } - case PROGRAM_ADDRESS: - assert(index == 0); - return c->regs[file][index]; - - case PROGRAM_UNDEFINED: - /* this is a normal case since we loop over all three src args */ - return brw_null_reg(); - - case PROGRAM_LOCAL_PARAM: - case PROGRAM_ENV_PARAM: - case PROGRAM_WRITE_ONLY: - default: - assert(0); - return brw_null_reg(); - } -} - - /** * Indirect addressing: get reg[[arg] + offset]. */ @@ -853,6 +810,62 @@ static struct brw_reg deref( struct brw_vs_compile *c, } +/** + * Get brw reg corresponding to the instruction's [argIndex] src reg. + * TODO: relative addressing! + */ +static struct brw_reg +get_src_reg( struct brw_vs_compile *c, + const struct prog_instruction *inst, + GLuint argIndex ) +{ + const GLuint file = inst->SrcReg[argIndex].File; + const GLint index = inst->SrcReg[argIndex].Index; + const GLboolean relAddr = inst->SrcReg[argIndex].RelAddr; + + switch (file) { + case PROGRAM_TEMPORARY: + case PROGRAM_INPUT: + case PROGRAM_OUTPUT: + if (relAddr) { + return deref(c, c->regs[file][0], index); + } + else { + assert(c->regs[file][index].nr != 0); + return c->regs[file][index]; + } + + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + if (c->use_const_buffer) { + return get_constant(c, inst, argIndex); + } + else if (relAddr) { + return deref(c, c->regs[PROGRAM_STATE_VAR][0], index); + } + else { + assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); + return c->regs[PROGRAM_STATE_VAR][index]; + } + case PROGRAM_ADDRESS: + assert(index == 0); + return c->regs[file][index]; + + case PROGRAM_UNDEFINED: + /* this is a normal case since we loop over all three src args */ + return brw_null_reg(); + + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_WRITE_ONLY: + default: + assert(0); + return brw_null_reg(); + } +} + + static void emit_arl( struct brw_vs_compile *c, struct brw_reg dst, struct brw_reg arg0 ) @@ -864,8 +877,8 @@ static void emit_arl( struct brw_vs_compile *c, if (need_tmp) tmp = get_tmp(c); - brw_RNDD(p, tmp, arg0); - brw_MUL(p, dst, tmp, brw_imm_d(16)); + brw_RNDD(p, tmp, arg0); /* tmp = round(arg0) */ + brw_MUL(p, dst, tmp, brw_imm_d(16)); /* dst = tmp * 16 */ if (need_tmp) release_tmp(c, tmp); @@ -888,13 +901,7 @@ static struct brw_reg get_arg( struct brw_vs_compile *c, if (src->File == PROGRAM_UNDEFINED) return brw_null_reg(); - if (src->RelAddr) { - /* XXX fix */ - reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); - } - else { - reg = get_src_reg(c, inst, argIndex); - } + reg = get_src_reg(c, inst, argIndex); /* Convert 3-bit swizzle to 2-bit. */ @@ -989,10 +996,7 @@ static void emit_swz( struct brw_vs_compile *c, if (src_mask) { struct brw_reg arg0; - if (src.RelAddr) - arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index); - else - arg0 = get_src_reg(c, inst, argIndex); + arg0 = get_src_reg(c, inst, argIndex); arg0 = brw_swizzle(arg0, src_swz[0], src_swz[1],