mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 02:38:04 +02:00
i965/fs: Use instruction execution sizes instead of heuristics
Signed-off-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
This commit is contained in:
parent
894ec5a1d8
commit
b18fd234da
3 changed files with 10 additions and 23 deletions
|
|
@@ -2426,8 +2426,7 @@ fs_visitor::compute_to_mrf()
|
||||||
int mrf_high;
|
int mrf_high;
|
||||||
if (inst->dst.reg & BRW_MRF_COMPR4) {
|
if (inst->dst.reg & BRW_MRF_COMPR4) {
|
||||||
mrf_high = mrf_low + 4;
|
mrf_high = mrf_low + 4;
|
||||||
} else if (dispatch_width == 16 &&
|
} else if (inst->exec_size == 16) {
|
||||||
(!inst->force_uncompressed && !inst->force_sechalf)) {
|
|
||||||
mrf_high = mrf_low + 1;
|
mrf_high = mrf_low + 1;
|
||||||
} else {
|
} else {
|
||||||
mrf_high = mrf_low;
|
mrf_high = mrf_low;
|
||||||
|
|
@@ -2517,9 +2516,7 @@ fs_visitor::compute_to_mrf()
|
||||||
|
|
||||||
if (scan_inst->dst.reg & BRW_MRF_COMPR4) {
|
if (scan_inst->dst.reg & BRW_MRF_COMPR4) {
|
||||||
scan_mrf_high = scan_mrf_low + 4;
|
scan_mrf_high = scan_mrf_low + 4;
|
||||||
} else if (dispatch_width == 16 &&
|
} else if (scan_inst->exec_size == 16) {
|
||||||
(!scan_inst->force_uncompressed &&
|
|
||||||
!scan_inst->force_sechalf)) {
|
|
||||||
scan_mrf_high = scan_mrf_low + 1;
|
scan_mrf_high = scan_mrf_low + 1;
|
||||||
} else {
|
} else {
|
||||||
scan_mrf_high = scan_mrf_low;
|
scan_mrf_high = scan_mrf_low;
|
||||||
|
|
@@ -2675,10 +2672,6 @@ static void
|
||||||
clear_deps_for_inst_src(fs_inst *inst, int dispatch_width, bool *deps,
|
clear_deps_for_inst_src(fs_inst *inst, int dispatch_width, bool *deps,
|
||||||
int first_grf, int grf_len)
|
int first_grf, int grf_len)
|
||||||
{
|
{
|
||||||
bool inst_simd16 = (dispatch_width > 8 &&
|
|
||||||
!inst->force_uncompressed &&
|
|
||||||
!inst->force_sechalf);
|
|
||||||
|
|
||||||
/* Clear the flag for registers that actually got read (as expected). */
|
/* Clear the flag for registers that actually got read (as expected). */
|
||||||
for (int i = 0; i < inst->sources; i++) {
|
for (int i = 0; i < inst->sources; i++) {
|
||||||
int grf;
|
int grf;
|
||||||
|
|
@@ -2694,7 +2687,7 @@ clear_deps_for_inst_src(fs_inst *inst, int dispatch_width, bool *deps,
|
||||||
if (grf >= first_grf &&
|
if (grf >= first_grf &&
|
||||||
grf < first_grf + grf_len) {
|
grf < first_grf + grf_len) {
|
||||||
deps[grf - first_grf] = false;
|
deps[grf - first_grf] = false;
|
||||||
if (inst_simd16)
|
if (inst->exec_size == 16)
|
||||||
deps[grf - first_grf + 1] = false;
|
deps[grf - first_grf + 1] = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@@ -2749,10 +2742,6 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool scan_inst_simd16 = (dispatch_width > 8 &&
|
|
||||||
!scan_inst->force_uncompressed &&
|
|
||||||
!scan_inst->force_sechalf);
|
|
||||||
|
|
||||||
/* We insert our reads as late as possible on the assumption that any
|
/* We insert our reads as late as possible on the assumption that any
|
||||||
* instruction but a MOV that might have left us an outstanding
|
* instruction but a MOV that might have left us an outstanding
|
||||||
* dependency has more latency than a MOV.
|
* dependency has more latency than a MOV.
|
||||||
|
|
@@ -2766,7 +2755,7 @@ fs_visitor::insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
|
||||||
needs_dep[reg - first_write_grf]) {
|
needs_dep[reg - first_write_grf]) {
|
||||||
inst->insert_before(block, DEP_RESOLVE_MOV(reg));
|
inst->insert_before(block, DEP_RESOLVE_MOV(reg));
|
||||||
needs_dep[reg - first_write_grf] = false;
|
needs_dep[reg - first_write_grf] = false;
|
||||||
if (scan_inst_simd16)
|
if (scan_inst->exec_size == 16)
|
||||||
needs_dep[reg - first_write_grf + 1] = false;
|
needs_dep[reg - first_write_grf + 1] = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@@ -85,11 +85,11 @@ fs_live_variables::setup_one_read(bblock_t *block, fs_inst *inst,
|
||||||
* would get stomped by the first decode as well.
|
* would get stomped by the first decode as well.
|
||||||
*/
|
*/
|
||||||
int end_ip = ip;
|
int end_ip = ip;
|
||||||
if (v->dispatch_width == 16 && (reg.stride == 0 ||
|
if (inst->exec_size == 16 && (reg.stride == 0 ||
|
||||||
reg.type == BRW_REGISTER_TYPE_UW ||
|
reg.type == BRW_REGISTER_TYPE_UW ||
|
||||||
reg.type == BRW_REGISTER_TYPE_W ||
|
reg.type == BRW_REGISTER_TYPE_W ||
|
||||||
reg.type == BRW_REGISTER_TYPE_UB ||
|
reg.type == BRW_REGISTER_TYPE_UB ||
|
||||||
reg.type == BRW_REGISTER_TYPE_B)) {
|
reg.type == BRW_REGISTER_TYPE_B)) {
|
||||||
end_ip++;
|
end_ip++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -747,9 +747,7 @@ instruction_scheduler::add_barrier_deps(schedule_node *n)
|
||||||
bool
|
bool
|
||||||
fs_instruction_scheduler::is_compressed(fs_inst *inst)
|
fs_instruction_scheduler::is_compressed(fs_inst *inst)
|
||||||
{
|
{
|
||||||
return (v->dispatch_width == 16 &&
|
return inst->exec_size == 16;
|
||||||
!inst->force_uncompressed &&
|
|
||||||
!inst->force_sechalf);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue