diff --git a/.pick_status.json b/.pick_status.json index 06af7362e4b..5ae48edbabe 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -175,7 +175,7 @@ "description": "intel/vec4: Don't spill fp64 registers more than once", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "master_sha": null, "because_sha": "82c69426a5a32f9189c8b01059f831c84e9b83a3" }, diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index e9b3827599e..a5ebaaa46d2 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -572,6 +572,7 @@ enum opcode { VEC4_OPCODE_PICK_HIGH_32BIT, VEC4_OPCODE_SET_LOW_32BIT, VEC4_OPCODE_SET_HIGH_32BIT, + VEC4_OPCODE_MOV_FOR_SCRATCH, FS_OPCODE_DDX_COARSE, FS_OPCODE_DDX_FINE, diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp index 6198eb81a85..5e57f0d708f 100644 --- a/src/intel/compiler/brw_ir_performance.cpp +++ b/src/intel/compiler/brw_ir_performance.cpp @@ -379,6 +379,7 @@ namespace { case BRW_OPCODE_ADD: case BRW_OPCODE_MUL: case SHADER_OPCODE_MOV_RELOC_IMM: + case VEC4_OPCODE_MOV_FOR_SCRATCH: if (devinfo->ver >= 11) { return calculate_desc(info, unit_fpu, 0, 2, 0, 0, 2, 0, 10, 6, 14, 0, 0); diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index efd9b8bcb02..1ea61205b09 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -421,6 +421,8 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) return "set_low_32bit"; case VEC4_OPCODE_SET_HIGH_32BIT: return "set_high_32bit"; + case VEC4_OPCODE_MOV_FOR_SCRATCH: + return "mov_for_scratch"; case FS_OPCODE_DDX_COARSE: return "ddx_coarse"; diff --git a/src/intel/compiler/brw_vec4.h b/src/intel/compiler/brw_vec4.h index 9cfb48596a2..b928239f1ca 100644 --- a/src/intel/compiler/brw_vec4.h +++ b/src/intel/compiler/brw_vec4.h @@ -320,6 +320,7 @@ public: vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src, bool for_write, + bool for_scratch = false, bblock_t *block = NULL, vec4_instruction *ref = NULL); diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp index 1266095eb8b..bd7d83f3ce6 100644 --- a/src/intel/compiler/brw_vec4_generator.cpp +++ b/src/intel/compiler/brw_vec4_generator.cpp @@ -1538,6 +1538,7 @@ generate_code(struct brw_codegen *p, switch (inst->opcode) { case VEC4_OPCODE_UNPACK_UNIFORM: case BRW_OPCODE_MOV: + case VEC4_OPCODE_MOV_FOR_SCRATCH: brw_MOV(p, dst, src[0]); break; case BRW_OPCODE_ADD: diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp index 966bc223f84..188175adc41 100644 --- a/src/intel/compiler/brw_vec4_nir.cpp +++ b/src/intel/compiler/brw_vec4_nir.cpp @@ -2138,6 +2138,7 @@ vec4_visitor::nir_emit_undef(nir_ssa_undef_instr *instr) */ vec4_instruction * vec4_visitor::shuffle_64bit_data(dst_reg dst, src_reg src, bool for_write, + bool for_scratch, bblock_t *block, vec4_instruction *ref) { assert(type_sz(src.type) == 8); @@ -2145,6 +2146,8 @@ vec4_visitor::shuffle_64bit_data(dst_reg dst, src_reg src, bool for_write, assert(!regions_overlap(dst, 2 * REG_SIZE, src, 2 * REG_SIZE)); assert(!ref == !block); + opcode mov_op = for_scratch ? VEC4_OPCODE_MOV_FOR_SCRATCH : BRW_OPCODE_MOV; + const vec4_builder bld = !ref ? vec4_builder(this).at_end() : vec4_builder(this).at(block, ref->next); @@ -2156,22 +2159,22 @@ vec4_visitor::shuffle_64bit_data(dst_reg dst, src_reg src, bool for_write, } /* dst+0.XY = src+0.XY */ - bld.group(4, 0).MOV(writemask(dst, WRITEMASK_XY), src); + bld.group(4, 0).emit(mov_op, writemask(dst, WRITEMASK_XY), src); /* dst+0.ZW = src+1.XY */ bld.group(4, for_write ? 1 : 0) - .MOV(writemask(dst, WRITEMASK_ZW), + .emit(mov_op, writemask(dst, WRITEMASK_ZW), swizzle(byte_offset(src, REG_SIZE), BRW_SWIZZLE_XYXY)); /* dst+1.XY = src+0.ZW */ bld.group(4, for_write ? 0 : 1) - .MOV(writemask(byte_offset(dst, REG_SIZE), WRITEMASK_XY), - swizzle(src, BRW_SWIZZLE_ZWZW)); + .emit(mov_op, writemask(byte_offset(dst, REG_SIZE), WRITEMASK_XY), + swizzle(src, BRW_SWIZZLE_ZWZW)); /* dst+1.ZW = src+1.ZW */ return bld.group(4, 1) - .MOV(writemask(byte_offset(dst, REG_SIZE), WRITEMASK_ZW), - byte_offset(src, REG_SIZE)); + .emit(mov_op, writemask(byte_offset(dst, REG_SIZE), WRITEMASK_ZW), + byte_offset(src, REG_SIZE)); } } diff --git a/src/intel/compiler/brw_vec4_reg_allocate.cpp b/src/intel/compiler/brw_vec4_reg_allocate.cpp index 35c47fc37ca..19917124d79 100644 --- a/src/intel/compiler/brw_vec4_reg_allocate.cpp +++ b/src/intel/compiler/brw_vec4_reg_allocate.cpp @@ -468,6 +468,7 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) case SHADER_OPCODE_GFX4_SCRATCH_READ: case SHADER_OPCODE_GFX4_SCRATCH_WRITE: + case VEC4_OPCODE_MOV_FOR_SCRATCH: for (int i = 0; i < 3; i++) { if (inst->src[i].file == VGRF) no_spill[inst->src[i].nr] = true; diff --git a/src/intel/compiler/brw_vec4_visitor.cpp b/src/intel/compiler/brw_vec4_visitor.cpp index cfad712e01a..c20d620a85a 100644 --- a/src/intel/compiler/brw_vec4_visitor.cpp +++ b/src/intel/compiler/brw_vec4_visitor.cpp @@ -1400,7 +1400,7 @@ vec4_visitor::emit_scratch_read(bblock_t *block, vec4_instruction *inst, vec4_instruction *last_read = SCRATCH_READ(byte_offset(shuffled_float, REG_SIZE), index); emit_before(block, inst, last_read); - shuffle_64bit_data(temp, src_reg(shuffled), false, block, last_read); + shuffle_64bit_data(temp, src_reg(shuffled), false, true, block, last_read); } } @@ -1445,7 +1445,7 @@ vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst, } else { dst_reg shuffled = dst_reg(this, alloc_type); vec4_instruction *last = - shuffle_64bit_data(shuffled, temp, true, block, inst); + shuffle_64bit_data(shuffled, temp, true, true, block, inst); src_reg shuffled_float = src_reg(retype(shuffled, BRW_REGISTER_TYPE_F)); uint8_t mask = 0; @@ -1652,7 +1652,7 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst, if (is_64bit) { temp = retype(temp, BRW_REGISTER_TYPE_DF); - shuffle_64bit_data(orig_temp, src_reg(temp), false, block, inst); + shuffle_64bit_data(orig_temp, src_reg(temp), false, false, block, inst); } }