diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
index b82705e46d9..3fccd180cfb 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -573,6 +573,7 @@ enum opcode {
    VEC4_OPCODE_PICK_HIGH_32BIT,
    VEC4_OPCODE_SET_LOW_32BIT,
    VEC4_OPCODE_SET_HIGH_32BIT,
+   VEC4_OPCODE_MOV_FOR_SCRATCH,
 
    FS_OPCODE_DDX_COARSE,
    FS_OPCODE_DDX_FINE,
diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp
index 786e2e45a86..fa0aca6344b 100644
--- a/src/intel/compiler/brw_ir_performance.cpp
+++ b/src/intel/compiler/brw_ir_performance.cpp
@@ -379,6 +379,7 @@ namespace {
       case BRW_OPCODE_ADD:
       case BRW_OPCODE_MUL:
       case SHADER_OPCODE_MOV_RELOC_IMM:
+      case VEC4_OPCODE_MOV_FOR_SCRATCH:
          if (devinfo->ver >= 11) {
             return calculate_desc(info, unit_fpu, 0, 2, 0, 0, 2, 0,
                                   10, 6, 14, 0, 0);
diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp
index 91ee7c08603..bc50595137c 100644
--- a/src/intel/compiler/brw_shader.cpp
+++ b/src/intel/compiler/brw_shader.cpp
@@ -421,6 +421,8 @@ brw_instruction_name(const struct intel_device_info *devinfo, enum opcode op)
       return "set_low_32bit";
    case VEC4_OPCODE_SET_HIGH_32BIT:
       return "set_high_32bit";
+   case VEC4_OPCODE_MOV_FOR_SCRATCH:
+      return "mov_for_scratch";
 
    case FS_OPCODE_DDX_COARSE:
       return "ddx_coarse";
diff --git a/src/intel/compiler/brw_vec4.h b/src/intel/compiler/brw_vec4.h
index 9cfb48596a2..b928239f1ca 100644
--- a/src/intel/compiler/brw_vec4.h
+++ b/src/intel/compiler/brw_vec4.h
@@ -320,6 +320,7 @@ public:
    vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
                                         bool for_write,
+                                        bool for_scratch = false,
                                         bblock_t *block = NULL,
                                         vec4_instruction *ref = NULL);
diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp
index 67232e44d9a..1039bf2927c 100644
--- a/src/intel/compiler/brw_vec4_generator.cpp
+++ b/src/intel/compiler/brw_vec4_generator.cpp
@@ -1537,6 +1537,7 @@ generate_code(struct brw_codegen *p,
       switch (inst->opcode) {
       case VEC4_OPCODE_UNPACK_UNIFORM:
       case BRW_OPCODE_MOV:
+      case VEC4_OPCODE_MOV_FOR_SCRATCH:
          brw_MOV(p, dst, src[0]);
          break;
       case BRW_OPCODE_ADD:
diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp
index 84598d10463..636dbfb6ea0 100644
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -2138,6 +2138,7 @@ vec4_visitor::nir_emit_undef(nir_ssa_undef_instr *instr)
  */
 vec4_instruction *
 vec4_visitor::shuffle_64bit_data(dst_reg dst, src_reg src, bool for_write,
+                                 bool for_scratch,
                                  bblock_t *block, vec4_instruction *ref)
 {
    assert(type_sz(src.type) == 8);
@@ -2145,6 +2146,8 @@ vec4_visitor::shuffle_64bit_data(dst_reg dst, src_reg src, bool for_write,
    assert(!regions_overlap(dst, 2 * REG_SIZE, src, 2 * REG_SIZE));
    assert(!ref == !block);
 
+   opcode mov_op = for_scratch ? VEC4_OPCODE_MOV_FOR_SCRATCH : BRW_OPCODE_MOV;
+
    const vec4_builder bld = !ref ? vec4_builder(this).at_end() :
                             vec4_builder(this).at(block, ref->next);
 
@@ -2156,22 +2159,22 @@ vec4_visitor::shuffle_64bit_data(dst_reg dst, src_reg src, bool for_write,
    }
 
    /* dst+0.XY = src+0.XY */
-   bld.group(4, 0).MOV(writemask(dst, WRITEMASK_XY), src);
+   bld.group(4, 0).emit(mov_op, writemask(dst, WRITEMASK_XY), src);
 
    /* dst+0.ZW = src+1.XY */
    bld.group(4, for_write ? 1 : 0)
-      .MOV(writemask(dst, WRITEMASK_ZW),
+      .emit(mov_op, writemask(dst, WRITEMASK_ZW),
            swizzle(byte_offset(src, REG_SIZE), BRW_SWIZZLE_XYXY));
 
    /* dst+1.XY = src+0.ZW */
    bld.group(4, for_write ? 0 : 1)
-      .MOV(writemask(byte_offset(dst, REG_SIZE), WRITEMASK_XY),
-           swizzle(src, BRW_SWIZZLE_ZWZW));
+      .emit(mov_op, writemask(byte_offset(dst, REG_SIZE), WRITEMASK_XY),
+            swizzle(src, BRW_SWIZZLE_ZWZW));
 
    /* dst+1.ZW = src+1.ZW */
    return bld.group(4, 1)
-             .MOV(writemask(byte_offset(dst, REG_SIZE), WRITEMASK_ZW),
-                  byte_offset(src, REG_SIZE));
+             .emit(mov_op, writemask(byte_offset(dst, REG_SIZE), WRITEMASK_ZW),
+                   byte_offset(src, REG_SIZE));
 }
 
 }
diff --git a/src/intel/compiler/brw_vec4_reg_allocate.cpp b/src/intel/compiler/brw_vec4_reg_allocate.cpp
index 35c47fc37ca..19917124d79 100644
--- a/src/intel/compiler/brw_vec4_reg_allocate.cpp
+++ b/src/intel/compiler/brw_vec4_reg_allocate.cpp
@@ -468,6 +468,7 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill)
 
       case SHADER_OPCODE_GFX4_SCRATCH_READ:
       case SHADER_OPCODE_GFX4_SCRATCH_WRITE:
+      case VEC4_OPCODE_MOV_FOR_SCRATCH:
          for (int i = 0; i < 3; i++) {
             if (inst->src[i].file == VGRF)
                no_spill[inst->src[i].nr] = true;
diff --git a/src/intel/compiler/brw_vec4_visitor.cpp b/src/intel/compiler/brw_vec4_visitor.cpp
index 01d8372fa9f..d790dbf7f4a 100644
--- a/src/intel/compiler/brw_vec4_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_visitor.cpp
@@ -1401,7 +1401,7 @@ vec4_visitor::emit_scratch_read(bblock_t *block, vec4_instruction *inst,
       vec4_instruction *last_read =
          SCRATCH_READ(byte_offset(shuffled_float, REG_SIZE), index);
       emit_before(block, inst, last_read);
-      shuffle_64bit_data(temp, src_reg(shuffled), false, block, last_read);
+      shuffle_64bit_data(temp, src_reg(shuffled), false, true, block, last_read);
    }
 }
 
@@ -1446,7 +1446,7 @@ vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst,
    } else {
       dst_reg shuffled = dst_reg(this, alloc_type);
       vec4_instruction *last =
-         shuffle_64bit_data(shuffled, temp, true, block, inst);
+         shuffle_64bit_data(shuffled, temp, true, true, block, inst);
       src_reg shuffled_float = src_reg(retype(shuffled, BRW_REGISTER_TYPE_F));
 
       uint8_t mask = 0;
@@ -1653,7 +1653,7 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
 
    if (is_64bit) {
       temp = retype(temp, BRW_REGISTER_TYPE_DF);
-      shuffle_64bit_data(orig_temp, src_reg(temp), false, block, inst);
+      shuffle_64bit_data(orig_temp, src_reg(temp), false, false, block, inst);
    }
 }