diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index e62e6e1c9e9..ae08c4ab505 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -1770,17 +1770,6 @@ void brw_svb_write(struct brw_codegen *p, unsigned binding_table_index, bool send_commit_msg); -brw_inst *brw_fb_WRITE(struct brw_codegen *p, - struct brw_reg payload, - struct brw_reg implied_header, - unsigned msg_control, - unsigned binding_table_index, - unsigned msg_length, - unsigned response_length, - bool eot, - bool last_render_target, - bool header_present); - brw_inst *gfx9_fb_READ(struct brw_codegen *p, struct brw_reg dst, struct brw_reg payload, diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 74bd9c0ddbe..0c8bcf65620 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -2503,60 +2503,6 @@ void brw_oword_block_read(struct brw_codegen *p, brw_pop_insn_state(p); } -brw_inst * -brw_fb_WRITE(struct brw_codegen *p, - struct brw_reg payload, - struct brw_reg implied_header, - unsigned msg_control, - unsigned binding_table_index, - unsigned msg_length, - unsigned response_length, - bool eot, - bool last_render_target, - bool header_present) -{ - const struct intel_device_info *devinfo = p->devinfo; - const unsigned target_cache = - (devinfo->ver >= 6 ? GFX6_SFID_DATAPORT_RENDER_CACHE : - BRW_SFID_DATAPORT_WRITE); - brw_inst *insn; - struct brw_reg dest, src0; - - if (brw_get_default_exec_size(p) >= BRW_EXECUTE_16) - dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); - else - dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW); - - if (devinfo->ver >= 6) { - insn = next_insn(p, BRW_OPCODE_SENDC); - } else { - insn = next_insn(p, BRW_OPCODE_SEND); - } - brw_inst_set_sfid(devinfo, insn, target_cache); - brw_inst_set_compression(devinfo, insn, false); - - if (devinfo->ver >= 6) { - /* headerless version, just submit color payload */ - src0 = payload; - } else { - assert(payload.file == BRW_MESSAGE_REGISTER_FILE); - brw_inst_set_base_mrf(devinfo, insn, payload.nr); - src0 = implied_header; - } - - brw_set_dest(p, insn, dest); - brw_set_src0(p, insn, src0); - brw_set_desc(p, insn, - brw_message_desc(devinfo, msg_length, response_length, - header_present) | - brw_fb_write_desc(devinfo, binding_table_index, msg_control, - last_render_target, - false /* coarse_write */)); - brw_inst_set_eot(devinfo, insn, eot); - - return insn; -} - brw_inst * gfx9_fb_READ(struct brw_codegen *p, struct brw_reg dst, diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 27e9b3cbd6c..488a573c28a 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -448,26 +448,18 @@ public: const unsigned *get_assembly(); private: - void fire_fb_write(fs_inst *inst, - struct brw_reg payload, - struct brw_reg implied_header, - GLuint nr); void generate_send(fs_inst *inst, struct brw_reg dst, struct brw_reg desc, struct brw_reg ex_desc, struct brw_reg payload, struct brw_reg payload2); - void generate_fb_write(fs_inst *inst, struct brw_reg payload); void generate_fb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload); void generate_cs_terminate(fs_inst *inst, struct brw_reg payload); void generate_barrier(fs_inst *inst, struct brw_reg src); bool generate_linterp(fs_inst *inst, struct brw_reg dst, struct brw_reg *src); - void generate_tex(fs_inst *inst, struct brw_reg dst, - struct brw_reg surface_index, - struct brw_reg sampler_index); void generate_ddx(const fs_inst *inst, struct brw_reg dst, struct brw_reg src); void generate_ddy(const fs_inst *inst, @@ -479,9 +471,6 @@ private: void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst, struct brw_reg index, struct brw_reg offset); - void generate_varying_pull_constant_load_gfx4(fs_inst *inst, - struct brw_reg dst, - struct brw_reg index); void generate_set_sample_id(fs_inst *inst, struct brw_reg dst, diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 15c70a23071..bb5784ab0c4 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -106,48 +106,6 @@ brw_reg_from_fs_reg(const struct intel_device_info *devinfo, fs_inst *inst, brw_reg = brw_vecn_reg(width, brw_file_from_reg(reg), reg->nr, 0); brw_reg = stride(brw_reg, width * reg->stride, width, reg->stride); } - - if (devinfo->verx10 == 70) { - /* From the IvyBridge PRM (EU Changes by Processor Generation, page 13): - * "Each DF (Double Float) operand uses an element size of 4 rather - * than 8 and all regioning parameters are twice what the values - * would be based on the true element size: ExecSize, Width, - * HorzStride, and VertStride. Each DF operand uses a pair of - * channels and all masking and swizzing should be adjusted - * appropriately." - * - * From the IvyBridge PRM (Special Requirements for Handling Double - * Precision Data Types, page 71): - * "In Align1 mode, all regioning parameters like stride, execution - * size, and width must use the syntax of a pair of packed - * floats. The offsets for these data types must be 64-bit - * aligned. The execution size and regioning parameters are in terms - * of floats." - * - * Summarized: when handling DF-typed arguments, ExecSize, - * VertStride, and Width must be doubled. - * - * It applies to BayTrail too. - */ - if (type_sz(reg->type) == 8) { - brw_reg.width++; - if (brw_reg.vstride > 0) - brw_reg.vstride++; - assert(brw_reg.hstride == BRW_HORIZONTAL_STRIDE_1); - } - - /* When converting from DF->F, we set the destination stride to 2 - * because each d2f conversion implicitly writes 2 floats, being - * the first one the converted value. IVB/BYT actually writes two - * F components per SIMD channel, and every other component is - * filled with garbage. - */ - if (reg == &inst->dst && get_exec_type_size(inst) == 8 && - type_sz(inst->dst.type) < 8) { - assert(brw_reg.hstride > BRW_HORIZONTAL_STRIDE_1); - brw_reg.hstride--; - } - } } brw_reg = retype(brw_reg, reg->type); @@ -170,19 +128,6 @@ brw_reg_from_fs_reg(const struct intel_device_info *devinfo, fs_inst *inst, unreachable("not reached"); } - /* On HSW+, scalar DF sources can be accessed using the normal <0,1,0> - * region, but on IVB and BYT DF regions must be programmed in terms of - * floats. A <0,2,1> region accomplishes this. - */ - if (devinfo->verx10 == 70 && - type_sz(reg->type) == 8 && - brw_reg.vstride == BRW_VERTICAL_STRIDE_0 && - brw_reg.width == BRW_WIDTH_1 && - brw_reg.hstride == BRW_HORIZONTAL_STRIDE_0) { - brw_reg.width = BRW_WIDTH_2; - brw_reg.hstride = BRW_HORIZONTAL_STRIDE_1; - } - return brw_reg; } @@ -232,22 +177,20 @@ fs_generator::patch_halt_jumps() int scale = brw_jump_scale(p->devinfo); - if (devinfo->ver >= 6) { - /* There is a somewhat strange undocumented requirement of using - * HALT, according to the simulator. If some channel has HALTed to - * a particular UIP, then by the end of the program, every channel - * must have HALTed to that UIP. Furthermore, the tracking is a - * stack, so you can't do the final halt of a UIP after starting - * halting to a new UIP. - * - * Symptoms of not emitting this instruction on actual hardware - * included GPU hangs and sparkly rendering on the piglit discard - * tests. - */ - brw_inst *last_halt = brw_HALT(p); - brw_inst_set_uip(p->devinfo, last_halt, 1 * scale); - brw_inst_set_jip(p->devinfo, last_halt, 1 * scale); - } + /* There is a somewhat strange undocumented requirement of using + * HALT, according to the simulator. If some channel has HALTed to + * a particular UIP, then by the end of the program, every channel + * must have HALTed to that UIP. Furthermore, the tracking is a + * stack, so you can't do the final halt of a UIP after starting + * halting to a new UIP. + * + * Symptoms of not emitting this instruction on actual hardware + * included GPU hangs and sparkly rendering on the piglit discard + * tests. + */ + brw_inst *last_halt = brw_HALT(p); + brw_inst_set_uip(p->devinfo, last_halt, 1 * scale); + brw_inst_set_jip(p->devinfo, last_halt, 1 * scale); int ip = p->nr_insn; @@ -255,67 +198,12 @@ fs_generator::patch_halt_jumps() brw_inst *patch = &p->store[patch_ip->ip]; assert(brw_inst_opcode(p->isa, patch) == BRW_OPCODE_HALT); - if (devinfo->ver >= 6) { - /* HALT takes a half-instruction distance from the pre-incremented IP. */ - brw_inst_set_uip(p->devinfo, patch, (ip - patch_ip->ip) * scale); - } else { - brw_set_src1(p, patch, brw_imm_d((ip - patch_ip->ip) * scale)); - } + /* HALT takes a half-instruction distance from the pre-incremented IP. */ + brw_inst_set_uip(p->devinfo, patch, (ip - patch_ip->ip) * scale); } this->discard_halt_patches.make_empty(); - if (devinfo->ver < 6) { - /* From the g965 PRM: - * - * "As DMask is not automatically reloaded into AMask upon completion - * of this instruction, software has to manually restore AMask upon - * completion." - * - * DMask lives in the bottom 16 bits of sr0.1. - */ - brw_inst *reset = brw_MOV(p, brw_mask_reg(BRW_AMASK), - retype(brw_sr0_reg(1), BRW_REGISTER_TYPE_UW)); - brw_inst_set_exec_size(devinfo, reset, BRW_EXECUTE_1); - brw_inst_set_mask_control(devinfo, reset, BRW_MASK_DISABLE); - brw_inst_set_qtr_control(devinfo, reset, BRW_COMPRESSION_NONE); - brw_inst_set_thread_control(devinfo, reset, BRW_THREAD_SWITCH); - } - - if (devinfo->ver == 4 && devinfo->platform != INTEL_PLATFORM_G4X) { - /* From the g965 PRM: - * - * "[DevBW, DevCL] Erratum: The subfields in mask stack register are - * reset to zero during graphics reset, however, they are not - * initialized at thread dispatch. These subfields will retain the - * values from the previous thread. Software should make sure the - * mask stack is empty (reset to zero) before terminating the thread. - * In case that this is not practical, software may have to reset the - * mask stack at the beginning of each kernel, which will impact the - * performance." - * - * Luckily we can rely on: - * - * "[DevBW, DevCL] This register access restriction is not - * applicable, hardware does ensure execution pipeline coherency, - * when a mask stack register is used as an explicit source and/or - * destination." - */ - brw_push_insn_state(p); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); - - brw_set_default_exec_size(p, BRW_EXECUTE_2); - brw_MOV(p, vec2(brw_mask_stack_depth_reg(0)), brw_imm_uw(0)); - - brw_set_default_exec_size(p, BRW_EXECUTE_16); - /* Reset the if stack. */ - brw_MOV(p, retype(brw_mask_stack_reg(0), BRW_REGISTER_TYPE_UW), - brw_imm_uw(0)); - - brw_pop_insn_state(p); - } - return true; } @@ -357,92 +245,6 @@ fs_generator::generate_send(fs_inst *inst, } } -void -fs_generator::fire_fb_write(fs_inst *inst, - struct brw_reg payload, - struct brw_reg implied_header, - GLuint nr) -{ - struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data); - - if (devinfo->ver < 6) { - brw_push_insn_state(p); - brw_set_default_exec_size(p, BRW_EXECUTE_8); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); - brw_set_default_flag_reg(p, 0, 0); - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, offset(retype(payload, BRW_REGISTER_TYPE_UD), 1), - offset(retype(implied_header, BRW_REGISTER_TYPE_UD), 1)); - brw_pop_insn_state(p); - } - - uint32_t msg_control = brw_fb_write_msg_control(inst, prog_data); - - /* We assume render targets start at 0, because headerless FB write - * messages set "Render Target Index" to 0. Using a different binding - * table index would make it impossible to use headerless messages. - */ - const uint32_t surf_index = inst->target; - - brw_inst *insn = brw_fb_WRITE(p, - payload, - retype(implied_header, BRW_REGISTER_TYPE_UW), - msg_control, - surf_index, - nr, - 0, - inst->eot, - inst->last_rt, - inst->header_size != 0); - - if (devinfo->ver >= 6) - brw_inst_set_rt_slot_group(devinfo, insn, inst->group / 16); -} - -void -fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload) -{ - assert(devinfo->ver < 7); - - brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); - brw_set_default_flag_reg(p, 0, 0); - - const struct brw_reg implied_header = - devinfo->ver < 6 ? payload : brw_null_reg(); - - if (inst->base_mrf >= 0) - payload = brw_message_reg(inst->base_mrf); - - if (!runtime_check_aads_emit) { - fire_fb_write(inst, payload, implied_header, inst->mlen); - } else { - /* This can only happen in gen < 6 */ - assert(devinfo->ver < 6); - - struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD)); - - /* Check runtime bit to detect if we have to send AA data or not */ - brw_push_insn_state(p); - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); - brw_set_default_exec_size(p, BRW_EXECUTE_1); - brw_AND(p, - v1_null_ud, - retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD), - brw_imm_ud(1<<26)); - brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ); - - int jmp = brw_JMPI(p, brw_imm_ud(0), BRW_PREDICATE_NORMAL) - p->store; - brw_pop_insn_state(p); - { - /* Don't send AA data */ - fire_fb_write(inst, offset(payload, 1), implied_header, inst->mlen-1); - } - brw_land_fwd_jump(p, jmp); - fire_fb_write(inst, payload, implied_header, inst->mlen); - } -} - void fs_generator::generate_fb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload) @@ -496,9 +298,6 @@ fs_generator::generate_mov_indirect(fs_inst *inst, brw_MOV(p, dst, reg); } } else { - /* Prior to Broadwell, there are only 8 address registers. */ - assert(inst->exec_size <= 8 || devinfo->ver >= 8); - /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */ struct brw_reg addr = vec8(brw_address_reg(0)); @@ -549,25 +348,22 @@ fs_generator::generate_mov_indirect(fs_inst *inst, * initializing the whole address register with a pipelined NoMask MOV * instruction. */ - if (devinfo->ver >= 7) { - insn = brw_MOV(p, addr, brw_imm_uw(imm_byte_offset)); - brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); - brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE); - if (devinfo->ver >= 12) - brw_set_default_swsb(p, tgl_swsb_null()); - else - brw_inst_set_no_dd_clear(devinfo, insn, use_dep_ctrl); - } + insn = brw_MOV(p, addr, brw_imm_uw(imm_byte_offset)); + brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE); + brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NONE); + if (devinfo->ver >= 12) + brw_set_default_swsb(p, tgl_swsb_null()); + else + brw_inst_set_no_dd_clear(devinfo, insn, use_dep_ctrl); insn = brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset)); if (devinfo->ver >= 12) brw_set_default_swsb(p, tgl_swsb_regdist(1)); - else if (devinfo->ver >= 7) + else brw_inst_set_no_dd_check(devinfo, insn, use_dep_ctrl); if (type_sz(reg.type) > 4 && - ((devinfo->verx10 == 70) || - devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo) || + (intel_device_info_is_9lp(devinfo) || !devinfo->has_64bit_float || devinfo->verx10 >= 125)) { /* IVB has an issue (which we found empirically) where it reads two * address register components per channel for indirectly addressed @@ -592,21 +388,7 @@ fs_generator::generate_mov_indirect(fs_inst *inst, } else { struct brw_reg ind_src = brw_VxH_indirect(0, 0); - brw_inst *mov = brw_MOV(p, dst, retype(ind_src, reg.type)); - - if (devinfo->ver == 6 && dst.file == BRW_MESSAGE_REGISTER_FILE && - !inst->get_next()->is_tail_sentinel() && - ((fs_inst *)inst->get_next())->mlen > 0) { - /* From the Sandybridge PRM: - * - * "[Errata: DevSNB(SNB)] If MRF register is updated by any - * instruction that “indexed/indirect” source AND is followed - * by a send, the instruction requires a “Switch”. This is to - * avoid race condition where send may dispatch before MRF is - * updated." - */ - brw_inst_set_thread_control(devinfo, mov, BRW_THREAD_SWITCH); - } + brw_MOV(p, dst, retype(ind_src, reg.type)); } } } @@ -623,8 +405,7 @@ fs_generator::generate_shuffle(fs_inst *inst, /* Ivy bridge has some strange behavior that makes this a real pain to * implement for 64-bit values so we just don't bother. */ - assert((devinfo->verx10 >= 75 && devinfo->has_64bit_float) || - type_sz(src.type) <= 4); + assert(devinfo->has_64bit_float || type_sz(src.type) <= 4); /* Gen12.5 adds the following region restriction: * @@ -646,7 +427,7 @@ fs_generator::generate_shuffle(fs_inst *inst, * easier just to split it here. */ const unsigned lower_width = - devinfo->ver <= 7 || element_sz(src) > 4 || element_sz(dst) > 4 ? 8 : + element_sz(src) > 4 || element_sz(dst) > 4 ? 8 : MIN2(16, inst->exec_size); brw_set_default_exec_size(p, cvt(lower_width) - 1); @@ -875,358 +656,20 @@ fs_generator::generate_linterp(fs_inst *inst, * |-----------------------------------| * |(x0, x1)|(y0, y1)|(x2, x3)|(y2, y3)| * ----------------------------------- - * - * but for the LINE/MAC pair, the LINE reads Xs and the MAC reads Ys: - * - * ----------------------------------- - * | src1+0 | src1+1 | src1+2 | src1+3 | - * |-----------------------------------| - * |(x0, x1)|(y0, y1)| | | in SIMD8 - * |-----------------------------------| - * |(x0, x1)|(x2, x3)|(y0, y1)|(y2, y3)| in SIMD16 - * ----------------------------------- - * - * See also: emit_interpolation_setup_gfx4(). */ struct brw_reg delta_x = src[0]; - struct brw_reg delta_y = offset(src[0], inst->exec_size / 8); struct brw_reg interp = src[1]; - brw_inst *i[2]; /* nir_lower_interpolation() will do the lowering to MAD instructions for * us on gfx11+ */ assert(devinfo->ver < 11); + assert(devinfo->has_pln); - if (devinfo->has_pln) { - if (devinfo->ver <= 6 && (delta_x.nr & 1) != 0) { - /* From the Sandy Bridge PRM Vol. 4, Pt. 2, Section 8.3.53, "Plane": - * - * "[DevSNB]: must be even register aligned. - * - * This restriction is lifted on Ivy Bridge. - * - * This means that we need to split PLN into LINE+MAC on-the-fly. - * Unfortunately, the inputs are laid out for PLN and not LINE+MAC so - * we have to split into SIMD8 pieces. For gfx4 (!has_pln), the - * coordinate registers are laid out differently so we leave it as a - * SIMD16 instruction. - */ - assert(inst->exec_size == 8 || inst->exec_size == 16); - assert(inst->group % 16 == 0); - - brw_push_insn_state(p); - brw_set_default_exec_size(p, BRW_EXECUTE_8); - - /* Thanks to two accumulators, we can emit all the LINEs and then all - * the MACs. This improves parallelism a bit. - */ - for (unsigned g = 0; g < inst->exec_size / 8; g++) { - brw_inst *line = brw_LINE(p, brw_null_reg(), interp, - offset(delta_x, g * 2)); - brw_inst_set_group(devinfo, line, inst->group + g * 8); - - /* LINE writes the accumulator automatically on gfx4-5. On Sandy - * Bridge and later, we have to explicitly enable it. - */ - if (devinfo->ver >= 6) - brw_inst_set_acc_wr_control(p->devinfo, line, true); - - /* brw_set_default_saturate() is called before emitting - * instructions, so the saturate bit is set in each instruction, - * so we need to unset it on the LINE instructions. - */ - brw_inst_set_saturate(p->devinfo, line, false); - } - - for (unsigned g = 0; g < inst->exec_size / 8; g++) { - brw_inst *mac = brw_MAC(p, offset(dst, g), suboffset(interp, 1), - offset(delta_x, g * 2 + 1)); - brw_inst_set_group(devinfo, mac, inst->group + g * 8); - brw_inst_set_cond_modifier(p->devinfo, mac, inst->conditional_mod); - } - - brw_pop_insn_state(p); - - return true; - } else { - brw_PLN(p, dst, interp, delta_x); - - return false; - } - } else { - i[0] = brw_LINE(p, brw_null_reg(), interp, delta_x); - i[1] = brw_MAC(p, dst, suboffset(interp, 1), delta_y); - - brw_inst_set_cond_modifier(p->devinfo, i[1], inst->conditional_mod); - - /* brw_set_default_saturate() is called before emitting instructions, so - * the saturate bit is set in each instruction, so we need to unset it on - * the first instruction. - */ - brw_inst_set_saturate(p->devinfo, i[0], false); - - return true; - } + brw_PLN(p, dst, interp, delta_x); + return false; } -void -fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, - struct brw_reg surface_index, - struct brw_reg sampler_index) -{ - assert(devinfo->ver < 7); - assert(inst->size_written % REG_SIZE == 0); - int msg_type = -1; - uint32_t simd_mode; - uint32_t return_format; - - /* Sampler EOT message of less than the dispatch width would kill the - * thread prematurely. - */ - assert(!inst->eot || inst->exec_size == dispatch_width); - - switch (dst.type) { - case BRW_REGISTER_TYPE_D: - return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32; - break; - case BRW_REGISTER_TYPE_UD: - return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32; - break; - default: - return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; - break; - } - - /* Stomp the resinfo output type to UINT32. On gens 4-5, the output type - * is set as part of the message descriptor. On gfx4, the PRM seems to - * allow UINT32 and FLOAT32 (i965 PRM, Vol. 4 Section 4.8.1.1), but on - * later gens UINT32 is required. Once you hit Sandy Bridge, the bit is - * gone from the message descriptor entirely and you just get UINT32 all - * the time regasrdless. Since we can really only do non-UINT32 on gfx4, - * just stomp it to UINT32 all the time. - */ - if (inst->opcode == SHADER_OPCODE_TXS) - return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32; - - switch (inst->exec_size) { - case 8: - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; - break; - case 16: - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; - break; - default: - unreachable("Invalid width for texture instruction"); - } - - if (devinfo->ver >= 5) { - switch (inst->opcode) { - case SHADER_OPCODE_TEX: - if (inst->shadow_compare) { - msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_COMPARE; - } else { - msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE; - } - break; - case FS_OPCODE_TXB: - if (inst->shadow_compare) { - msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE; - } else { - msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_BIAS; - } - break; - case SHADER_OPCODE_TXL: - if (inst->shadow_compare) { - msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE; - } else { - msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_LOD; - } - break; - case SHADER_OPCODE_TXS: - msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_RESINFO; - break; - case SHADER_OPCODE_TXD: - assert(!inst->shadow_compare); - msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_DERIVS; - break; - case SHADER_OPCODE_TXF: - msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_LD; - break; - case SHADER_OPCODE_TXF_CMS: - msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_LD; - break; - case SHADER_OPCODE_LOD: - msg_type = GFX5_SAMPLER_MESSAGE_LOD; - break; - case SHADER_OPCODE_TG4: - assert(devinfo->ver == 6); - assert(!inst->shadow_compare); - msg_type = GFX7_SAMPLER_MESSAGE_SAMPLE_GATHER4; - break; - case SHADER_OPCODE_TG4_BIAS: - assert(devinfo->ver >= 20); - assert(!inst->shadow_compare); - msg_type = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_B; - break; - case SHADER_OPCODE_TG4_EXPLICIT_LOD: - assert(devinfo->ver >= 20); - if (inst->shadow_compare) - msg_type = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_L_C; - else - msg_type = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_L; - break; - case SHADER_OPCODE_TG4_IMPLICIT_LOD: - assert(devinfo->ver >= 20); - if (inst->shadow_compare) - msg_type = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_I_C; - else - msg_type = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_I; - break; - case SHADER_OPCODE_TG4_OFFSET_BIAS: - assert(devinfo->ver >= 20); - assert(!inst->shadow_compare); - msg_type = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_B; - break; - case SHADER_OPCODE_TG4_OFFSET_LOD: - assert(devinfo->ver >= 20); - if (inst->shadow_compare) - msg_type = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_L_C; - else - msg_type = XE2_SAMPLER_MESSAGE_SAMPLE_GATHER4_PO_L; - break; - case SHADER_OPCODE_SAMPLEINFO: - msg_type = GFX6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO; - break; - default: - unreachable("not reached"); - } - } else { - switch (inst->opcode) { - case SHADER_OPCODE_TEX: - /* Note that G45 and older determines shadow compare and dispatch width - * from message length for most messages. - */ - if (inst->exec_size == 8) { - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; - if (inst->shadow_compare) { - assert(inst->mlen == 6); - } else { - assert(inst->mlen <= 4); - } - } else { - if (inst->shadow_compare) { - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE; - assert(inst->mlen == 9); - } else { - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE; - assert(inst->mlen <= 7 && inst->mlen % 2 == 1); - } - } - break; - case FS_OPCODE_TXB: - if (inst->shadow_compare) { - assert(inst->exec_size == 8); - assert(inst->mlen == 6); - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE; - } else { - assert(inst->mlen == 9); - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; - } - break; - case SHADER_OPCODE_TXL: - if (inst->shadow_compare) { - assert(inst->exec_size == 8); - assert(inst->mlen == 6); - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE; - } else { - assert(inst->mlen == 9); - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD; - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; - } - break; - case SHADER_OPCODE_TXD: - /* There is no sample_d_c message; comparisons are done manually */ - assert(inst->exec_size == 8); - assert(inst->mlen == 7 || inst->mlen == 10); - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS; - break; - case SHADER_OPCODE_TXF: - assert(inst->mlen <= 9 && inst->mlen % 2 == 1); - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD; - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; - break; - case SHADER_OPCODE_TXS: - assert(inst->mlen == 3); - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO; - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; - break; - default: - unreachable("not reached"); - } - } - assert(msg_type != -1); - - if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) { - dst = vec16(dst); - } - - assert(sampler_index.type == BRW_REGISTER_TYPE_UD); - - /* Load the message header if present. If there's a texture offset, - * we need to set it up explicitly and load the offset bitfield. - * Otherwise, we can use an implied move from g0 to the first message reg. - */ - struct brw_reg src = brw_null_reg(); - if (inst->header_size != 0) { - if (devinfo->ver < 6 && !inst->offset) { - /* Set up an implied move from g0 to the MRF. */ - src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); - } else { - const tgl_swsb swsb = brw_get_default_swsb(p); - assert(inst->base_mrf != -1); - struct brw_reg header_reg = brw_message_reg(inst->base_mrf); - - brw_push_insn_state(p); - brw_set_default_swsb(p, tgl_swsb_src_dep(swsb)); - brw_set_default_exec_size(p, BRW_EXECUTE_8); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); - /* Explicitly set up the message header by copying g0 to the MRF. */ - brw_MOV(p, header_reg, brw_vec8_grf(0, 0)); - brw_set_default_swsb(p, tgl_swsb_regdist(1)); - - brw_set_default_exec_size(p, BRW_EXECUTE_1); - if (inst->offset) { - /* Set the offset bits in DWord 2. */ - brw_MOV(p, get_element_ud(header_reg, 2), - brw_imm_ud(inst->offset)); - } - - brw_pop_insn_state(p); - brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1)); - } - } - - assert(surface_index.file == BRW_IMMEDIATE_VALUE); - assert(sampler_index.file == BRW_IMMEDIATE_VALUE); - - brw_SAMPLE(p, - retype(dst, BRW_REGISTER_TYPE_UW), - inst->base_mrf, - src, - surface_index.ud, - sampler_index.ud % 16, - msg_type, - inst->size_written / REG_SIZE, - inst->mlen, - inst->header_size != 0, - simd_mode, - return_format); -} - - /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input * looking like: * @@ -1261,50 +704,27 @@ fs_generator::generate_ddx(const fs_inst *inst, { unsigned vstride, width; - if (devinfo->ver >= 8) { - if (inst->opcode == FS_OPCODE_DDX_FINE) { - /* produce accurate derivatives */ - vstride = BRW_VERTICAL_STRIDE_2; - width = BRW_WIDTH_2; - } else { - /* replicate the derivative at the top-left pixel to other pixels */ - vstride = BRW_VERTICAL_STRIDE_4; - width = BRW_WIDTH_4; - } - - struct brw_reg src0 = byte_offset(src, type_sz(src.type));; - struct brw_reg src1 = src; - - src0.vstride = vstride; - src0.width = width; - src0.hstride = BRW_HORIZONTAL_STRIDE_0; - src1.vstride = vstride; - src1.width = width; - src1.hstride = BRW_HORIZONTAL_STRIDE_0; - - brw_ADD(p, dst, src0, negate(src1)); + if (inst->opcode == FS_OPCODE_DDX_FINE) { + /* produce accurate derivatives */ + vstride = BRW_VERTICAL_STRIDE_2; + width = BRW_WIDTH_2; } else { - /* On Haswell and earlier, the region used above appears to not work - * correctly for compressed instructions. At least on Haswell and - * Iron Lake, compressed ALIGN16 instructions do work. Since we - * would have to split to SIMD8 no matter which method we choose, we - * may as well use ALIGN16 on all platforms gfx7 and earlier. - */ - struct brw_reg src0 = stride(src, 4, 4, 1); - struct brw_reg src1 = stride(src, 4, 4, 1); - if (inst->opcode == FS_OPCODE_DDX_FINE) { - src0.swizzle = BRW_SWIZZLE_XXZZ; - src1.swizzle = BRW_SWIZZLE_YYWW; - } else { - src0.swizzle = BRW_SWIZZLE_XXXX; - src1.swizzle = BRW_SWIZZLE_YYYY; - } - - brw_push_insn_state(p); - brw_set_default_access_mode(p, BRW_ALIGN_16); - brw_ADD(p, dst, negate(src0), src1); - brw_pop_insn_state(p); + /* replicate the derivative at the top-left pixel to other pixels */ + vstride = BRW_VERTICAL_STRIDE_4; + width = BRW_WIDTH_4; } + + struct brw_reg src0 = byte_offset(src, type_sz(src.type));; + struct brw_reg src1 = src; + + src0.vstride = vstride; + src0.width = width; + src0.hstride = BRW_HORIZONTAL_STRIDE_0; + src1.vstride = vstride; + src1.width = width; + src1.hstride = BRW_HORIZONTAL_STRIDE_0; + + brw_ADD(p, dst, src0, negate(src1)); } /* The negate_value boolean is used to negate the derivative computation for @@ -1331,8 +751,7 @@ fs_generator::generate_ddy(const fs_inst *inst, * So for half-float operations we use the Gfx11+ Align1 path. CHV * inherits its FP16 hardware from SKL, so it is not affected. */ - if (devinfo->ver >= 11 || - (devinfo->platform == INTEL_PLATFORM_BDW && src.type == BRW_REGISTER_TYPE_HF)) { + if (devinfo->ver >= 11) { src = stride(src, 0, 2, 1); brw_push_insn_state(p); @@ -1358,28 +777,10 @@ fs_generator::generate_ddy(const fs_inst *inst, } } else { /* replicate the derivative at the top-left pixel to other pixels */ - if (devinfo->ver >= 8) { - struct brw_reg src0 = byte_offset(stride(src, 4, 4, 0), 0 * type_size); - struct brw_reg src1 = byte_offset(stride(src, 4, 4, 0), 2 * type_size); + struct brw_reg src0 = byte_offset(stride(src, 4, 4, 0), 0 * type_size); + struct brw_reg src1 = byte_offset(stride(src, 4, 4, 0), 2 * type_size); - brw_ADD(p, dst, negate(src0), src1); - } else { - /* On Haswell and earlier, the region used above appears to not work - * correctly for compressed instructions. At least on Haswell and - * Iron Lake, compressed ALIGN16 instructions do work. Since we - * would have to split to SIMD8 no matter which method we choose, we - * may as well use ALIGN16 on all platforms gfx7 and earlier. - */ - struct brw_reg src0 = stride(src, 4, 4, 1); - struct brw_reg src1 = stride(src, 4, 4, 1); - src0.swizzle = BRW_SWIZZLE_XXXX; - src1.swizzle = BRW_SWIZZLE_ZZZZ; - - brw_push_insn_state(p); - brw_set_default_access_mode(p, BRW_ALIGN_16); - brw_ADD(p, dst, negate(src0), src1); - brw_pop_insn_state(p); - } + brw_ADD(p, dst, negate(src0), src1); } } @@ -1543,64 +944,6 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst, read_offset, surf_index); } -void -fs_generator::generate_varying_pull_constant_load_gfx4(fs_inst *inst, - struct brw_reg dst, - struct brw_reg index) -{ - assert(devinfo->ver < 7); /* Should use the gfx7 variant. */ - assert(inst->header_size != 0); - assert(inst->mlen); - - assert(index.file == BRW_IMMEDIATE_VALUE && - index.type == BRW_REGISTER_TYPE_UD); - uint32_t surf_index = index.ud; - - uint32_t simd_mode, rlen, msg_type; - if (inst->exec_size == 16) { - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; - rlen = 8; - } else { - assert(inst->exec_size == 8); - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; - rlen = 4; - } - - if (devinfo->ver >= 5) - msg_type = GFX5_SAMPLER_MESSAGE_SAMPLE_LD; - else { - /* We always use the SIMD16 message so that we only have to load U, and - * not V or R. - */ - msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD; - assert(inst->mlen == 3); - assert(inst->size_written == 8 * REG_SIZE); - rlen = 8; - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; - } - - struct brw_reg header = brw_vec8_grf(0, 0); - gfx6_resolve_implied_move(p, &header, inst->base_mrf); - - brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); - brw_inst_set_compression(devinfo, send, false); - brw_inst_set_sfid(devinfo, send, BRW_SFID_SAMPLER); - brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UW)); - brw_set_src0(p, send, header); - if (devinfo->ver < 6) - brw_inst_set_base_mrf(p->devinfo, send, inst->base_mrf); - - /* Our surface is set up as floats, regardless of what actual data is - * stored in it. - */ - uint32_t return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; - brw_set_desc(p, send, - brw_message_desc(devinfo, inst->mlen, rlen, inst->header_size) | - brw_sampler_desc(devinfo, surf_index, - 0, /* sampler (unused) */ - msg_type, simd_mode, return_format)); -} - /* Sets vstride=1, width=4, hstride=0 of register src1 during * the ADD instruction. */ @@ -1616,8 +959,7 @@ fs_generator::generate_set_sample_id(fs_inst *inst, src0.type == BRW_REGISTER_TYPE_UD); const struct brw_reg reg = stride(src1, 1, 4, 0); - const unsigned lower_size = MIN2(inst->exec_size, - devinfo->ver >= 8 ? 16 : 8); + const unsigned lower_size = MIN2(inst->exec_size, 16); for (unsigned i = 0; i < inst->exec_size / lower_size; i++) { brw_inst *insn = brw_ADD(p, offset(dst, i * lower_size / 8), @@ -1689,8 +1031,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, * The documentation is often lacking annotations for Atom parts, * and empirically this affects CHV as well. */ - if (devinfo->ver >= 8 && - devinfo->ver <= 9 && + if (devinfo->ver <= 9 && p->nr_insn > 1 && brw_inst_opcode(p->isa, brw_last_inst) == BRW_OPCODE_MATH && brw_inst_math_function(devinfo, brw_last_inst) == BRW_MATH_FUNCTION_POW && @@ -1765,7 +1106,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, inst->dst.component_size(inst->exec_size) > REG_SIZE; brw_set_default_compression(p, compressed); - if ((devinfo->ver >= 20 || devinfo->ver < 7) && inst->group % 8 != 0) { + if (devinfo->ver >= 20 && inst->group % 8 != 0) { assert(inst->force_writemask_all); assert(!inst->predicate && !inst->conditional_mod); assert(!inst->writes_accumulator_implicitly(devinfo) && @@ -1796,11 +1137,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, brw_set_default_predicate_control(p, inst->predicate); brw_set_default_predicate_inverse(p, inst->predicate_inverse); /* On gfx7 and above, hardware automatically adds the group onto the - * flag subregister number. On Sandy Bridge and older, we have to do it - * ourselves. + * flag subregister number. */ - const unsigned flag_subreg = inst->flag_subreg + - (devinfo->ver >= 7 ? 0 : inst->group / 16); + const unsigned flag_subreg = inst->flag_subreg; brw_set_default_flag_reg(p, flag_subreg / 2, flag_subreg % 2); brw_set_default_saturate(p, inst->saturate); brw_set_default_mask_control(p, inst->force_writemask_all); @@ -1815,10 +1154,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, brw_set_default_swsb(p, swsb); unsigned exec_size = inst->exec_size; - if (devinfo->verx10 == 70 && - (get_exec_type_size(inst) == 8 || type_sz(inst->dst.type) == 8)) { - exec_size *= 2; - } brw_set_default_exec_size(p, cvt(exec_size) - 1); @@ -1868,14 +1203,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, break; case BRW_OPCODE_MAD: - assert(devinfo->ver >= 6); if (devinfo->ver < 10) brw_set_default_access_mode(p, BRW_ALIGN_16); brw_MAD(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_LRP: - assert(devinfo->ver >= 6 && devinfo->ver <= 10); + assert(devinfo->ver <= 10); if (devinfo->ver < 10) brw_set_default_access_mode(p, BRW_ALIGN_16); brw_LRP(p, dst, src[0], src[1], src[2]); @@ -1937,49 +1271,27 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, brw_F16TO32(p, dst, src[0]); break; case BRW_OPCODE_CMP: - if (inst->exec_size >= 16 && devinfo->verx10 == 70 && - dst.file == BRW_ARCHITECTURE_REGISTER_FILE) { - /* For unknown reasons the WaCMPInstFlagDepClearedEarly workaround - * implemented in the compiler is not sufficient. Overriding the - * type when the destination is the null register is necessary but - * not sufficient by itself. - */ - dst.type = BRW_REGISTER_TYPE_D; - } brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); break; case BRW_OPCODE_CMPN: - if (inst->exec_size >= 16 && devinfo->verx10 == 70 && - dst.file == BRW_ARCHITECTURE_REGISTER_FILE) { - /* For unknown reasons the WaCMPInstFlagDepClearedEarly workaround - * implemented in the compiler is not sufficient. Overriding the - * type when the destination is the null register is necessary but - * not sufficient by itself. - */ - dst.type = BRW_REGISTER_TYPE_D; - } brw_CMPN(p, dst, inst->conditional_mod, src[0], src[1]); break; case BRW_OPCODE_SEL: brw_SEL(p, dst, src[0], src[1]); break; case BRW_OPCODE_CSEL: - assert(devinfo->ver >= 8); if (devinfo->ver < 10) brw_set_default_access_mode(p, BRW_ALIGN_16); brw_CSEL(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_BFREV: - assert(devinfo->ver >= 7); brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD), retype(src[0], BRW_REGISTER_TYPE_UD)); break; case BRW_OPCODE_FBH: - assert(devinfo->ver >= 7); brw_FBH(p, retype(dst, src[0].type), src[0]); break; case BRW_OPCODE_FBL: - assert(devinfo->ver >= 7); brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), retype(src[0], BRW_REGISTER_TYPE_UD)); break; @@ -1987,16 +1299,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, brw_LZD(p, dst, src[0]); break; case BRW_OPCODE_CBIT: - assert(devinfo->ver >= 7); brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), retype(src[0], BRW_REGISTER_TYPE_UD)); break; case BRW_OPCODE_ADDC: - assert(devinfo->ver >= 7); brw_ADDC(p, dst, src[0], src[1]); break; case BRW_OPCODE_SUBB: - assert(devinfo->ver >= 7); brw_SUBB(p, dst, src[0], src[1]); break; case BRW_OPCODE_MAC: @@ -2004,31 +1313,24 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, break; case BRW_OPCODE_BFE: - assert(devinfo->ver >= 7); if (devinfo->ver < 10) brw_set_default_access_mode(p, BRW_ALIGN_16); brw_BFE(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_BFI1: - assert(devinfo->ver >= 7); brw_BFI1(p, dst, src[0], src[1]); break; case BRW_OPCODE_BFI2: - assert(devinfo->ver >= 7); if (devinfo->ver < 10) brw_set_default_access_mode(p, BRW_ALIGN_16); brw_BFI2(p, dst, src[0], src[1], src[2]); break; case BRW_OPCODE_IF: - if (inst->src[0].file != BAD_FILE) { - /* The instruction has an embedded compare (only allowed on gfx6) */ - assert(devinfo->ver == 6); - gfx6_IF(p, inst->conditional_mod, src[0], src[1]); - } else { - brw_IF(p, brw_get_default_exec_size(p)); - } + /* Can't have embedded compare (was only allowed on Gfx6). */ + assert(inst->src[0].file == BAD_FILE); + brw_IF(p, brw_get_default_exec_size(p)); break; case BRW_OPCODE_ELSE: @@ -2062,39 +1364,18 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, case SHADER_OPCODE_SIN: case SHADER_OPCODE_COS: assert(inst->conditional_mod == BRW_CONDITIONAL_NONE); - if (devinfo->ver >= 6) { - assert(inst->mlen == 0); - assert(devinfo->ver >= 7 || inst->exec_size == 8); - gfx6_math(p, dst, brw_math_function(inst->opcode), - src[0], brw_null_reg()); - } else { - assert(inst->mlen >= 1); - assert(devinfo->ver == 5 || devinfo->platform == INTEL_PLATFORM_G4X || inst->exec_size == 8); - gfx4_math(p, dst, - brw_math_function(inst->opcode), - inst->base_mrf, src[0], - BRW_MATH_PRECISION_FULL); - send_count++; - } + assert(inst->mlen == 0); + gfx6_math(p, dst, brw_math_function(inst->opcode), + src[0], brw_null_reg()); break; case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_INT_REMAINDER: case SHADER_OPCODE_POW: assert(devinfo->verx10 < 125); assert(inst->conditional_mod == BRW_CONDITIONAL_NONE); - if (devinfo->ver >= 6) { - assert(inst->mlen == 0); - assert((devinfo->ver >= 7 && inst->opcode == SHADER_OPCODE_POW) || - inst->exec_size == 8); - gfx6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]); - } else { - assert(inst->mlen >= 1); - assert(inst->exec_size == 8); - gfx4_math(p, dst, brw_math_function(inst->opcode), - inst->base_mrf, src[0], - BRW_MATH_PRECISION_FULL); - send_count++; - } + assert(inst->mlen == 0); + assert(inst->opcode == SHADER_OPCODE_POW || inst->exec_size == 8); + gfx6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]); break; case FS_OPCODE_LINTERP: multiple_instructions_emitted = generate_linterp(inst, dst, src); @@ -2130,26 +1411,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, send_count++; break; - case SHADER_OPCODE_TEX: - case FS_OPCODE_TXB: - case SHADER_OPCODE_TXD: - case SHADER_OPCODE_TXF: - case SHADER_OPCODE_TXF_CMS: - case SHADER_OPCODE_TXL: - case SHADER_OPCODE_TXS: - case SHADER_OPCODE_LOD: - case SHADER_OPCODE_TG4: - case SHADER_OPCODE_TG4_BIAS: - case SHADER_OPCODE_TG4_EXPLICIT_LOD: - case SHADER_OPCODE_TG4_IMPLICIT_LOD: - case SHADER_OPCODE_TG4_OFFSET_LOD: - case SHADER_OPCODE_TG4_OFFSET_BIAS: - case SHADER_OPCODE_SAMPLEINFO: - assert(inst->src[0].file == BAD_FILE); - generate_tex(inst, dst, src[1], src[2]); - send_count++; - break; - case FS_OPCODE_DDX_COARSE: case FS_OPCODE_DDX_FINE: generate_ddx(inst, dst, src[0]); @@ -2195,17 +1456,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, send_count++; break; - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4: - generate_varying_pull_constant_load_gfx4(inst, dst, src[0]); - send_count++; - break; - - case FS_OPCODE_REP_FB_WRITE: - case FS_OPCODE_FB_WRITE: - generate_fb_write(inst, src[0]); - send_count++; - break; - case FS_OPCODE_FB_READ: generate_fb_read(inst, dst, src[0]); send_count++; @@ -2274,7 +1524,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, break; case FS_OPCODE_LOAD_LIVE_CHANNELS: { - assert(devinfo->ver >= 8); assert(inst->force_writemask_all && inst->group == 0); assert(inst->dst.file == BAD_FILE); brw_set_default_exec_size(p, BRW_EXECUTE_1); @@ -2361,13 +1610,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width, send_count++; break; - case BRW_OPCODE_DIM: - assert(devinfo->platform == INTEL_PLATFORM_HSW); - assert(src[0].type == BRW_REGISTER_TYPE_DF); - assert(dst.type == BRW_REGISTER_TYPE_DF); - brw_DIM(p, dst, retype(src[0], BRW_REGISTER_TYPE_F)); - break; - case SHADER_OPCODE_RND_MODE: { assert(src[0].file == BRW_IMMEDIATE_VALUE); /*