mesa/src/intel/compiler/brw_eu_emit.c

/*
Copyright (C) Intel Corp. 2006. All Rights Reserved.
Intel funded Tungsten Graphics to
develop this 3D driver.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**********************************************************************/
/*
* Authors:
* Keith Whitwell <keithw@vmware.com>
*/
#include "brw_eu_defines.h"
#include "brw_eu.h"
#include "util/ralloc.h"
void
brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
{
const struct intel_device_info *devinfo = p->devinfo;
if (dest.file == BRW_GENERAL_REGISTER_FILE)
assert(dest.nr < XE2_MAX_GRF);
/* The hardware has a restriction where a destination of size Byte with
* a stride of 1 is only allowed for a packed byte MOV. For any other
* instruction, the stride must be at least 2, even when the destination
* is the NULL register.
*/
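/* For instance, a null<1>:B destination is silently widened to null<2>:B
* by the fixup below.
*/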
if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
dest.nr == BRW_ARF_NULL &&
brw_type_size_bytes(dest.type) == 1 &&
dest.hstride == BRW_HORIZONTAL_STRIDE_1) {
dest.hstride = BRW_HORIZONTAL_STRIDE_2;
}
if (devinfo->ver >= 12 &&
(brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
assert(dest.address_mode == BRW_ADDRESS_DIRECT);
assert(dest.subnr == 0);
assert(brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1 ||
(dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
dest.vstride == dest.width + 1));
assert(!dest.negate && !dest.abs);
brw_inst_set_dst_reg_file(devinfo, inst, dest.file);
brw_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
} else if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
assert(devinfo->ver < 12);
assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
assert(dest.address_mode == BRW_ADDRESS_DIRECT);
assert(dest.subnr % 16 == 0);
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
dest.vstride == dest.width + 1);
assert(!dest.negate && !dest.abs);
brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file);
} else {
brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type);
brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);
if (dest.address_mode == BRW_ADDRESS_DIRECT) {
brw_inst_set_dst_da_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
brw_inst_set_dst_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));
if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
dest.hstride = BRW_HORIZONTAL_STRIDE_1;
brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
} else {
brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
brw_inst_set_da16_writemask(devinfo, inst, dest.writemask);
if (dest.file == BRW_GENERAL_REGISTER_FILE) {
assert(dest.writemask != 0);
}
/* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
* Although Dst.HorzStride is a don't care for Align16, HW needs
* this to be programmed as "01".
*/
brw_inst_set_dst_hstride(devinfo, inst, 1);
}
} else {
brw_inst_set_dst_ia_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));
/* These are different sizes in align1 vs align16:
*/
if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
brw_inst_set_dst_ia1_addr_imm(devinfo, inst,
dest.indirect_offset);
if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
dest.hstride = BRW_HORIZONTAL_STRIDE_1;
brw_inst_set_dst_hstride(devinfo, inst, dest.hstride);
} else {
brw_inst_set_dst_ia16_addr_imm(devinfo, inst,
dest.indirect_offset);
/* Even though it's ignored in da16, this still needs to be set to '01'. */
brw_inst_set_dst_hstride(devinfo, inst, 1);
}
}
}
}
void
brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
const struct intel_device_info *devinfo = p->devinfo;
if (reg.file == BRW_GENERAL_REGISTER_FILE)
assert(reg.nr < XE2_MAX_GRF);
if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC ||
brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
/* Any source modifiers or regions will be ignored, since this just
* identifies the GRF to start reading the message contents from.
* Check for some likely failures.
*/
assert(!reg.negate);
assert(!reg.abs);
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
}
if (devinfo->ver >= 12 &&
(brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC)) {
assert(reg.file != BRW_IMMEDIATE_VALUE);
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
assert(reg.subnr == 0);
assert(has_scalar_region(reg) ||
(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
reg.vstride == reg.width + 1));
assert(!reg.negate && !reg.abs);
brw_inst_set_send_src0_reg_file(devinfo, inst, reg.file);
brw_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
} else if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC) {
assert(reg.file == BRW_GENERAL_REGISTER_FILE);
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
assert(reg.subnr % 16 == 0);
assert(has_scalar_region(reg) ||
(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
reg.vstride == reg.width + 1));
assert(!reg.negate && !reg.abs);
brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
} else {
brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type);
brw_inst_set_src0_abs(devinfo, inst, reg.abs);
brw_inst_set_src0_negate(devinfo, inst, reg.negate);
brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);
if (reg.file == BRW_IMMEDIATE_VALUE) {
if (reg.type == BRW_TYPE_DF)
brw_inst_set_imm_df(devinfo, inst, reg.df);
else if (reg.type == BRW_TYPE_UQ ||
reg.type == BRW_TYPE_Q)
brw_inst_set_imm_uq(devinfo, inst, reg.u64);
else
brw_inst_set_imm_ud(devinfo, inst, reg.ud);
if (devinfo->ver < 12 && brw_type_size_bytes(reg.type) < 8) {
brw_inst_set_src1_reg_file(devinfo, inst,
BRW_ARCHITECTURE_REGISTER_FILE);
brw_inst_set_src1_reg_hw_type(devinfo, inst,
brw_inst_src0_reg_hw_type(devinfo, inst));
}
} else {
if (reg.address_mode == BRW_ADDRESS_DIRECT) {
brw_inst_set_src0_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
brw_inst_set_src0_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));
} else {
brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
}
} else {
brw_inst_set_src0_ia_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));
if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
brw_inst_set_src0_ia1_addr_imm(devinfo, inst, reg.indirect_offset);
} else {
brw_inst_set_src0_ia16_addr_imm(devinfo, inst, reg.indirect_offset);
}
}
if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
if (reg.width == BRW_WIDTH_1 &&
brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
brw_inst_set_src0_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
brw_inst_set_src0_width(devinfo, inst, BRW_WIDTH_1);
brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
} else {
brw_inst_set_src0_hstride(devinfo, inst, reg.hstride);
brw_inst_set_src0_width(devinfo, inst, reg.width);
brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
}
} else {
brw_inst_set_src0_da16_swiz_x(devinfo, inst,
BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
brw_inst_set_src0_da16_swiz_y(devinfo, inst,
BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
brw_inst_set_src0_da16_swiz_z(devinfo, inst,
BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
brw_inst_set_src0_da16_swiz_w(devinfo, inst,
BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
/* This is an oddity of the fact we're using the same
* descriptions for registers in align_16 as align_1:
*/
brw_inst_set_src0_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
} else {
brw_inst_set_src0_vstride(devinfo, inst, reg.vstride);
}
}
}
}
}
void
brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
const struct intel_device_info *devinfo = p->devinfo;
if (reg.file == BRW_GENERAL_REGISTER_FILE)
assert(reg.nr < XE2_MAX_GRF);
if (brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDS ||
brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDSC ||
(devinfo->ver >= 12 &&
(brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC))) {
assert(reg.file == BRW_GENERAL_REGISTER_FILE ||
reg.file == BRW_ARCHITECTURE_REGISTER_FILE);
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
assert(reg.subnr == 0);
assert(has_scalar_region(reg) ||
(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
reg.vstride == reg.width + 1));
assert(!reg.negate && !reg.abs);
brw_inst_set_send_src1_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
brw_inst_set_send_src1_reg_file(devinfo, inst, reg.file);
} else {
/* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
*
* "Accumulator registers may be accessed explicitly as src0
* operands only."
*/
assert(reg.file != BRW_ARCHITECTURE_REGISTER_FILE ||
(reg.nr & 0xF0) != BRW_ARF_ACCUMULATOR);
brw_inst_set_src1_file_type(devinfo, inst, reg.file, reg.type);
brw_inst_set_src1_abs(devinfo, inst, reg.abs);
brw_inst_set_src1_negate(devinfo, inst, reg.negate);
/* Only src1 can be immediate in two-argument instructions.
*/
assert(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE);
if (reg.file == BRW_IMMEDIATE_VALUE) {
/* two-argument instructions can only use 32-bit immediates */
assert(brw_type_size_bytes(reg.type) < 8);
brw_inst_set_imm_ud(devinfo, inst, reg.ud);
} else {
/* This is a hardware restriction, which may or may not be lifted
* in the future:
*/
assert (reg.address_mode == BRW_ADDRESS_DIRECT);
/* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
brw_inst_set_src1_da_reg_nr(devinfo, inst, phys_nr(devinfo, reg));
if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
brw_inst_set_src1_da1_subreg_nr(devinfo, inst, phys_subnr(devinfo, reg));
} else {
brw_inst_set_src1_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
}
if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
if (reg.width == BRW_WIDTH_1 &&
brw_inst_exec_size(devinfo, inst) == BRW_EXECUTE_1) {
brw_inst_set_src1_hstride(devinfo, inst, BRW_HORIZONTAL_STRIDE_0);
brw_inst_set_src1_width(devinfo, inst, BRW_WIDTH_1);
brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_0);
} else {
brw_inst_set_src1_hstride(devinfo, inst, reg.hstride);
brw_inst_set_src1_width(devinfo, inst, reg.width);
brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
}
} else {
brw_inst_set_src1_da16_swiz_x(devinfo, inst,
BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_X));
brw_inst_set_src1_da16_swiz_y(devinfo, inst,
BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Y));
brw_inst_set_src1_da16_swiz_z(devinfo, inst,
BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_Z));
brw_inst_set_src1_da16_swiz_w(devinfo, inst,
BRW_GET_SWZ(reg.swizzle, BRW_CHANNEL_W));
if (reg.vstride == BRW_VERTICAL_STRIDE_8) {
/* This is an oddity of the fact we're using the same
* descriptions for registers in align_16 as align_1:
*/
brw_inst_set_src1_vstride(devinfo, inst, BRW_VERTICAL_STRIDE_4);
} else {
brw_inst_set_src1_vstride(devinfo, inst, reg.vstride);
}
}
}
}
}
/**
* Specify the descriptor and extended descriptor immediate for a SEND(C)
* message instruction.
*/
void
brw_set_desc_ex(struct brw_codegen *p, brw_inst *inst,
unsigned desc, unsigned ex_desc)
{
const struct intel_device_info *devinfo = p->devinfo;
assert(brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SEND ||
brw_inst_opcode(p->isa, inst) == BRW_OPCODE_SENDC);
if (devinfo->ver < 12)
brw_inst_set_src1_file_type(devinfo, inst,
BRW_IMMEDIATE_VALUE, BRW_TYPE_UD);
brw_inst_set_send_desc(devinfo, inst, desc);
if (devinfo->ver >= 9)
brw_inst_set_send_ex_desc(devinfo, inst, ex_desc);
}
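/* Usage sketch for brw_set_desc_ex() above (illustrative, not from the
* original source): a caller that has just emitted a SEND can program an
* immediate descriptor pair with, e.g.,
*
*    brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
*    brw_set_desc_ex(p, send, desc, ex_desc);
*
* where desc and ex_desc hold message descriptor bits built by the caller.
*/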
static void
brw_inst_set_state(const struct brw_isa_info *isa,
brw_inst *insn,
const struct brw_insn_state *state)
{
const struct intel_device_info *devinfo = isa->devinfo;
brw_inst_set_exec_size(devinfo, insn, state->exec_size);
brw_inst_set_group(devinfo, insn, state->group);
brw_inst_set_access_mode(devinfo, insn, state->access_mode);
brw_inst_set_mask_control(devinfo, insn, state->mask_control);
if (devinfo->ver >= 12)
brw_inst_set_swsb(devinfo, insn, tgl_swsb_encode(devinfo, state->swsb));
brw_inst_set_saturate(devinfo, insn, state->saturate);
brw_inst_set_pred_control(devinfo, insn, state->predicate);
brw_inst_set_pred_inv(devinfo, insn, state->pred_inv);
if (is_3src(isa, brw_inst_opcode(isa, insn)) &&
state->access_mode == BRW_ALIGN_16) {
brw_inst_set_3src_a16_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
brw_inst_set_3src_a16_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
} else {
brw_inst_set_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
brw_inst_set_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
}
if (devinfo->ver < 20)
brw_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control);
}
static brw_inst *
brw_append_insns(struct brw_codegen *p, unsigned nr_insn, unsigned alignment)
{
assert(util_is_power_of_two_or_zero(sizeof(brw_inst)));
assert(util_is_power_of_two_or_zero(alignment));
const unsigned align_insn = MAX2(alignment / sizeof(brw_inst), 1);
const unsigned start_insn = ALIGN(p->nr_insn, align_insn);
const unsigned new_nr_insn = start_insn + nr_insn;
if (p->store_size < new_nr_insn) {
p->store_size = util_next_power_of_two(new_nr_insn * sizeof(brw_inst));
p->store = reralloc(p->mem_ctx, p->store, brw_inst, p->store_size);
}
/* Memset any padding due to alignment to 0. We don't want to be hashing
* or caching a bunch of random bits we got from a memory allocation.
*/
if (p->nr_insn < start_insn) {
memset(&p->store[p->nr_insn], 0,
(start_insn - p->nr_insn) * sizeof(brw_inst));
}
assert(p->next_insn_offset == p->nr_insn * sizeof(brw_inst));
p->nr_insn = new_nr_insn;
p->next_insn_offset = new_nr_insn * sizeof(brw_inst);
return &p->store[start_insn];
}
void
brw_realign(struct brw_codegen *p, unsigned alignment)
{
brw_append_insns(p, 0, alignment);
}
int
brw_append_data(struct brw_codegen *p, void *data,
unsigned size, unsigned alignment)
{
unsigned nr_insn = DIV_ROUND_UP(size, sizeof(brw_inst));
void *dst = brw_append_insns(p, nr_insn, alignment);
memcpy(dst, data, size);
/* If it's not a whole number of instructions, memset the end */
if (size < nr_insn * sizeof(brw_inst))
memset(dst + size, 0, nr_insn * sizeof(brw_inst) - size);
return dst - (void *)p->store;
}
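/* Usage sketch (illustrative, not from the original source): out-of-band
* data such as a small constant table can be appended to the program with
*
*    uint32_t table[4] = { 0, 1, 2, 3 };
*    int offset = brw_append_data(p, table, sizeof(table), 32);
*
* and later located through the returned byte offset into p->store.
*/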
#define next_insn brw_next_insn
brw_inst *
brw_next_insn(struct brw_codegen *p, unsigned opcode)
{
brw_inst *insn = brw_append_insns(p, 1, sizeof(brw_inst));
memset(insn, 0, sizeof(*insn));
brw_inst_set_opcode(p->isa, insn, opcode);
/* Apply the default instruction state */
brw_inst_set_state(p->isa, insn, p->current);
return insn;
}
void
brw_add_reloc(struct brw_codegen *p, uint32_t id,
enum brw_shader_reloc_type type,
uint32_t offset, uint32_t delta)
{
if (p->num_relocs + 1 > p->reloc_array_size) {
p->reloc_array_size = MAX2(16, p->reloc_array_size * 2);
p->relocs = reralloc(p->mem_ctx, p->relocs,
struct brw_shader_reloc, p->reloc_array_size);
}
p->relocs[p->num_relocs++] = (struct brw_shader_reloc) {
.id = id,
.type = type,
.offset = offset,
.delta = delta,
};
}
static brw_inst *
brw_alu1(struct brw_codegen *p, unsigned opcode,
struct brw_reg dest, struct brw_reg src)
{
brw_inst *insn = next_insn(p, opcode);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
return insn;
}
static brw_inst *
brw_alu2(struct brw_codegen *p, unsigned opcode,
struct brw_reg dest, struct brw_reg src0, struct brw_reg src1)
{
/* 64-bit immediates are only supported on 1-src instructions */
assert(src0.file != BRW_IMMEDIATE_VALUE ||
brw_type_size_bytes(src0.type) <= 4);
assert(src1.file != BRW_IMMEDIATE_VALUE ||
brw_type_size_bytes(src1.type) <= 4);
brw_inst *insn = next_insn(p, opcode);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, src1);
return insn;
}
static int
get_3src_subreg_nr(struct brw_reg reg)
{
/* Normally, SubRegNum is in bytes (0..31). However, 3-src instructions
* use 32-bit units (components 0..7). Since they only support F/D/UD
* types, this doesn't lose any flexibility, but uses fewer bits.
*/
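/* For example, a byte subnr of 8 corresponds to 32-bit component 2. */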
return reg.subnr / 4;
}
static enum gfx10_align1_3src_vertical_stride
to_3src_align1_vstride(const struct intel_device_info *devinfo,
enum brw_vertical_stride vstride)
{
switch (vstride) {
case BRW_VERTICAL_STRIDE_0:
return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_0;
case BRW_VERTICAL_STRIDE_1:
assert(devinfo->ver >= 12);
return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_1;
case BRW_VERTICAL_STRIDE_2:
assert(devinfo->ver < 12);
return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_2;
case BRW_VERTICAL_STRIDE_4:
return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_4;
case BRW_VERTICAL_STRIDE_8:
case BRW_VERTICAL_STRIDE_16:
return BRW_ALIGN1_3SRC_VERTICAL_STRIDE_8;
default:
unreachable("invalid vstride");
}
}
static enum gfx10_align1_3src_src_horizontal_stride
to_3src_align1_hstride(enum brw_horizontal_stride hstride)
{
switch (hstride) {
case BRW_HORIZONTAL_STRIDE_0:
return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_0;
case BRW_HORIZONTAL_STRIDE_1:
return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_1;
case BRW_HORIZONTAL_STRIDE_2:
return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_2;
case BRW_HORIZONTAL_STRIDE_4:
return BRW_ALIGN1_3SRC_SRC_HORIZONTAL_STRIDE_4;
default:
unreachable("invalid hstride");
}
}
static brw_inst *
brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
{
const struct intel_device_info *devinfo = p->devinfo;
brw_inst *inst = next_insn(p, opcode);
assert(dest.nr < XE2_MAX_GRF);
if (devinfo->ver >= 10)
assert(!(src0.file == BRW_IMMEDIATE_VALUE &&
src2.file == BRW_IMMEDIATE_VALUE));
assert(src0.file == BRW_IMMEDIATE_VALUE || src0.nr < XE2_MAX_GRF);
assert(src1.file != BRW_IMMEDIATE_VALUE && src1.nr < XE2_MAX_GRF);
assert(src2.file == BRW_IMMEDIATE_VALUE || src2.nr < XE2_MAX_GRF);
assert(dest.address_mode == BRW_ADDRESS_DIRECT);
assert(src0.address_mode == BRW_ADDRESS_DIRECT);
assert(src1.address_mode == BRW_ADDRESS_DIRECT);
assert(src2.address_mode == BRW_ADDRESS_DIRECT);
if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
(dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
(dest.nr & 0xF0) == BRW_ARF_ACCUMULATOR));
STATIC_ASSERT((BRW_ARCHITECTURE_REGISTER_FILE ^ 1) == BRW_ALIGN1_3SRC_ACCUMULATOR);
STATIC_ASSERT((BRW_GENERAL_REGISTER_FILE ^ 1) == BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE);
/* Gfx10 and Gfx11 bit encoding for the register file is the inversion of
* the actual register file (see the STATIC_ASSERTs above).
*/
unsigned dst_reg_file = devinfo->ver >= 12 ? dest.file : dest.file ^ 1;
brw_inst_set_3src_a1_dst_reg_file(devinfo, inst, dst_reg_file);
brw_inst_set_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
brw_inst_set_3src_a1_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest) / 8);
brw_inst_set_3src_a1_dst_hstride(devinfo, inst, BRW_ALIGN1_3SRC_DST_HORIZONTAL_STRIDE_1);
if (brw_type_is_float(dest.type)) {
brw_inst_set_3src_a1_exec_type(devinfo, inst,
BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
} else {
brw_inst_set_3src_a1_exec_type(devinfo, inst,
BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
}
brw_inst_set_3src_a1_dst_type(devinfo, inst, dest.type);
brw_inst_set_3src_a1_src0_type(devinfo, inst, src0.type);
brw_inst_set_3src_a1_src1_type(devinfo, inst, src1.type);
brw_inst_set_3src_a1_src2_type(devinfo, inst, src2.type);
if (src0.file == BRW_IMMEDIATE_VALUE) {
brw_inst_set_3src_a1_src0_imm(devinfo, inst, src0.ud);
} else {
brw_inst_set_3src_a1_src0_vstride(
devinfo, inst, to_3src_align1_vstride(devinfo, src0.vstride));
brw_inst_set_3src_a1_src0_hstride(devinfo, inst,
to_3src_align1_hstride(src0.hstride));
brw_inst_set_3src_a1_src0_subreg_nr(devinfo, inst, phys_subnr(devinfo, src0));
brw_inst_set_3src_src0_reg_nr(devinfo, inst, phys_nr(devinfo, src0));
brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
}
brw_inst_set_3src_a1_src1_vstride(
devinfo, inst, to_3src_align1_vstride(devinfo, src1.vstride));
brw_inst_set_3src_a1_src1_hstride(devinfo, inst,
to_3src_align1_hstride(src1.hstride));
brw_inst_set_3src_a1_src1_subreg_nr(devinfo, inst, phys_subnr(devinfo, src1));
if (src1.file == BRW_ARCHITECTURE_REGISTER_FILE) {
brw_inst_set_3src_src1_reg_nr(devinfo, inst, BRW_ARF_ACCUMULATOR);
} else {
brw_inst_set_3src_src1_reg_nr(devinfo, inst, phys_nr(devinfo, src1));
}
brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
if (src2.file == BRW_IMMEDIATE_VALUE) {
brw_inst_set_3src_a1_src2_imm(devinfo, inst, src2.ud);
} else {
brw_inst_set_3src_a1_src2_hstride(devinfo, inst,
to_3src_align1_hstride(src2.hstride));
/* no vstride on src2 */
brw_inst_set_3src_a1_src2_subreg_nr(devinfo, inst, phys_subnr(devinfo, src2));
brw_inst_set_3src_src2_reg_nr(devinfo, inst, phys_nr(devinfo, src2));
brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
}
assert(src0.file == BRW_GENERAL_REGISTER_FILE ||
src0.file == BRW_IMMEDIATE_VALUE);
assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
(src1.file == BRW_ARCHITECTURE_REGISTER_FILE &&
src1.nr == BRW_ARF_ACCUMULATOR));
assert(src2.file == BRW_GENERAL_REGISTER_FILE ||
src2.file == BRW_IMMEDIATE_VALUE);
if (devinfo->ver >= 12) {
if (src0.file == BRW_IMMEDIATE_VALUE) {
brw_inst_set_3src_a1_src0_is_imm(devinfo, inst, 1);
} else {
brw_inst_set_3src_a1_src0_reg_file(devinfo, inst, src0.file);
}
brw_inst_set_3src_a1_src1_reg_file(devinfo, inst, src1.file);
if (src2.file == BRW_IMMEDIATE_VALUE) {
brw_inst_set_3src_a1_src2_is_imm(devinfo, inst, 1);
} else {
brw_inst_set_3src_a1_src2_reg_file(devinfo, inst, src2.file);
}
} else {
brw_inst_set_3src_a1_src0_reg_file(devinfo, inst,
src0.file == BRW_GENERAL_REGISTER_FILE ?
BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
BRW_ALIGN1_3SRC_IMMEDIATE_VALUE);
brw_inst_set_3src_a1_src1_reg_file(devinfo, inst,
src1.file == BRW_GENERAL_REGISTER_FILE ?
BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
BRW_ALIGN1_3SRC_ACCUMULATOR);
brw_inst_set_3src_a1_src2_reg_file(devinfo, inst,
src2.file == BRW_GENERAL_REGISTER_FILE ?
BRW_ALIGN1_3SRC_GENERAL_REGISTER_FILE :
BRW_ALIGN1_3SRC_IMMEDIATE_VALUE);
}
} else {
assert(dest.file == BRW_GENERAL_REGISTER_FILE);
assert(dest.type == BRW_TYPE_F ||
dest.type == BRW_TYPE_DF ||
dest.type == BRW_TYPE_D ||
dest.type == BRW_TYPE_UD ||
dest.type == BRW_TYPE_HF);
brw_inst_set_3src_dst_reg_nr(devinfo, inst, dest.nr);
brw_inst_set_3src_a16_dst_subreg_nr(devinfo, inst, dest.subnr / 4);
brw_inst_set_3src_a16_dst_writemask(devinfo, inst, dest.writemask);
assert(src0.file == BRW_GENERAL_REGISTER_FILE);
brw_inst_set_3src_a16_src0_swizzle(devinfo, inst, src0.swizzle);
brw_inst_set_3src_a16_src0_subreg_nr(devinfo, inst, get_3src_subreg_nr(src0));
brw_inst_set_3src_src0_reg_nr(devinfo, inst, src0.nr);
brw_inst_set_3src_src0_abs(devinfo, inst, src0.abs);
brw_inst_set_3src_src0_negate(devinfo, inst, src0.negate);
brw_inst_set_3src_a16_src0_rep_ctrl(devinfo, inst,
src0.vstride == BRW_VERTICAL_STRIDE_0);
assert(src1.file == BRW_GENERAL_REGISTER_FILE);
brw_inst_set_3src_a16_src1_swizzle(devinfo, inst, src1.swizzle);
brw_inst_set_3src_a16_src1_subreg_nr(devinfo, inst, get_3src_subreg_nr(src1));
brw_inst_set_3src_src1_reg_nr(devinfo, inst, src1.nr);
brw_inst_set_3src_src1_abs(devinfo, inst, src1.abs);
brw_inst_set_3src_src1_negate(devinfo, inst, src1.negate);
brw_inst_set_3src_a16_src1_rep_ctrl(devinfo, inst,
src1.vstride == BRW_VERTICAL_STRIDE_0);
assert(src2.file == BRW_GENERAL_REGISTER_FILE);
brw_inst_set_3src_a16_src2_swizzle(devinfo, inst, src2.swizzle);
brw_inst_set_3src_a16_src2_subreg_nr(devinfo, inst, get_3src_subreg_nr(src2));
brw_inst_set_3src_src2_reg_nr(devinfo, inst, src2.nr);
brw_inst_set_3src_src2_abs(devinfo, inst, src2.abs);
brw_inst_set_3src_src2_negate(devinfo, inst, src2.negate);
brw_inst_set_3src_a16_src2_rep_ctrl(devinfo, inst,
src2.vstride == BRW_VERTICAL_STRIDE_0);
/* Set both the source and destination types based on dest.type,
* ignoring the source register types. The MAD and LRP emitters ensure
* that all four types are float. The BFE and BFI2 emitters, however,
* may send us mixed D and UD types and want us to ignore that and use
* the destination type.
*/
brw_inst_set_3src_a16_src_type(devinfo, inst, dest.type);
brw_inst_set_3src_a16_dst_type(devinfo, inst, dest.type);
/* From the Bspec, 3D Media GPGPU, Instruction fields, srcType:
*
* "Three source instructions can use operands with mixed-mode
* precision. When SrcType field is set to :f or :hf it defines
* precision for source 0 only, and fields Src1Type and Src2Type
* define precision for other source operands:
*
* 0b = :f. Single precision Float (32-bit).
* 1b = :hf. Half precision Float (16-bit)."
*/
if (src1.type == BRW_TYPE_HF)
brw_inst_set_3src_a16_src1_type(devinfo, inst, 1);
if (src2.type == BRW_TYPE_HF)
brw_inst_set_3src_a16_src2_type(devinfo, inst, 1);
}
return inst;
}
static brw_inst *
brw_dpas_three_src(struct brw_codegen *p, enum gfx12_systolic_depth opcode,
unsigned sdepth, unsigned rcount, struct brw_reg dest,
struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
{
const struct intel_device_info *devinfo = p->devinfo;
brw_inst *inst = next_insn(p, opcode);
assert(dest.file == BRW_GENERAL_REGISTER_FILE);
brw_inst_set_dpas_3src_dst_reg_file(devinfo, inst,
BRW_GENERAL_REGISTER_FILE);
brw_inst_set_dpas_3src_dst_reg_nr(devinfo, inst, phys_nr(devinfo, dest));
brw_inst_set_dpas_3src_dst_subreg_nr(devinfo, inst, phys_subnr(devinfo, dest));
if (brw_type_is_float(dest.type)) {
brw_inst_set_dpas_3src_exec_type(devinfo, inst,
BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
} else {
brw_inst_set_dpas_3src_exec_type(devinfo, inst,
BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
}
brw_inst_set_dpas_3src_sdepth(devinfo, inst, sdepth);
brw_inst_set_dpas_3src_rcount(devinfo, inst, rcount - 1);
brw_inst_set_dpas_3src_dst_type(devinfo, inst, dest.type);
brw_inst_set_dpas_3src_src0_type(devinfo, inst, src0.type);
brw_inst_set_dpas_3src_src1_type(devinfo, inst, src1.type);
brw_inst_set_dpas_3src_src2_type(devinfo, inst, src2.type);
assert(src0.file == BRW_GENERAL_REGISTER_FILE ||
(src0.file == BRW_ARCHITECTURE_REGISTER_FILE &&
src0.nr == BRW_ARF_NULL));
brw_inst_set_dpas_3src_src0_reg_file(devinfo, inst, src0.file);
brw_inst_set_dpas_3src_src0_reg_nr(devinfo, inst, phys_nr(devinfo, src0));
brw_inst_set_dpas_3src_src0_subreg_nr(devinfo, inst, phys_subnr(devinfo, src0));
assert(src1.file == BRW_GENERAL_REGISTER_FILE);
brw_inst_set_dpas_3src_src1_reg_file(devinfo, inst, src1.file);
brw_inst_set_dpas_3src_src1_reg_nr(devinfo, inst, phys_nr(devinfo, src1));
brw_inst_set_dpas_3src_src1_subreg_nr(devinfo, inst, phys_subnr(devinfo, src1));
brw_inst_set_dpas_3src_src1_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE);
assert(src2.file == BRW_GENERAL_REGISTER_FILE);
brw_inst_set_dpas_3src_src2_reg_file(devinfo, inst, src2.file);
brw_inst_set_dpas_3src_src2_reg_nr(devinfo, inst, phys_nr(devinfo, src2));
brw_inst_set_dpas_3src_src2_subreg_nr(devinfo, inst, phys_subnr(devinfo, src2));
brw_inst_set_dpas_3src_src2_subbyte(devinfo, inst, BRW_SUB_BYTE_PRECISION_NONE);
return inst;
}
/***********************************************************************
* Convenience routines.
*/
#define ALU1(OP) \
brw_inst *brw_##OP(struct brw_codegen *p, \
struct brw_reg dest, \
struct brw_reg src0) \
{ \
return brw_alu1(p, BRW_OPCODE_##OP, dest, src0); \
}
#define ALU2(OP) \
brw_inst *brw_##OP(struct brw_codegen *p, \
struct brw_reg dest, \
struct brw_reg src0, \
struct brw_reg src1) \
{ \
return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1); \
}
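/* For example, ALU2(AND) below expands to:
*
*    brw_inst *brw_AND(struct brw_codegen *p,
*                      struct brw_reg dest,
*                      struct brw_reg src0,
*                      struct brw_reg src1)
*    {
*       return brw_alu2(p, BRW_OPCODE_AND, dest, src0, src1);
*    }
*/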
#define ALU3(OP) \
brw_inst *brw_##OP(struct brw_codegen *p, \
struct brw_reg dest, \
struct brw_reg src0, \
struct brw_reg src1, \
struct brw_reg src2) \
{ \
if (p->current->access_mode == BRW_ALIGN_16) { \
if (src0.vstride == BRW_VERTICAL_STRIDE_0) \
src0.swizzle = BRW_SWIZZLE_XXXX; \
if (src1.vstride == BRW_VERTICAL_STRIDE_0) \
src1.swizzle = BRW_SWIZZLE_XXXX; \
if (src2.vstride == BRW_VERTICAL_STRIDE_0) \
src2.swizzle = BRW_SWIZZLE_XXXX; \
} \
return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}
#define ALU3F(OP) \
brw_inst *brw_##OP(struct brw_codegen *p, \
struct brw_reg dest, \
struct brw_reg src0, \
struct brw_reg src1, \
struct brw_reg src2) \
{ \
assert(dest.type == BRW_TYPE_F || \
dest.type == BRW_TYPE_DF); \
if (dest.type == BRW_TYPE_F) { \
assert(src0.type == BRW_TYPE_F); \
assert(src1.type == BRW_TYPE_F); \
assert(src2.type == BRW_TYPE_F); \
} else if (dest.type == BRW_TYPE_DF) { \
assert(src0.type == BRW_TYPE_DF); \
assert(src1.type == BRW_TYPE_DF); \
assert(src2.type == BRW_TYPE_DF); \
} \
\
if (p->current->access_mode == BRW_ALIGN_16) { \
if (src0.vstride == BRW_VERTICAL_STRIDE_0) \
src0.swizzle = BRW_SWIZZLE_XXXX; \
if (src1.vstride == BRW_VERTICAL_STRIDE_0) \
src1.swizzle = BRW_SWIZZLE_XXXX; \
if (src2.vstride == BRW_VERTICAL_STRIDE_0) \
src2.swizzle = BRW_SWIZZLE_XXXX; \
} \
return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
}
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU3(DP4A)
ALU3(MAD)
ALU3F(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)
ALU3(ADD3)
ALU1(MOV)
brw_inst *
brw_ADD(struct brw_codegen *p, struct brw_reg dest,
struct brw_reg src0, struct brw_reg src1)
{
/* 6.2.2: add */
if (src0.type == BRW_TYPE_F ||
(src0.file == BRW_IMMEDIATE_VALUE &&
src0.type == BRW_TYPE_VF)) {
assert(src1.type != BRW_TYPE_UD);
assert(src1.type != BRW_TYPE_D);
}
if (src1.type == BRW_TYPE_F ||
(src1.file == BRW_IMMEDIATE_VALUE &&
src1.type == BRW_TYPE_VF)) {
assert(src0.type != BRW_TYPE_UD);
assert(src0.type != BRW_TYPE_D);
}
return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
}
brw_inst *
brw_AVG(struct brw_codegen *p, struct brw_reg dest,
struct brw_reg src0, struct brw_reg src1)
{
assert(dest.type == src0.type);
assert(src0.type == src1.type);
switch (src0.type) {
case BRW_TYPE_B:
case BRW_TYPE_UB:
case BRW_TYPE_W:
case BRW_TYPE_UW:
case BRW_TYPE_D:
case BRW_TYPE_UD:
break;
default:
unreachable("Bad type for brw_AVG");
}
return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
}
brw_inst *
brw_MUL(struct brw_codegen *p, struct brw_reg dest,
struct brw_reg src0, struct brw_reg src1)
{
/* 6.32.38: mul */
if (src0.type == BRW_TYPE_D ||
src0.type == BRW_TYPE_UD ||
src1.type == BRW_TYPE_D ||
src1.type == BRW_TYPE_UD) {
assert(dest.type != BRW_TYPE_F);
}
if (src0.type == BRW_TYPE_F ||
(src0.file == BRW_IMMEDIATE_VALUE &&
src0.type == BRW_TYPE_VF)) {
assert(src1.type != BRW_TYPE_UD);
assert(src1.type != BRW_TYPE_D);
}
if (src1.type == BRW_TYPE_F ||
(src1.file == BRW_IMMEDIATE_VALUE &&
src1.type == BRW_TYPE_VF)) {
assert(src0.type != BRW_TYPE_UD);
assert(src0.type != BRW_TYPE_D);
}
assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
src0.nr != BRW_ARF_ACCUMULATOR);
assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
src1.nr != BRW_ARF_ACCUMULATOR);
return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
}
brw_inst *
brw_LINE(struct brw_codegen *p, struct brw_reg dest,
struct brw_reg src0, struct brw_reg src1)
{
src0.vstride = BRW_VERTICAL_STRIDE_0;
src0.width = BRW_WIDTH_1;
src0.hstride = BRW_HORIZONTAL_STRIDE_0;
return brw_alu2(p, BRW_OPCODE_LINE, dest, src0, src1);
}
brw_inst *
brw_PLN(struct brw_codegen *p, struct brw_reg dest,
struct brw_reg src0, struct brw_reg src1)
{
src0.vstride = BRW_VERTICAL_STRIDE_0;
src0.width = BRW_WIDTH_1;
src0.hstride = BRW_HORIZONTAL_STRIDE_0;
src1.vstride = BRW_VERTICAL_STRIDE_8;
src1.width = BRW_WIDTH_8;
src1.hstride = BRW_HORIZONTAL_STRIDE_1;
return brw_alu2(p, BRW_OPCODE_PLN, dest, src0, src1);
}
brw_inst *
brw_DPAS(struct brw_codegen *p, enum gfx12_systolic_depth sdepth,
unsigned rcount, struct brw_reg dest, struct brw_reg src0,
struct brw_reg src1, struct brw_reg src2)
{
return brw_dpas_three_src(p, BRW_OPCODE_DPAS, sdepth, rcount, dest, src0,
src1, src2);
}
void brw_NOP(struct brw_codegen *p)
{
brw_inst *insn = next_insn(p, BRW_OPCODE_NOP);
memset(insn, 0, sizeof(*insn));
brw_inst_set_opcode(p->isa, insn, BRW_OPCODE_NOP);
}
void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func)
{
brw_inst *insn = next_insn(p, BRW_OPCODE_SYNC);
brw_inst_set_cond_modifier(p->devinfo, insn, func);
}
/***********************************************************************
* Comparisons, if/else/endif
*/
brw_inst *
brw_JMPI(struct brw_codegen *p, struct brw_reg index,
unsigned predicate_control)
{
const struct intel_device_info *devinfo = p->devinfo;
struct brw_reg ip = brw_ip_reg();
brw_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);
brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_1);
brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
brw_inst_set_pred_control(devinfo, inst, predicate_control);
return inst;
}
static void
push_if_stack(struct brw_codegen *p, brw_inst *inst)
{
p->if_stack[p->if_stack_depth] = inst - p->store;
p->if_stack_depth++;
if (p->if_stack_array_size <= p->if_stack_depth) {
p->if_stack_array_size *= 2;
p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
p->if_stack_array_size);
}
}
static brw_inst *
pop_if_stack(struct brw_codegen *p)
{
p->if_stack_depth--;
return &p->store[p->if_stack[p->if_stack_depth]];
}
static void
push_loop_stack(struct brw_codegen *p, brw_inst *inst)
{
if (p->loop_stack_array_size <= (p->loop_stack_depth + 1)) {
p->loop_stack_array_size *= 2;
p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
p->loop_stack_array_size);
}
p->loop_stack[p->loop_stack_depth] = inst - p->store;
p->loop_stack_depth++;
}
static brw_inst *
get_inner_do_insn(struct brw_codegen *p)
{
return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
}
/* EU takes the value from the flag register and pushes it onto some
* sort of a stack (presumably merging with any flag value already on
* the stack). Within an if block, the flags at the top of the stack
* control execution on each channel of the unit, e.g. on each of the
* 16 pixel values in our wm programs.
*
* When the matching 'else' instruction is reached (presumably by
* countdown of the instruction count patched in by our ELSE/ENDIF
* functions), the relevant flags are inverted.
*
* When the matching 'endif' instruction is reached, the flags are
* popped off. If the stack is now empty, normal execution resumes.
*/
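/* Typical usage (illustrative, not from the original source): structured
* control flow is emitted as
*
*    brw_IF(p, BRW_EXECUTE_8);
*    ... instructions for the then-block ...
*    brw_ELSE(p);
*    ... instructions for the else-block ...
*    brw_ENDIF(p);
*
* and brw_ENDIF() patches the IF/ELSE jump targets via patch_IF_ELSE().
*/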
brw_inst *
brw_IF(struct brw_codegen *p, unsigned execute_size)
{
const struct intel_device_info *devinfo = p->devinfo;
brw_inst *insn;
insn = next_insn(p, BRW_OPCODE_IF);
/* Override the defaults for this instruction:
*/
brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_TYPE_D)));
if (devinfo->ver < 12)
brw_set_src0(p, insn, brw_imm_d(0));
brw_inst_set_jip(devinfo, insn, 0);
brw_inst_set_uip(devinfo, insn, 0);
brw_inst_set_exec_size(devinfo, insn, execute_size);
brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
brw_inst_set_pred_control(devinfo, insn, BRW_PREDICATE_NORMAL);
brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
push_if_stack(p, insn);
return insn;
}
/**
* Patch IF and ELSE instructions with appropriate jump targets.
*/
static void
patch_IF_ELSE(struct brw_codegen *p,
brw_inst *if_inst, brw_inst *else_inst, brw_inst *endif_inst)
{
const struct intel_device_info *devinfo = p->devinfo;
assert(if_inst != NULL && brw_inst_opcode(p->isa, if_inst) == BRW_OPCODE_IF);
assert(endif_inst != NULL);
assert(else_inst == NULL || brw_inst_opcode(p->isa, else_inst) == BRW_OPCODE_ELSE);
unsigned br = brw_jump_scale(devinfo);
assert(brw_inst_opcode(p->isa, endif_inst) == BRW_OPCODE_ENDIF);
brw_inst_set_exec_size(devinfo, endif_inst, brw_inst_exec_size(devinfo, if_inst));
if (else_inst == NULL) {
/* Patch IF -> ENDIF */
brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
brw_inst_set_jip(devinfo, if_inst, br * (endif_inst - if_inst));
} else {
brw_inst_set_exec_size(devinfo, else_inst, brw_inst_exec_size(devinfo, if_inst));
/* Patch ELSE -> ENDIF */
/* The IF instruction's JIP should point just past the ELSE */
brw_inst_set_jip(devinfo, if_inst, br * (else_inst - if_inst + 1));
/* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
brw_inst_set_uip(devinfo, if_inst, br * (endif_inst - if_inst));
if (devinfo->ver < 11) {
/* Set the ELSE instruction to use branch_ctrl with a join
* jump target pointing at the NOP inserted right before
* the ENDIF instruction in order to make sure it is
* executed in all cases, since attempting to do the same
* as on other generations could cause the EU to jump at
* the instruction immediately after the ENDIF due to
* Wa_220160235, which could cause the program to continue
* running with all channels disabled.
*/
brw_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst - 1));
brw_inst_set_branch_control(devinfo, else_inst, true);
} else {
brw_inst_set_jip(devinfo, else_inst, br * (endif_inst - else_inst));
}
/* Since we don't set branch_ctrl on Gfx11+, the ELSE's
* JIP and UIP both should point to ENDIF on those
* platforms.
*/
brw_inst_set_uip(devinfo, else_inst, br * (endif_inst - else_inst));
}
}
void
brw_ELSE(struct brw_codegen *p)
{
const struct intel_device_info *devinfo = p->devinfo;
brw_inst *insn;
insn = next_insn(p, BRW_OPCODE_ELSE);
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
if (devinfo->ver < 12)
brw_set_src0(p, insn, brw_imm_d(0));
brw_inst_set_jip(devinfo, insn, 0);
brw_inst_set_uip(devinfo, insn, 0);
brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
push_if_stack(p, insn);
}
void
brw_ENDIF(struct brw_codegen *p)
{
const struct intel_device_info *devinfo = p->devinfo;
brw_inst *insn = NULL;
brw_inst *else_inst = NULL;
brw_inst *if_inst = NULL;
brw_inst *tmp;
assert(p->if_stack_depth > 0);
if (devinfo->ver < 11 &&
brw_inst_opcode(p->isa, &p->store[p->if_stack[
p->if_stack_depth - 1]]) == BRW_OPCODE_ELSE) {
/* Insert a NOP to be specified as join instruction within the
* ELSE block, which is valid for an ELSE instruction with
* branch_ctrl on. The ELSE instruction will be set to jump
* here instead of to the ENDIF instruction, since attempting to
* do the latter would prevent the ENDIF from being executed in
* some cases due to Wa_220160235, which could cause the program
* to continue running with all channels disabled.
*/
brw_NOP(p);
}
/*
* A single next_insn() call may change the base address of the instruction
* store memory (p->store), so call it first, before converting the saved
* if-stack indices back into instruction pointers.
*/
insn = next_insn(p, BRW_OPCODE_ENDIF);
/* Pop the IF and (optional) ELSE instructions from the stack */
tmp = pop_if_stack(p);
if (brw_inst_opcode(p->isa, tmp) == BRW_OPCODE_ELSE) {
else_inst = tmp;
tmp = pop_if_stack(p);
}
if_inst = tmp;
brw_set_src0(p, insn, brw_imm_d(0));
brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
brw_inst_set_mask_control(devinfo, insn, BRW_MASK_ENABLE);
brw_inst_set_jip(devinfo, insn, 2);
patch_IF_ELSE(p, if_inst, else_inst, insn);
}
brw_inst *
brw_BREAK(struct brw_codegen *p)
{
const struct intel_device_info *devinfo = p->devinfo;
brw_inst *insn;
insn = next_insn(p, BRW_OPCODE_BREAK);
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
brw_set_src0(p, insn, brw_imm_d(0x0));
brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
return insn;
}
brw_inst *
brw_CONT(struct brw_codegen *p)
{
const struct intel_device_info *devinfo = p->devinfo;
brw_inst *insn;
insn = next_insn(p, BRW_OPCODE_CONTINUE);
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_imm_d(0x0));
brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
return insn;
}
brw_inst *
brw_HALT(struct brw_codegen *p)
{
const struct intel_device_info *devinfo = p->devinfo;
brw_inst *insn;
insn = next_insn(p, BRW_OPCODE_HALT);
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
if (devinfo->ver < 12) {
brw_set_src0(p, insn, brw_imm_d(0x0));
}
brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
return insn;
}
/* DO/WHILE loop:
*
* The DO/WHILE is just an unterminated loop -- break or continue are
* used for control within the loop. We have a few ways they can be
* done.
*
* For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
* jip and no DO instruction.
*
* For gfx6, there's no more mask stack, so no need for DO. WHILE
* just points back to the first instruction of the loop.
*/
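/* Typical usage (illustrative, not from the original source):
*
*    brw_DO(p, BRW_EXECUTE_8);
*    ... loop body, optionally with brw_BREAK(p) / brw_CONT(p) ...
*    brw_WHILE(p);
*
* brw_WHILE() computes its backward JIP from the instruction recorded on the
* loop stack by brw_DO().
*/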
brw_inst *
brw_DO(struct brw_codegen *p, unsigned execute_size)
{
push_loop_stack(p, &p->store[p->nr_insn]);
return &p->store[p->nr_insn];
}
brw_inst *
brw_WHILE(struct brw_codegen *p)
{
const struct intel_device_info *devinfo = p->devinfo;
brw_inst *insn, *do_insn;
unsigned br = brw_jump_scale(devinfo);
insn = next_insn(p, BRW_OPCODE_WHILE);
do_insn = get_inner_do_insn(p);
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_TYPE_D));
if (devinfo->ver < 12)
brw_set_src0(p, insn, brw_imm_d(0));
brw_inst_set_jip(devinfo, insn, br * (do_insn - insn));
brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
p->loop_stack_depth--;
return insn;
}
void brw_CMP(struct brw_codegen *p,
struct brw_reg dest,
unsigned conditional,
struct brw_reg src0,
struct brw_reg src1)
{
const struct intel_device_info *devinfo = p->devinfo;
brw_inst *insn = next_insn(p, BRW_OPCODE_CMP);
brw_inst_set_cond_modifier(devinfo, insn, conditional);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, src1);
}
void brw_CMPN(struct brw_codegen *p,
struct brw_reg dest,
unsigned conditional,
struct brw_reg src0,
struct brw_reg src1)
{
const struct intel_device_info *devinfo = p->devinfo;
brw_inst *insn = next_insn(p, BRW_OPCODE_CMPN);
brw_inst_set_cond_modifier(devinfo, insn, conditional);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, src1);
}
/***********************************************************************
* Helpers for the various SEND message types:
*/
void gfx6_math(struct brw_codegen *p,
struct brw_reg dest,
unsigned function,
struct brw_reg src0,
struct brw_reg src1)
{
const struct intel_device_info *devinfo = p->devinfo;
brw_inst *insn = next_insn(p, BRW_OPCODE_MATH);
assert(dest.file == BRW_GENERAL_REGISTER_FILE);
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
assert(src0.type != BRW_TYPE_F);
assert(src1.type != BRW_TYPE_F);
assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
src1.file == BRW_IMMEDIATE_VALUE);
/* From BSpec 6647/47428 "[Instruction] Extended Math Function":
* INT DIV function does not support source modifiers.
*/
assert(!src0.negate);
assert(!src0.abs);
assert(!src1.negate);
assert(!src1.abs);
} else {
assert(src0.type == BRW_TYPE_F ||
(src0.type == BRW_TYPE_HF && devinfo->ver >= 9));
assert(src1.type == BRW_TYPE_F ||
(src1.type == BRW_TYPE_HF && devinfo->ver >= 9));
}
brw_inst_set_math_function(devinfo, insn, function);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, src1);
}
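/* Usage sketch (illustrative, not from the original source): a one-operand
* math function can be emitted with a null, float-typed second source, e.g.
*
*    gfx6_math(p, dst, BRW_MATH_FUNCTION_SQRT, src,
*              retype(brw_null_reg(), BRW_TYPE_F));
*
* which satisfies the float-type assertions above for the non-INT-DIV case.
*/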
void
brw_send_indirect_message(struct brw_codegen *p,
unsigned sfid,
struct brw_reg dst,
struct brw_reg payload,
struct brw_reg desc,
unsigned desc_imm,
bool eot)
{
const struct intel_device_info *devinfo = p->devinfo;
struct brw_inst *send;
dst = retype(dst, BRW_TYPE_UW);
assert(desc.type == BRW_TYPE_UD);
if (desc.file == BRW_IMMEDIATE_VALUE) {
send = next_insn(p, BRW_OPCODE_SEND);
brw_set_src0(p, send, retype(payload, BRW_TYPE_UD));
brw_set_desc(p, send, desc.ud | desc_imm);
} else {
const struct tgl_swsb swsb = brw_get_default_swsb(p);
struct brw_reg addr = retype(brw_address_reg(0), BRW_TYPE_UD);
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
brw_set_default_flag_reg(p, 0, 0);
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
/* Load the indirect descriptor to an address register using OR so the
* caller can specify additional descriptor bits with the desc_imm
* immediate.
*/
brw_OR(p, addr, desc, brw_imm_ud(desc_imm));
brw_pop_insn_state(p);
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
send = next_insn(p, BRW_OPCODE_SEND);
brw_set_src0(p, send, retype(payload, BRW_TYPE_UD));
if (devinfo->ver >= 12)
brw_inst_set_send_sel_reg32_desc(devinfo, send, true);
else
brw_set_src1(p, send, addr);
}
brw_set_dest(p, send, dst);
brw_inst_set_sfid(devinfo, send, sfid);
brw_inst_set_eot(devinfo, send, eot);
}
void
brw_send_indirect_split_message(struct brw_codegen *p,
unsigned sfid,
struct brw_reg dst,
struct brw_reg payload0,
struct brw_reg payload1,
struct brw_reg desc,
unsigned desc_imm,
struct brw_reg ex_desc,
unsigned ex_desc_imm,
bool ex_desc_scratch,
bool ex_bso,
bool eot)
{
const struct intel_device_info *devinfo = p->devinfo;
struct brw_inst *send;
dst = retype(dst, BRW_TYPE_UW);
assert(desc.type == BRW_TYPE_UD);
if (desc.file == BRW_IMMEDIATE_VALUE) {
desc.ud |= desc_imm;
} else {
const struct tgl_swsb swsb = brw_get_default_swsb(p);
struct brw_reg addr = retype(brw_address_reg(0), BRW_TYPE_UD);
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
brw_set_default_flag_reg(p, 0, 0);
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
/* Load the indirect descriptor to an address register using OR so the
* caller can specify additional descriptor bits with the desc_imm
* immediate.
*/
brw_OR(p, addr, desc, brw_imm_ud(desc_imm));
brw_pop_insn_state(p);
desc = addr;
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
}
if (ex_desc.file == BRW_IMMEDIATE_VALUE &&
!ex_desc_scratch &&
(devinfo->ver >= 12 ||
((ex_desc.ud | ex_desc_imm) & INTEL_MASK(15, 12)) == 0)) {
/* ATS-M PRMs, Volume 2d: Command Reference: Structures,
* EU_INSTRUCTION_SEND instruction
*
* "ExBSO: Exists If: ([ExDesc.IsReg]==true)"
*/
assert(!ex_bso);
ex_desc.ud |= ex_desc_imm;
} else {
const struct tgl_swsb swsb = brw_get_default_swsb(p);
struct brw_reg addr = retype(brw_address_reg(2), BRW_TYPE_UD);
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
brw_set_default_flag_reg(p, 0, 0);
brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
/* Load the indirect extended descriptor to an address register using OR
* so the caller can specify additional descriptor bits with the
* desc_imm immediate.
*
* Even though the instruction dispatcher always pulls the SFID and EOT
* fields from the instruction itself, the actual external unit which
* processes the message gets the SFID and EOT from the extended
* descriptor which comes from the address register. If we don't OR
* those two bits in, the external unit may get confused and hang.
*/
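/* The expression below places the SFID in the low bits of the extended
 * descriptor and EOT in bit 5, with the caller-provided ex_desc_imm OR'd
 * on top.  With ex_bso no immediate bits are folded in here; the src1
 * length is instead encoded directly into the instruction further below.
 */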
unsigned imm_part = ex_bso ? 0 : (ex_desc_imm | sfid | eot << 5);
if (ex_desc_scratch) {
assert(devinfo->verx10 >= 125);
brw_AND(p, addr,
retype(brw_vec1_grf(0, 5), BRW_TYPE_UD),
brw_imm_ud(INTEL_MASK(31, 10)));
if (devinfo->ver >= 20 && sfid == GFX12_SFID_UGM) {
const unsigned ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc_imm);
assert(ex_desc_imm == brw_message_ex_desc(devinfo, ex_mlen));
brw_SHR(p, addr, addr, brw_imm_ud(4));
} else {
/* Or the scratch surface offset together with the immediate part
* of the extended descriptor.
*/
brw_OR(p, addr, addr, brw_imm_ud(imm_part));
}
} else if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
/* ex_desc bits 15:12 don't exist in the instruction encoding prior
* to Gfx12, so we may have fallen back to an indirect extended
* descriptor.
*/
brw_MOV(p, addr, brw_imm_ud(ex_desc.ud | imm_part));
} else {
brw_OR(p, addr, ex_desc, brw_imm_ud(imm_part));
}
brw_pop_insn_state(p);
ex_desc = addr;
brw_set_default_swsb(p, tgl_swsb_dst_dep(swsb, 1));
}
send = next_insn(p, devinfo->ver >= 12 ? BRW_OPCODE_SEND : BRW_OPCODE_SENDS);
brw_set_dest(p, send, dst);
brw_set_src0(p, send, retype(payload0, BRW_TYPE_UD));
brw_set_src1(p, send, retype(payload1, BRW_TYPE_UD));
if (desc.file == BRW_IMMEDIATE_VALUE) {
brw_inst_set_send_sel_reg32_desc(devinfo, send, 0);
brw_inst_set_send_desc(devinfo, send, desc.ud);
} else {
assert(desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
assert(desc.nr == BRW_ARF_ADDRESS);
assert(desc.subnr == 0);
brw_inst_set_send_sel_reg32_desc(devinfo, send, 1);
}
if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0);
brw_inst_set_sends_ex_desc(devinfo, send, ex_desc.ud);
} else {
assert(ex_desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
assert(ex_desc.nr == BRW_ARF_ADDRESS);
assert((ex_desc.subnr & 0x3) == 0);
brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1);
brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, phys_subnr(devinfo, ex_desc) >> 2);
if (devinfo->ver >= 20 && sfid == GFX12_SFID_UGM) {
const unsigned ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc_imm);
brw_inst_set_bits(send, 103, 99, ex_mlen / reg_unit(devinfo));
}
}
if (ex_bso) {
/* The send instruction ExBSO field does not exist with UGM on Gfx20+;
* it is implicitly assumed.
*
* BSpec 56890
*/
if (devinfo->ver < 20 || sfid != GFX12_SFID_UGM)
brw_inst_set_send_ex_bso(devinfo, send, true);
brw_inst_set_send_src1_len(devinfo, send, GET_BITS(ex_desc_imm, 10, 6));
}
brw_inst_set_sfid(devinfo, send, sfid);
brw_inst_set_eot(devinfo, send, eot);
}
static bool
while_jumps_before_offset(const struct intel_device_info *devinfo,
brw_inst *insn, int while_offset, int start_offset)
{
int scale = 16 / brw_jump_scale(devinfo);
int jip = brw_inst_jip(devinfo, insn);
assert(jip < 0);
return while_offset + jip * scale <= start_offset;
}
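/* Scan forward from start_offset for the end of the innermost block
 * containing it: the next ENDIF, ELSE, or HALT at the same nesting depth,
 * or a WHILE whose jump target precedes start_offset (i.e. the end of the
 * enclosing loop rather than of a sibling do...while).  Returns 0 if no
 * such block end is found.
 */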
static int
brw_find_next_block_end(struct brw_codegen *p, int start_offset)
{
int offset;
void *store = p->store;
const struct intel_device_info *devinfo = p->devinfo;
int depth = 0;
for (offset = next_offset(devinfo, store, start_offset);
offset < p->next_insn_offset;
offset = next_offset(devinfo, store, offset)) {
brw_inst *insn = store + offset;
switch (brw_inst_opcode(p->isa, insn)) {
case BRW_OPCODE_IF:
depth++;
break;
case BRW_OPCODE_ENDIF:
if (depth == 0)
return offset;
depth--;
break;
case BRW_OPCODE_WHILE:
/* If the while doesn't jump before our instruction, it's the end
* of a sibling do...while loop. Ignore it.
*/
if (!while_jumps_before_offset(devinfo, insn, offset, start_offset))
continue;
FALLTHROUGH;
case BRW_OPCODE_ELSE:
case BRW_OPCODE_HALT:
if (depth == 0)
return offset;
break;
default:
break;
}
}
return 0;
}
/* There is no DO instruction on gfx6, so to find the end of the loop
* we have to see if the loop is jumping back before our start
* instruction.
*/
static int
brw_find_loop_end(struct brw_codegen *p, int start_offset)
{
const struct intel_device_info *devinfo = p->devinfo;
int offset;
void *store = p->store;
/* Always start after the instruction (such as a WHILE) we're trying to fix
* up.
*/
for (offset = next_offset(devinfo, store, start_offset);
offset < p->next_insn_offset;
offset = next_offset(devinfo, store, offset)) {
brw_inst *insn = store + offset;
if (brw_inst_opcode(p->isa, insn) == BRW_OPCODE_WHILE) {
if (while_jumps_before_offset(devinfo, insn, offset, start_offset))
return offset;
}
}
assert(!"not reached");
return start_offset;
}
/* After program generation, go back and update the UIP and JIP of
* BREAK, CONT, and HALT instructions to their correct locations.
*/
void
brw_set_uip_jip(struct brw_codegen *p, int start_offset)
{
const struct intel_device_info *devinfo = p->devinfo;
int offset;
int br = brw_jump_scale(devinfo);
int scale = 16 / br;
void *store = p->store;
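/* Offsets here are byte offsets into the instruction store (16 bytes per
 * uncompacted instruction, as asserted in the loop below); dividing a byte
 * delta by scale yields the JIP/UIP value in this generation's encoding
 * units.
 */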
for (offset = start_offset; offset < p->next_insn_offset; offset += 16) {
brw_inst *insn = store + offset;
assert(brw_inst_cmpt_control(devinfo, insn) == 0);
switch (brw_inst_opcode(p->isa, insn)) {
case BRW_OPCODE_BREAK: {
int block_end_offset = brw_find_next_block_end(p, offset);
assert(block_end_offset != 0);
brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
/* Gfx7 UIP points to WHILE; Gfx6 points just after it */
brw_inst_set_uip(devinfo, insn,
(brw_find_loop_end(p, offset) - offset) / scale);
break;
}
case BRW_OPCODE_CONTINUE: {
int block_end_offset = brw_find_next_block_end(p, offset);
assert(block_end_offset != 0);
brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
brw_inst_set_uip(devinfo, insn,
(brw_find_loop_end(p, offset) - offset) / scale);
assert(brw_inst_uip(devinfo, insn) != 0);
assert(brw_inst_jip(devinfo, insn) != 0);
break;
}
case BRW_OPCODE_ENDIF: {
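/* An ENDIF with no enclosing block end just needs to advance to the
 * next instruction, i.e. a jump of one instruction (br units).
 */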
int block_end_offset = brw_find_next_block_end(p, offset);
int32_t jump = (block_end_offset == 0) ?
1 * br : (block_end_offset - offset) / scale;
brw_inst_set_jip(devinfo, insn, jump);
break;
}
case BRW_OPCODE_HALT: {
/* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
*
* "In case of the halt instruction not inside any conditional
* code block, the value of <JIP> and <UIP> should be the
* same. In case of the halt instruction inside conditional code
* block, the <UIP> should be the end of the program, and the
* <JIP> should be end of the most inner conditional code block."
*
* The uip will have already been set by whoever set up the
* instruction.
*/
int block_end_offset = brw_find_next_block_end(p, offset);
if (block_end_offset == 0) {
brw_inst_set_jip(devinfo, insn, brw_inst_uip(devinfo, insn));
} else {
brw_inst_set_jip(devinfo, insn, (block_end_offset - offset) / scale);
}
assert(brw_inst_uip(devinfo, insn) != 0);
assert(brw_inst_jip(devinfo, insn) != 0);
break;
}
default:
break;
}
}
}
static void
brw_set_memory_fence_message(struct brw_codegen *p,
struct brw_inst *insn,
enum brw_message_target sfid,
bool commit_enable,
unsigned bti)
{
const struct intel_device_info *devinfo = p->devinfo;
brw_set_desc(p, insn, brw_message_desc(
devinfo, 1, (commit_enable ? 1 : 0), true));
brw_inst_set_sfid(devinfo, insn, sfid);
switch (sfid) {
case GFX6_SFID_DATAPORT_RENDER_CACHE:
brw_inst_set_dp_msg_type(devinfo, insn, GFX7_DATAPORT_RC_MEMORY_FENCE);
break;
case GFX7_SFID_DATAPORT_DATA_CACHE:
brw_inst_set_dp_msg_type(devinfo, insn, GFX7_DATAPORT_DC_MEMORY_FENCE);
break;
default:
unreachable("Not reached");
}
if (commit_enable)
brw_inst_set_dp_msg_control(devinfo, insn, 1 << 5);
assert(devinfo->ver >= 11 || bti == 0);
brw_inst_set_binding_table_index(devinfo, insn, bti);
}
static void
gfx12_set_memory_fence_message(struct brw_codegen *p,
struct brw_inst *insn,
enum brw_message_target sfid,
uint32_t desc)
{
const unsigned mlen = 1 * reg_unit(p->devinfo); /* g0 header */
/* Completion signaled by write to register. No data returned. */
const unsigned rlen = 1 * reg_unit(p->devinfo);
brw_inst_set_sfid(p->devinfo, insn, sfid);
if (sfid == BRW_SFID_URB && p->devinfo->ver < 20) {
brw_set_desc(p, insn, brw_urb_fence_desc(p->devinfo) |
brw_message_desc(p->devinfo, mlen, rlen, true));
} else {
enum lsc_fence_scope scope = lsc_fence_msg_desc_scope(p->devinfo, desc);
enum lsc_flush_type flush_type = lsc_fence_msg_desc_flush_type(p->devinfo, desc);
if (sfid == GFX12_SFID_TGM) {
scope = LSC_FENCE_TILE;
flush_type = LSC_FLUSH_TYPE_EVICT;
}
/* Wa_14012437816:
*
* "For any fence greater than local scope, always set flush type to
* at least invalidate so that fence goes on properly."
*
* "The bug is if flush_type is 'None', the scope is always downgraded
* to 'local'."
*
* Here set scope to NONE_6 instead of NONE, which has the same effect
* as NONE but avoids the downgrade to scope LOCAL.
*/
if (intel_needs_workaround(p->devinfo, 14012437816) &&
scope > LSC_FENCE_LOCAL &&
flush_type == LSC_FLUSH_TYPE_NONE) {
flush_type = LSC_FLUSH_TYPE_NONE_6;
}
brw_set_desc(p, insn, lsc_fence_msg_desc(p->devinfo, scope,
flush_type, false) |
brw_message_desc(p->devinfo, mlen, rlen, false));
}
}
void
brw_memory_fence(struct brw_codegen *p,
struct brw_reg dst,
struct brw_reg src,
enum opcode send_op,
enum brw_message_target sfid,
uint32_t desc,
bool commit_enable,
unsigned bti)
{
const struct intel_device_info *devinfo = p->devinfo;
dst = retype(vec1(dst), BRW_TYPE_UW);
src = retype(vec1(src), BRW_TYPE_UD);
/* Set dst as destination for dependency tracking, the MEMORY_FENCE
* message doesn't write anything back.
*/
struct brw_inst *insn = next_insn(p, send_op);
brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
brw_set_dest(p, insn, dst);
brw_set_src0(p, insn, src);
/* All DG2 hardware requires LSC for fence messages, even A-step */
if (devinfo->has_lsc)
gfx12_set_memory_fence_message(p, insn, sfid, desc);
else
brw_set_memory_fence_message(p, insn, sfid, commit_enable, bti);
}
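/* A usage sketch (illustrative, the register choices are hypothetical): a
 * pre-LSC data cache fence with commit enable could be emitted as
 *
 *    brw_memory_fence(p, brw_vec1_grf(1, 0), brw_vec1_grf(0, 0),
 *                     BRW_OPCODE_SEND, GFX7_SFID_DATAPORT_DATA_CACHE,
 *                     0, true, 0);
 *
 * where dst only serves as a dependency-tracking target for the commit
 * write-back and desc is ignored on the non-LSC path.
 */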
void
brw_broadcast(struct brw_codegen *p,
struct brw_reg dst,
struct brw_reg src,
struct brw_reg idx)
{
const struct intel_device_info *devinfo = p->devinfo;
assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_exec_size(p, BRW_EXECUTE_1);
assert(src.file == BRW_GENERAL_REGISTER_FILE &&
src.address_mode == BRW_ADDRESS_DIRECT);
assert(!src.abs && !src.negate);
/* Gen12.5 adds the following region restriction:
*
* "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float
* and Quad-Word data must not be used."
*
* We require the source and destination types to match so stomp to an
* unsigned integer type.
*/
assert(src.type == dst.type);
src.type = dst.type =
brw_type_with_size(BRW_TYPE_UD, brw_type_size_bits(src.type));
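/* Only the bit size is preserved here: e.g. an F source becomes UD and a
 * DF source becomes UQ, sidestepping the float/64-bit indirect addressing
 * restriction quoted above.
 */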
if ((src.vstride == 0 && src.hstride == 0) ||
idx.file == BRW_IMMEDIATE_VALUE) {
/* Trivial, the source is already uniform or the index is a constant.
* We will typically not get here if the optimizer is doing its job, but
* asserting would be mean.
*/
const unsigned i = idx.file == BRW_IMMEDIATE_VALUE ? idx.ud : 0;
src = stride(suboffset(src, i), 0, 1, 0);
if (brw_type_size_bytes(src.type) > 4 && !devinfo->has_64bit_int) {
brw_MOV(p, subscript(dst, BRW_TYPE_D, 0),
subscript(src, BRW_TYPE_D, 0));
brw_set_default_swsb(p, tgl_swsb_null());
brw_MOV(p, subscript(dst, BRW_TYPE_D, 1),
subscript(src, BRW_TYPE_D, 1));
} else {
brw_MOV(p, dst, src);
}
} else {
/* From the Haswell PRM section "Register Region Restrictions":
*
* "The lower bits of the AddressImmediate must not overflow to
* change the register address. The lower 5 bits of Address
* Immediate when added to lower 5 bits of address register gives
* the sub-register offset. The upper bits of Address Immediate
* when added to upper bits of address register gives the register
* address. Any overflow from sub-register offset is dropped."
*
* Fortunately, for broadcast, we never have a sub-register offset so
* this isn't an issue.
*/
assert(src.subnr == 0);
const struct brw_reg addr =
retype(brw_address_reg(0), BRW_TYPE_UD);
unsigned offset = src.nr * REG_SIZE + src.subnr;
/* Limit in bytes of the signed indirect addressing immediate. */
const unsigned limit = 512;
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
brw_set_default_flag_reg(p, 0, 0);
/* Take into account the component size and horizontal stride. */
assert(src.vstride == src.hstride + src.width);
brw_SHL(p, addr, vec1(idx),
brw_imm_ud(util_logbase2(brw_type_size_bytes(src.type)) +
src.hstride - 1));
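/* Worked example: a DWord source with a <8;8,1> region (hstride encoding
 * 1, i.e. a stride of one element) gives a shift of log2(4) + 1 - 1 = 2,
 * so addr = idx * 4, the byte offset of channel idx within the source.
 */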
/* We can only address up to limit bytes using the indirect
* addressing immediate, so account for the difference if the source
* register is above this limit.
*/
if (offset >= limit) {
brw_set_default_swsb(p, tgl_swsb_regdist(1));
brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));
offset = offset % limit;
}
brw_pop_insn_state(p);
brw_set_default_swsb(p, tgl_swsb_regdist(1));
/* Use indirect addressing to fetch the specified component. */
if (brw_type_size_bytes(src.type) > 4 &&
(intel_device_info_is_9lp(devinfo) || !devinfo->has_64bit_int)) {
/* From the Cherryview PRM Vol 7. "Register Region Restrictions":
*
* "When source or destination datatype is 64b or operation is
* integer DWord multiply, indirect addressing must not be
* used."
*
* We may also not support Q/UQ types.
*
* To work around both of these, we do two integer MOVs instead
* of one 64-bit MOV. Because no double value should ever cross
* a register boundary, it's safe to use the immediate offset in
* the indirect here to handle adding 4 bytes to the offset and
* avoid the extra ADD to the register file.
*/
brw_MOV(p, subscript(dst, BRW_TYPE_D, 0),
retype(brw_vec1_indirect(addr.subnr, offset),
BRW_TYPE_D));
brw_set_default_swsb(p, tgl_swsb_null());
brw_MOV(p, subscript(dst, BRW_TYPE_D, 1),
retype(brw_vec1_indirect(addr.subnr, offset + 4),
BRW_TYPE_D));
} else {
brw_MOV(p, dst,
retype(brw_vec1_indirect(addr.subnr, offset), src.type));
}
}
brw_pop_insn_state(p);
}
/**
* Emit the SEND message for a barrier
*/
void
brw_barrier(struct brw_codegen *p, struct brw_reg src)
{
const struct intel_device_info *devinfo = p->devinfo;
struct brw_inst *inst;
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
inst = next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, inst, retype(brw_null_reg(), BRW_TYPE_UW));
brw_set_src0(p, inst, src);
brw_set_src1(p, inst, brw_null_reg());
brw_set_desc(p, inst, brw_message_desc(devinfo,
1 * reg_unit(devinfo), 0, false));
brw_inst_set_sfid(devinfo, inst, BRW_SFID_MESSAGE_GATEWAY);
brw_inst_set_gateway_subfuncid(devinfo, inst,
BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG);
brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
brw_pop_insn_state(p);
}
/**
* Emit the wait instruction for a barrier
*/
void
brw_WAIT(struct brw_codegen *p)
{
const struct intel_device_info *devinfo = p->devinfo;
struct brw_inst *insn;
struct brw_reg src = brw_notification_reg();
insn = next_insn(p, BRW_OPCODE_WAIT);
brw_set_dest(p, insn, src);
brw_set_src0(p, insn, src);
brw_set_src1(p, insn, brw_null_reg());
brw_inst_set_exec_size(devinfo, insn, BRW_EXECUTE_1);
brw_inst_set_mask_control(devinfo, insn, BRW_MASK_DISABLE);
}
void
brw_float_controls_mode(struct brw_codegen *p,
unsigned mode, unsigned mask)
{
assert(p->current->mask_control == BRW_MASK_DISABLE);
/* From the Skylake PRM, Volume 7, page 760:
* "Implementation Restriction on Register Access: When the control
* register is used as an explicit source and/or destination, hardware
* does not ensure execution pipeline coherency. Software must set the
* thread control field to switch for an instruction that uses
* control register as an explicit operand."
*
* On Gfx12+ this is implemented in terms of SWSB annotations instead.
*/
brw_set_default_swsb(p, tgl_swsb_regdist(1));
brw_inst *inst = brw_AND(p, brw_cr0_reg(0), brw_cr0_reg(0),
brw_imm_ud(~mask));
brw_inst_set_exec_size(p->devinfo, inst, BRW_EXECUTE_1);
if (p->devinfo->ver < 12)
brw_inst_set_thread_control(p->devinfo, inst, BRW_THREAD_SWITCH);
if (mode) {
brw_inst *inst_or = brw_OR(p, brw_cr0_reg(0), brw_cr0_reg(0),
brw_imm_ud(mode));
brw_inst_set_exec_size(p->devinfo, inst_or, BRW_EXECUTE_1);
if (p->devinfo->ver < 12)
brw_inst_set_thread_control(p->devinfo, inst_or, BRW_THREAD_SWITCH);
}
if (p->devinfo->ver >= 12)
brw_SYNC(p, TGL_SYNC_NOP);
}
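/* A usage sketch (values illustrative): callers typically pass the new
 * bits for a cr0.0 field in 'mode' and the full field mask in 'mask'; the
 * AND above clears the field and the optional OR installs the new value,
 * with a thread switch or SYNC.NOP keeping the pipeline coherent around
 * the control-register access.
 */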
void
brw_update_reloc_imm(const struct brw_isa_info *isa,
brw_inst *inst,
uint32_t value)
{
const struct intel_device_info *devinfo = isa->devinfo;
/* Sanity check that the instruction is a MOV of an immediate */
assert(brw_inst_opcode(isa, inst) == BRW_OPCODE_MOV);
assert(brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE);
/* If it was compacted, we can't safely rewrite */
assert(brw_inst_cmpt_control(devinfo, inst) == 0);
brw_inst_set_imm_ud(devinfo, inst, value);
}
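/* Counterpart of brw_MOV_reloc_imm() below: once the final value for a
 * relocation recorded with brw_add_reloc() is known, the placeholder
 * immediate emitted there can be rewritten in place with this helper.
 */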
/* A default value for constants that will be patched at run-time.
* We pick an arbitrary value that prevents instruction compaction.
*/
#define DEFAULT_PATCH_IMM 0x4a7cc037
void
brw_MOV_reloc_imm(struct brw_codegen *p,
struct brw_reg dst,
enum brw_reg_type src_type,
uint32_t id,
uint32_t base)
{
assert(brw_type_size_bytes(src_type) == 4);
assert(brw_type_size_bytes(dst.type) == 4);
brw_add_reloc(p, id, BRW_SHADER_RELOC_TYPE_MOV_IMM,
p->next_insn_offset, base);
brw_MOV(p, dst, retype(brw_imm_ud(DEFAULT_PATCH_IMM), src_type));
}