mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-19 20:08:06 +02:00
Coverity complains that we could end up rolling over on a 32bit platform, which isn't really true because of the assertion, but there's also no harm in ensuring that we have exactly the same behavior for both 32 bit and 64 bit platforms. CID: 1515989 Reviewed-by: José Roberto de Souza <jose.souza@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21572>
1401 lines
40 KiB
C
1401 lines
40 KiB
C
/*
|
|
* Copyright © 2019 Intel Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#ifndef MI_BUILDER_H
|
|
#define MI_BUILDER_H
|
|
|
|
#include "dev/intel_device_info.h"
|
|
#include "genxml/genX_bits.h"
|
|
#include "util/bitscan.h"
|
|
#include "util/fast_idiv_by_const.h"
|
|
#include "util/u_math.h"
|
|
|
|
/** Number of GPRs the MI builder may hand out for its own temporaries.
 *
 * A user of this API may define this before including the header in order to
 * keep some GPRs at the top end reserved for its own use.
 */
#if !defined(MI_BUILDER_NUM_ALLOC_GPRS)
#define MI_BUILDER_NUM_ALLOC_GPRS 16
#endif
|
|
|
|
/** These must be defined by the user of the builder
|
|
*
|
|
* void *__gen_get_batch_dwords(__gen_user_data *user_data,
|
|
* unsigned num_dwords);
|
|
*
|
|
* __gen_address_type
|
|
* __gen_address_offset(__gen_address_type addr, uint64_t offset);
|
|
*
|
|
*
|
|
* If self-modifying batches are supported, we must be able to pass batch
|
|
* addresses around as void*s so pinning as well as batch chaining or some
|
|
* other mechanism for ensuring batch pointers remain valid during building is
|
|
* required. The following function must also be defined, it returns an
|
|
* address in canonical form:
|
|
*
|
|
* __gen_address_type
|
|
* __gen_get_batch_address(__gen_user_data *user_data, void *location);
|
|
*
|
|
* Also, __gen_combine_address must accept a location value of NULL and return
|
|
* a fully valid 64-bit address.
|
|
*/
|
|
|
|
/*
|
|
* Start of the actual MI builder
|
|
*/
|
|
|
|
/* Token-pasting helpers: derive the <cmd>_length, <cmd>_header and
 * <cmd>_pack identifiers from a command name.
 */
#define __genxml_cmd_length(cmd) cmd##_length
#define __genxml_cmd_header(cmd) cmd##_header
#define __genxml_cmd_pack(cmd)   cmd##_pack
|
|
|
|
/* Statement-like macro: declares a `struct cmd` called `name`, initialized
 * from the command's header, runs the attached block exactly once so the
 * caller can fill in fields, then packs the struct into `dst`.  The block is
 * skipped entirely when `dst` is NULL.
 */
#define mi_builder_pack(b, cmd, dst, name)                                  \
   for (struct cmd name = { __genxml_cmd_header(cmd) },                     \
        *_dst = (struct cmd *)(dst);                                        \
        __builtin_expect(_dst != NULL, 1);                                  \
        __genxml_cmd_pack(cmd)((b)->user_data, (void *)_dst, &name),        \
        _dst = NULL)
|
|
|
|
/* Like mi_builder_pack(), but the destination is freshly requested from the
 * batch via __gen_get_batch_dwords() using the command's fixed length.
 */
#define mi_builder_emit(b, cmd, name)                                       \
   mi_builder_pack((b), cmd,                                                \
                   __gen_get_batch_dwords((b)->user_data,                   \
                                          __genxml_cmd_length(cmd)),        \
                   name)
|
|
|
|
|
|
/* Kind of storage an mi_value refers to. */
enum mi_value_type {
   MI_VALUE_TYPE_IMM = 0, /* 64-bit immediate */
   MI_VALUE_TYPE_MEM32,   /* 32-bit memory location */
   MI_VALUE_TYPE_MEM64,   /* 64-bit memory location */
   MI_VALUE_TYPE_REG32,   /* 32-bit register */
   MI_VALUE_TYPE_REG64,   /* 64-bit register */
};
|
|
|
|
struct mi_value {
|
|
enum mi_value_type type;
|
|
|
|
union {
|
|
uint64_t imm;
|
|
__gen_address_type addr;
|
|
uint32_t reg;
|
|
};
|
|
|
|
#if GFX_VERx10 >= 75
|
|
bool invert;
|
|
#endif
|
|
};
|
|
|
|
/* Register offset as it is written into a command, as produced by
 * mi_adjust_reg_num().
 */
struct mi_reg_num {
   uint32_t num; /* register offset to encode */
#if GFX_VER >= 11
   bool cs;      /* true when `num` is relative to the 0x2000 base */
#endif
};
|
|
|
|
static inline struct mi_reg_num
|
|
mi_adjust_reg_num(uint32_t reg)
|
|
{
|
|
#if GFX_VER >= 11
|
|
bool cs = reg >= 0x2000 && reg < 0x4000;
|
|
return (struct mi_reg_num) {
|
|
.num = reg - (cs ? 0x2000 : 0),
|
|
.cs = cs,
|
|
};
|
|
#else
|
|
return (struct mi_reg_num) { .num = reg, };
|
|
#endif
|
|
}
|
|
|
|
/* Capacity, in dwords, of the builder's pending MI_MATH buffer. */
#if GFX_VER < 9
#define MI_BUILDER_MAX_MATH_DWORDS 64
#else
#define MI_BUILDER_MAX_MATH_DWORDS 256
#endif
|
|
|
|
struct mi_builder {
|
|
const struct intel_device_info *devinfo;
|
|
__gen_user_data *user_data;
|
|
|
|
#if GFX_VERx10 >= 75
|
|
uint32_t gprs;
|
|
uint8_t gpr_refs[MI_BUILDER_NUM_ALLOC_GPRS];
|
|
|
|
unsigned num_math_dwords;
|
|
uint32_t math_dwords[MI_BUILDER_MAX_MATH_DWORDS];
|
|
#endif
|
|
};
|
|
|
|
static inline void
|
|
mi_builder_init(struct mi_builder *b,
|
|
const struct intel_device_info *devinfo,
|
|
__gen_user_data *user_data)
|
|
{
|
|
memset(b, 0, sizeof(*b));
|
|
b->devinfo = devinfo;
|
|
b->user_data = user_data;
|
|
|
|
#if GFX_VERx10 >= 75
|
|
b->gprs = 0;
|
|
b->num_math_dwords = 0;
|
|
#endif
|
|
}
|
|
|
|
/* Emit any queued ALU instructions as a single MI_MATH command.
 *
 * Does nothing when the queue is empty (or on generations without MI_MATH).
 */
static inline void
mi_builder_flush_math(struct mi_builder *b)
{
#if GFX_VERx10 >= 75
   const unsigned num_alu = b->num_math_dwords;
   if (num_alu == 0)
      return;

   /* One header dword followed by the queued ALU dwords. */
   uint32_t *cmd = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                     1 + num_alu);
   mi_builder_pack(b, GENX(MI_MATH), cmd, math) {
      math.DWordLength = 1 + num_alu - GENX(MI_MATH_length_bias);
   }
   memcpy(&cmd[1], b->math_dwords, num_alu * sizeof(uint32_t));

   b->num_math_dwords = 0;
#endif
}
|
|
|
|
/* Register offset of the first GPR; each GPR occupies 8 bytes. */
#define _MI_BUILDER_GPR_BASE 0x2600
/* The actual hardware limit on GPRs */
#define _MI_BUILDER_NUM_HW_GPRS 16
|
|
|
|
#if GFX_VERx10 >= 75
|
|
|
|
static inline bool
|
|
mi_value_is_reg(struct mi_value val)
|
|
{
|
|
return val.type == MI_VALUE_TYPE_REG32 ||
|
|
val.type == MI_VALUE_TYPE_REG64;
|
|
}
|
|
|
|
static inline bool
|
|
mi_value_is_gpr(struct mi_value val)
|
|
{
|
|
return mi_value_is_reg(val) &&
|
|
val.reg >= _MI_BUILDER_GPR_BASE &&
|
|
val.reg < _MI_BUILDER_GPR_BASE +
|
|
_MI_BUILDER_NUM_HW_GPRS * 8;
|
|
}
|
|
|
|
static inline bool
|
|
_mi_value_is_allocated_gpr(struct mi_value val)
|
|
{
|
|
return mi_value_is_reg(val) &&
|
|
val.reg >= _MI_BUILDER_GPR_BASE &&
|
|
val.reg < _MI_BUILDER_GPR_BASE +
|
|
MI_BUILDER_NUM_ALLOC_GPRS * 8;
|
|
}
|
|
|
|
static inline uint32_t
|
|
_mi_value_as_gpr(struct mi_value val)
|
|
{
|
|
assert(mi_value_is_gpr(val));
|
|
/* Some of the GRL metakernels will generate 64bit value in a GP register,
|
|
* then use only half of that as the last operation on that value. So allow
|
|
* unref on part of a GP register.
|
|
*/
|
|
assert(val.reg % 4 == 0);
|
|
return (val.reg - _MI_BUILDER_GPR_BASE) / 8;
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_new_gpr(struct mi_builder *b)
|
|
{
|
|
unsigned gpr = ffs(~b->gprs) - 1;
|
|
assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
|
|
assert(b->gpr_refs[gpr] == 0);
|
|
b->gprs |= (1u << gpr);
|
|
b->gpr_refs[gpr] = 1;
|
|
|
|
return (struct mi_value) {
|
|
.type = MI_VALUE_TYPE_REG64,
|
|
.reg = _MI_BUILDER_GPR_BASE + gpr * 8,
|
|
};
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_reserve_gpr(struct mi_builder *b, unsigned gpr)
|
|
{
|
|
assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
|
|
assert(!(b->gprs & (1 << gpr)));
|
|
assert(b->gpr_refs[gpr] == 0);
|
|
b->gprs |= (1u << gpr);
|
|
b->gpr_refs[gpr] = 128; /* Enough that we won't unref it */
|
|
|
|
return (struct mi_value) {
|
|
.type = MI_VALUE_TYPE_REG64,
|
|
.reg = _MI_BUILDER_GPR_BASE + gpr * 8,
|
|
};
|
|
}
|
|
#endif /* GFX_VERx10 >= 75 */
|
|
|
|
/** Take a reference to a mi_value
|
|
*
|
|
* The MI builder uses reference counting to automatically free ALU GPRs for
|
|
* re-use in calculations. All mi_* math functions consume the reference
|
|
* they are handed for each source and return a reference to a value which the
|
|
* caller must consume. In particular, if you pas the same value into a
|
|
* single mi_* math function twice (say to add a number to itself), you
|
|
* are responsible for calling mi_value_ref() to get a second reference
|
|
* because the mi_* math function will consume it twice.
|
|
*/
|
|
static inline void
|
|
mi_value_add_refs(struct mi_builder *b, struct mi_value val, unsigned num_refs)
|
|
{
|
|
#if GFX_VERx10 >= 75
|
|
if (_mi_value_is_allocated_gpr(val)) {
|
|
unsigned gpr = _mi_value_as_gpr(val);
|
|
assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
|
|
assert(b->gprs & (1u << gpr));
|
|
assert(b->gpr_refs[gpr] < UINT8_MAX);
|
|
b->gpr_refs[gpr] += num_refs;
|
|
}
|
|
#endif /* GFX_VERx10 >= 75 */
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_value_ref(struct mi_builder *b, struct mi_value val)
|
|
{
|
|
mi_value_add_refs(b, val, 1);
|
|
return val;
|
|
}
|
|
|
|
|
|
/** Drop a reference to a mi_value
|
|
*
|
|
* See also mi_value_ref.
|
|
*/
|
|
static inline void
|
|
mi_value_unref(struct mi_builder *b, struct mi_value val)
|
|
{
|
|
#if GFX_VERx10 >= 75
|
|
if (_mi_value_is_allocated_gpr(val)) {
|
|
unsigned gpr = _mi_value_as_gpr(val);
|
|
assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
|
|
assert(b->gprs & (1u << gpr));
|
|
assert(b->gpr_refs[gpr] > 0);
|
|
if (--b->gpr_refs[gpr] == 0)
|
|
b->gprs &= ~(1u << gpr);
|
|
}
|
|
#endif /* GFX_VERx10 >= 75 */
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_imm(uint64_t imm)
|
|
{
|
|
return (struct mi_value) {
|
|
.type = MI_VALUE_TYPE_IMM,
|
|
.imm = imm,
|
|
};
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_reg32(uint32_t reg)
|
|
{
|
|
struct mi_value val = {
|
|
.type = MI_VALUE_TYPE_REG32,
|
|
.reg = reg,
|
|
};
|
|
#if GFX_VERx10 >= 75
|
|
assert(!_mi_value_is_allocated_gpr(val));
|
|
#endif
|
|
return val;
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_reg64(uint32_t reg)
|
|
{
|
|
struct mi_value val = {
|
|
.type = MI_VALUE_TYPE_REG64,
|
|
.reg = reg,
|
|
};
|
|
#if GFX_VERx10 >= 75
|
|
assert(!_mi_value_is_allocated_gpr(val));
|
|
#endif
|
|
return val;
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_mem32(__gen_address_type addr)
|
|
{
|
|
return (struct mi_value) {
|
|
.type = MI_VALUE_TYPE_MEM32,
|
|
.addr = addr,
|
|
};
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_mem64(__gen_address_type addr)
|
|
{
|
|
return (struct mi_value) {
|
|
.type = MI_VALUE_TYPE_MEM64,
|
|
.addr = addr,
|
|
};
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_value_half(struct mi_value value, bool top_32_bits)
|
|
{
|
|
switch (value.type) {
|
|
case MI_VALUE_TYPE_IMM:
|
|
if (top_32_bits)
|
|
value.imm >>= 32;
|
|
else
|
|
value.imm &= 0xffffffffu;
|
|
return value;
|
|
|
|
case MI_VALUE_TYPE_MEM32:
|
|
assert(!top_32_bits);
|
|
return value;
|
|
|
|
case MI_VALUE_TYPE_MEM64:
|
|
if (top_32_bits)
|
|
value.addr = __gen_address_offset(value.addr, 4);
|
|
value.type = MI_VALUE_TYPE_MEM32;
|
|
return value;
|
|
|
|
case MI_VALUE_TYPE_REG32:
|
|
assert(!top_32_bits);
|
|
return value;
|
|
|
|
case MI_VALUE_TYPE_REG64:
|
|
if (top_32_bits)
|
|
value.reg += 4;
|
|
value.type = MI_VALUE_TYPE_REG32;
|
|
return value;
|
|
}
|
|
|
|
unreachable("Invalid mi_value type");
|
|
}
|
|
|
|
static inline void
|
|
_mi_copy_no_unref(struct mi_builder *b,
|
|
struct mi_value dst, struct mi_value src)
|
|
{
|
|
#if GFX_VERx10 >= 75
|
|
/* TODO: We could handle src.invert by emitting a bit of math if we really
|
|
* wanted to.
|
|
*/
|
|
assert(!dst.invert && !src.invert);
|
|
#endif
|
|
mi_builder_flush_math(b);
|
|
|
|
switch (dst.type) {
|
|
case MI_VALUE_TYPE_IMM:
|
|
unreachable("Cannot copy to an immediate");
|
|
|
|
case MI_VALUE_TYPE_MEM64:
|
|
case MI_VALUE_TYPE_REG64:
|
|
switch (src.type) {
|
|
case MI_VALUE_TYPE_IMM:
|
|
if (dst.type == MI_VALUE_TYPE_REG64) {
|
|
uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
|
|
GENX(MI_LOAD_REGISTER_IMM_length) + 2);
|
|
struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
|
|
mi_builder_pack(b, GENX(MI_LOAD_REGISTER_IMM), dw, lri) {
|
|
lri.DWordLength = GENX(MI_LOAD_REGISTER_IMM_length) + 2 -
|
|
GENX(MI_LOAD_REGISTER_IMM_length_bias);
|
|
#if GFX_VER >= 11
|
|
lri.AddCSMMIOStartOffset = reg.cs;
|
|
#endif
|
|
}
|
|
dw[1] = reg.num;
|
|
dw[2] = src.imm;
|
|
dw[3] = reg.num + 4;
|
|
dw[4] = src.imm >> 32;
|
|
} else {
|
|
#if GFX_VER >= 8
|
|
assert(dst.type == MI_VALUE_TYPE_MEM64);
|
|
uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
|
|
GENX(MI_STORE_DATA_IMM_length) + 1);
|
|
mi_builder_pack(b, GENX(MI_STORE_DATA_IMM), dw, sdm) {
|
|
sdm.DWordLength = GENX(MI_STORE_DATA_IMM_length) + 1 -
|
|
GENX(MI_STORE_DATA_IMM_length_bias);
|
|
sdm.StoreQword = true;
|
|
sdm.Address = dst.addr;
|
|
}
|
|
dw[3] = src.imm;
|
|
dw[4] = src.imm >> 32;
|
|
#else
|
|
_mi_copy_no_unref(b, mi_value_half(dst, false),
|
|
mi_value_half(src, false));
|
|
_mi_copy_no_unref(b, mi_value_half(dst, true),
|
|
mi_value_half(src, true));
|
|
#endif
|
|
}
|
|
break;
|
|
case MI_VALUE_TYPE_REG32:
|
|
case MI_VALUE_TYPE_MEM32:
|
|
_mi_copy_no_unref(b, mi_value_half(dst, false),
|
|
mi_value_half(src, false));
|
|
_mi_copy_no_unref(b, mi_value_half(dst, true),
|
|
mi_imm(0));
|
|
break;
|
|
case MI_VALUE_TYPE_REG64:
|
|
case MI_VALUE_TYPE_MEM64:
|
|
_mi_copy_no_unref(b, mi_value_half(dst, false),
|
|
mi_value_half(src, false));
|
|
_mi_copy_no_unref(b, mi_value_half(dst, true),
|
|
mi_value_half(src, true));
|
|
break;
|
|
default:
|
|
unreachable("Invalid mi_value type");
|
|
}
|
|
break;
|
|
|
|
case MI_VALUE_TYPE_MEM32:
|
|
switch (src.type) {
|
|
case MI_VALUE_TYPE_IMM:
|
|
mi_builder_emit(b, GENX(MI_STORE_DATA_IMM), sdi) {
|
|
sdi.Address = dst.addr;
|
|
#if GFX_VER >= 12
|
|
sdi.ForceWriteCompletionCheck = true;
|
|
#endif
|
|
sdi.ImmediateData = src.imm;
|
|
}
|
|
break;
|
|
|
|
case MI_VALUE_TYPE_MEM32:
|
|
case MI_VALUE_TYPE_MEM64:
|
|
#if GFX_VER >= 8
|
|
mi_builder_emit(b, GENX(MI_COPY_MEM_MEM), cmm) {
|
|
cmm.DestinationMemoryAddress = dst.addr;
|
|
cmm.SourceMemoryAddress = src.addr;
|
|
}
|
|
#elif GFX_VERx10 == 75
|
|
{
|
|
struct mi_value tmp = mi_new_gpr(b);
|
|
_mi_copy_no_unref(b, tmp, src);
|
|
_mi_copy_no_unref(b, dst, tmp);
|
|
mi_value_unref(b, tmp);
|
|
}
|
|
#else
|
|
unreachable("Cannot do mem <-> mem copy on IVB and earlier");
|
|
#endif
|
|
break;
|
|
|
|
case MI_VALUE_TYPE_REG32:
|
|
case MI_VALUE_TYPE_REG64:
|
|
mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
|
|
struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
|
|
srm.RegisterAddress = reg.num;
|
|
#if GFX_VER >= 11
|
|
srm.AddCSMMIOStartOffset = reg.cs;
|
|
#endif
|
|
srm.MemoryAddress = dst.addr;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
unreachable("Invalid mi_value type");
|
|
}
|
|
break;
|
|
|
|
case MI_VALUE_TYPE_REG32:
|
|
switch (src.type) {
|
|
case MI_VALUE_TYPE_IMM:
|
|
mi_builder_emit(b, GENX(MI_LOAD_REGISTER_IMM), lri) {
|
|
struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
|
|
lri.RegisterOffset = reg.num;
|
|
#if GFX_VER >= 11
|
|
lri.AddCSMMIOStartOffset = reg.cs;
|
|
#endif
|
|
lri.DataDWord = src.imm;
|
|
}
|
|
break;
|
|
|
|
case MI_VALUE_TYPE_MEM32:
|
|
case MI_VALUE_TYPE_MEM64:
|
|
#if GFX_VER >= 7
|
|
mi_builder_emit(b, GENX(MI_LOAD_REGISTER_MEM), lrm) {
|
|
struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
|
|
lrm.RegisterAddress = reg.num;
|
|
#if GFX_VER >= 11
|
|
lrm.AddCSMMIOStartOffset = reg.cs;
|
|
#endif
|
|
lrm.MemoryAddress = src.addr;
|
|
}
|
|
#else
|
|
unreachable("Cannot load do mem -> reg copy on SNB and earlier");
|
|
#endif
|
|
break;
|
|
|
|
case MI_VALUE_TYPE_REG32:
|
|
case MI_VALUE_TYPE_REG64:
|
|
#if GFX_VERx10 >= 75
|
|
if (src.reg != dst.reg) {
|
|
mi_builder_emit(b, GENX(MI_LOAD_REGISTER_REG), lrr) {
|
|
struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
|
|
lrr.SourceRegisterAddress = reg.num;
|
|
#if GFX_VER >= 11
|
|
lrr.AddCSMMIOStartOffsetSource = reg.cs;
|
|
#endif
|
|
reg = mi_adjust_reg_num(dst.reg);
|
|
lrr.DestinationRegisterAddress = reg.num;
|
|
#if GFX_VER >= 11
|
|
lrr.AddCSMMIOStartOffsetDestination = reg.cs;
|
|
#endif
|
|
}
|
|
}
|
|
#else
|
|
unreachable("Cannot do reg <-> reg copy on IVB and earlier");
|
|
#endif
|
|
break;
|
|
|
|
default:
|
|
unreachable("Invalid mi_value type");
|
|
}
|
|
break;
|
|
|
|
default:
|
|
unreachable("Invalid mi_value type");
|
|
}
|
|
}
|
|
|
|
#if GFX_VERx10 >= 75
/* Forward declaration: mi_store() needs this before the MI_MATH section
 * that defines it.
 */
static inline struct mi_value
mi_resolve_invert(struct mi_builder *b, struct mi_value src);
#endif
|
|
|
|
/** Store the value in src to the value represented by dst
|
|
*
|
|
* If the bit size of src and dst mismatch, this function does an unsigned
|
|
* integer cast. If src has more bits than dst, it takes the bottom bits. If
|
|
* src has fewer bits then dst, it fills the top bits with zeros.
|
|
*
|
|
* This function consumes one reference for each of src and dst.
|
|
*/
|
|
static inline void
|
|
mi_store(struct mi_builder *b, struct mi_value dst, struct mi_value src)
|
|
{
|
|
#if GFX_VERx10 >= 75
|
|
src = mi_resolve_invert(b, src);
|
|
#endif
|
|
_mi_copy_no_unref(b, dst, src);
|
|
mi_value_unref(b, src);
|
|
mi_value_unref(b, dst);
|
|
}
|
|
|
|
static inline void
|
|
mi_memset(struct mi_builder *b, __gen_address_type dst,
|
|
uint32_t value, uint32_t size)
|
|
{
|
|
#if GFX_VERx10 >= 75
|
|
assert(b->num_math_dwords == 0);
|
|
#endif
|
|
|
|
/* This memset operates in units of dwords. */
|
|
assert(size % 4 == 0);
|
|
|
|
for (uint32_t i = 0; i < size; i += 4) {
|
|
mi_store(b, mi_mem32(__gen_address_offset(dst, i)),
|
|
mi_imm(value));
|
|
}
|
|
}
|
|
|
|
/* NOTE: On IVB, this function stomps GFX7_3DPRIM_BASE_VERTEX */
|
|
static inline void
|
|
mi_memcpy(struct mi_builder *b, __gen_address_type dst,
|
|
__gen_address_type src, uint32_t size)
|
|
{
|
|
#if GFX_VERx10 >= 75
|
|
assert(b->num_math_dwords == 0);
|
|
#endif
|
|
|
|
/* This memcpy operates in units of dwords. */
|
|
assert(size % 4 == 0);
|
|
|
|
for (uint32_t i = 0; i < size; i += 4) {
|
|
struct mi_value dst_val = mi_mem32(__gen_address_offset(dst, i));
|
|
struct mi_value src_val = mi_mem32(__gen_address_offset(src, i));
|
|
#if GFX_VERx10 >= 75
|
|
mi_store(b, dst_val, src_val);
|
|
#else
|
|
/* IVB does not have a general purpose register for command streamer
|
|
* commands. Therefore, we use an alternate temporary register.
|
|
*/
|
|
struct mi_value tmp_reg = mi_reg32(0x2440); /* GFX7_3DPRIM_BASE_VERTEX */
|
|
mi_store(b, tmp_reg, src_val);
|
|
mi_store(b, dst_val, tmp_reg);
|
|
#endif
|
|
}
|
|
}
|
|
|
|
/*
|
|
* MI_MATH Section. Only available on Haswell+
|
|
*/
|
|
|
|
#if GFX_VERx10 >= 75
|
|
|
|
/**
|
|
* Perform a predicated store (assuming the condition is already loaded
|
|
* in the MI_PREDICATE_RESULT register) of the value in src to the memory
|
|
* location specified by dst. Non-memory destinations are not supported.
|
|
*
|
|
* This function consumes one reference for each of src and dst.
|
|
*/
|
|
static inline void
|
|
mi_store_if(struct mi_builder *b, struct mi_value dst, struct mi_value src)
|
|
{
|
|
assert(!dst.invert && !src.invert);
|
|
|
|
mi_builder_flush_math(b);
|
|
|
|
/* We can only predicate MI_STORE_REGISTER_MEM, so restrict the
|
|
* destination to be memory, and resolve the source to a temporary
|
|
* register if it isn't in one already.
|
|
*/
|
|
assert(dst.type == MI_VALUE_TYPE_MEM64 ||
|
|
dst.type == MI_VALUE_TYPE_MEM32);
|
|
|
|
if (src.type != MI_VALUE_TYPE_REG32 &&
|
|
src.type != MI_VALUE_TYPE_REG64) {
|
|
struct mi_value tmp = mi_new_gpr(b);
|
|
_mi_copy_no_unref(b, tmp, src);
|
|
src = tmp;
|
|
}
|
|
|
|
if (dst.type == MI_VALUE_TYPE_MEM64) {
|
|
mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
|
|
struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
|
|
srm.RegisterAddress = reg.num;
|
|
#if GFX_VER >= 11
|
|
srm.AddCSMMIOStartOffset = reg.cs;
|
|
#endif
|
|
srm.MemoryAddress = dst.addr;
|
|
srm.PredicateEnable = true;
|
|
}
|
|
mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
|
|
struct mi_reg_num reg = mi_adjust_reg_num(src.reg + 4);
|
|
srm.RegisterAddress = reg.num;
|
|
#if GFX_VER >= 11
|
|
srm.AddCSMMIOStartOffset = reg.cs;
|
|
#endif
|
|
srm.MemoryAddress = __gen_address_offset(dst.addr, 4);
|
|
srm.PredicateEnable = true;
|
|
}
|
|
} else {
|
|
mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
|
|
struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
|
|
srm.RegisterAddress = reg.num;
|
|
#if GFX_VER >= 11
|
|
srm.AddCSMMIOStartOffset = reg.cs;
|
|
#endif
|
|
srm.MemoryAddress = dst.addr;
|
|
srm.PredicateEnable = true;
|
|
}
|
|
}
|
|
|
|
mi_value_unref(b, src);
|
|
mi_value_unref(b, dst);
|
|
}
|
|
|
|
static inline void
|
|
_mi_builder_push_math(struct mi_builder *b,
|
|
const uint32_t *dwords,
|
|
unsigned num_dwords)
|
|
{
|
|
assert(num_dwords < MI_BUILDER_MAX_MATH_DWORDS);
|
|
if (b->num_math_dwords + num_dwords > MI_BUILDER_MAX_MATH_DWORDS)
|
|
mi_builder_flush_math(b);
|
|
|
|
memcpy(&b->math_dwords[b->num_math_dwords],
|
|
dwords, num_dwords * sizeof(*dwords));
|
|
b->num_math_dwords += num_dwords;
|
|
}
|
|
|
|
static inline uint32_t
|
|
_mi_pack_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2)
|
|
{
|
|
struct GENX(MI_MATH_ALU_INSTRUCTION) instr = {
|
|
.Operand2 = operand2,
|
|
.Operand1 = operand1,
|
|
.ALUOpcode = opcode,
|
|
};
|
|
|
|
uint32_t dw;
|
|
GENX(MI_MATH_ALU_INSTRUCTION_pack)(NULL, &dw, &instr);
|
|
|
|
return dw;
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_value_to_gpr(struct mi_builder *b, struct mi_value val)
|
|
{
|
|
if (mi_value_is_gpr(val))
|
|
return val;
|
|
|
|
/* Save off the invert flag because it makes copy() grumpy */
|
|
bool invert = val.invert;
|
|
val.invert = false;
|
|
|
|
struct mi_value tmp = mi_new_gpr(b);
|
|
_mi_copy_no_unref(b, tmp, val);
|
|
tmp.invert = invert;
|
|
|
|
return tmp;
|
|
}
|
|
|
|
static inline uint64_t
|
|
mi_value_to_u64(struct mi_value val)
|
|
{
|
|
assert(val.type == MI_VALUE_TYPE_IMM);
|
|
return val.invert ? ~val.imm : val.imm;
|
|
}
|
|
|
|
static inline uint32_t
|
|
_mi_math_load_src(struct mi_builder *b, unsigned src, struct mi_value *val)
|
|
{
|
|
if (val->type == MI_VALUE_TYPE_IMM &&
|
|
(val->imm == 0 || val->imm == UINT64_MAX)) {
|
|
uint64_t imm = val->invert ? ~val->imm : val->imm;
|
|
return _mi_pack_alu(imm ? MI_ALU_LOAD1 : MI_ALU_LOAD0, src, 0);
|
|
} else {
|
|
*val = mi_value_to_gpr(b, *val);
|
|
return _mi_pack_alu(val->invert ? MI_ALU_LOADINV : MI_ALU_LOAD,
|
|
src, _mi_value_as_gpr(*val));
|
|
}
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_math_binop(struct mi_builder *b, uint32_t opcode,
|
|
struct mi_value src0, struct mi_value src1,
|
|
uint32_t store_op, uint32_t store_src)
|
|
{
|
|
struct mi_value dst = mi_new_gpr(b);
|
|
|
|
uint32_t dw[4];
|
|
dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &src0);
|
|
dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &src1);
|
|
dw[2] = _mi_pack_alu(opcode, 0, 0);
|
|
dw[3] = _mi_pack_alu(store_op, _mi_value_as_gpr(dst), store_src);
|
|
_mi_builder_push_math(b, dw, 4);
|
|
|
|
mi_value_unref(b, src0);
|
|
mi_value_unref(b, src1);
|
|
|
|
return dst;
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_inot(struct mi_builder *b, struct mi_value val)
|
|
{
|
|
if (val.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(~mi_value_to_u64(val));
|
|
|
|
val.invert = !val.invert;
|
|
return val;
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_resolve_invert(struct mi_builder *b, struct mi_value src)
|
|
{
|
|
if (!src.invert)
|
|
return src;
|
|
|
|
assert(src.type != MI_VALUE_TYPE_IMM);
|
|
return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
|
|
MI_ALU_STORE, MI_ALU_ACCU);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_iadd(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src0) + mi_value_to_u64(src1));
|
|
|
|
return mi_math_binop(b, MI_ALU_ADD, src0, src1,
|
|
MI_ALU_STORE, MI_ALU_ACCU);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_iadd_imm(struct mi_builder *b,
|
|
struct mi_value src, uint64_t N)
|
|
{
|
|
if (N == 0)
|
|
return src;
|
|
|
|
return mi_iadd(b, src, mi_imm(N));
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_isub(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src0) - mi_value_to_u64(src1));
|
|
|
|
return mi_math_binop(b, MI_ALU_SUB, src0, src1,
|
|
MI_ALU_STORE, MI_ALU_ACCU);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_ieq(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src0) == mi_value_to_u64(src1) ? ~0ull : 0);
|
|
|
|
/* Compute "equal" by subtracting and storing the zero bit */
|
|
return mi_math_binop(b, MI_ALU_SUB, src0, src1,
|
|
MI_ALU_STORE, MI_ALU_ZF);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_ine(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src0) != mi_value_to_u64(src1) ? ~0ull : 0);
|
|
|
|
/* Compute "not equal" by subtracting and storing the inverse zero bit */
|
|
return mi_math_binop(b, MI_ALU_SUB, src0, src1,
|
|
MI_ALU_STOREINV, MI_ALU_ZF);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_ult(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src0) < mi_value_to_u64(src1) ? ~0ull : 0);
|
|
|
|
/* Compute "less than" by subtracting and storing the carry bit */
|
|
return mi_math_binop(b, MI_ALU_SUB, src0, src1,
|
|
MI_ALU_STORE, MI_ALU_CF);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_uge(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src0) >= mi_value_to_u64(src1) ? ~0ull : 0);
|
|
|
|
/* Compute "less than" by subtracting and storing the carry bit */
|
|
return mi_math_binop(b, MI_ALU_SUB, src0, src1,
|
|
MI_ALU_STOREINV, MI_ALU_CF);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_iand(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src0) & mi_value_to_u64(src1));
|
|
|
|
return mi_math_binop(b, MI_ALU_AND, src0, src1,
|
|
MI_ALU_STORE, MI_ALU_ACCU);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_nz(struct mi_builder *b, struct mi_value src)
|
|
{
|
|
if (src.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src) != 0 ? ~0ull : 0);
|
|
|
|
return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
|
|
MI_ALU_STOREINV, MI_ALU_ZF);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_z(struct mi_builder *b, struct mi_value src)
|
|
{
|
|
if (src.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src) == 0 ? ~0ull : 0);
|
|
|
|
return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
|
|
MI_ALU_STORE, MI_ALU_ZF);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_ior(struct mi_builder *b,
|
|
struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src0) | mi_value_to_u64(src1));
|
|
|
|
return mi_math_binop(b, MI_ALU_OR, src0, src1,
|
|
MI_ALU_STORE, MI_ALU_ACCU);
|
|
}
|
|
|
|
#if GFX_VERx10 >= 125
|
|
static inline struct mi_value
|
|
mi_ishl(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src1.type == MI_VALUE_TYPE_IMM) {
|
|
assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
|
|
assert(mi_value_to_u64(src1) <= 32);
|
|
}
|
|
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src0) << mi_value_to_u64(src1));
|
|
|
|
return mi_math_binop(b, MI_ALU_SHL, src0, src1,
|
|
MI_ALU_STORE, MI_ALU_ACCU);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_ushr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src1.type == MI_VALUE_TYPE_IMM) {
|
|
assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
|
|
assert(mi_value_to_u64(src1) <= 32);
|
|
}
|
|
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src0) >> mi_value_to_u64(src1));
|
|
|
|
return mi_math_binop(b, MI_ALU_SHR, src0, src1,
|
|
MI_ALU_STORE, MI_ALU_ACCU);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_ushr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
|
|
{
|
|
if (shift == 0)
|
|
return src;
|
|
|
|
if (shift >= 64)
|
|
return mi_imm(0);
|
|
|
|
if (src.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src) >> shift);
|
|
|
|
struct mi_value res = mi_value_to_gpr(b, src);
|
|
|
|
/* Annoyingly, we only have power-of-two shifts */
|
|
while (shift) {
|
|
int bit = u_bit_scan(&shift);
|
|
assert(bit <= 5);
|
|
res = mi_ushr(b, res, mi_imm(1ULL << bit));
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_ishr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
|
|
{
|
|
if (src1.type == MI_VALUE_TYPE_IMM) {
|
|
assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
|
|
assert(mi_value_to_u64(src1) <= 32);
|
|
}
|
|
|
|
if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm((int64_t)mi_value_to_u64(src0) >> mi_value_to_u64(src1));
|
|
|
|
return mi_math_binop(b, MI_ALU_SAR, src0, src1,
|
|
MI_ALU_STORE, MI_ALU_ACCU);
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_ishr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
|
|
{
|
|
if (shift == 0)
|
|
return src;
|
|
|
|
if (shift >= 64)
|
|
return mi_imm(0);
|
|
|
|
if (src.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm((int64_t)mi_value_to_u64(src) >> shift);
|
|
|
|
struct mi_value res = mi_value_to_gpr(b, src);
|
|
|
|
/* Annoyingly, we only have power-of-two shifts */
|
|
while (shift) {
|
|
int bit = u_bit_scan(&shift);
|
|
assert(bit <= 5);
|
|
res = mi_ishr(b, res, mi_imm(1 << bit));
|
|
}
|
|
|
|
return res;
|
|
}
|
|
#endif /* if GFX_VERx10 >= 125 */
|
|
|
|
static inline struct mi_value
|
|
mi_imul_imm(struct mi_builder *b, struct mi_value src, uint32_t N)
|
|
{
|
|
if (src.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src) * N);
|
|
|
|
if (N == 0) {
|
|
mi_value_unref(b, src);
|
|
return mi_imm(0);
|
|
}
|
|
|
|
if (N == 1)
|
|
return src;
|
|
|
|
src = mi_value_to_gpr(b, src);
|
|
|
|
struct mi_value res = mi_value_ref(b, src);
|
|
|
|
unsigned top_bit = 31 - __builtin_clz(N);
|
|
for (int i = top_bit - 1; i >= 0; i--) {
|
|
res = mi_iadd(b, res, mi_value_ref(b, res));
|
|
if (N & (1 << i))
|
|
res = mi_iadd(b, res, mi_value_ref(b, src));
|
|
}
|
|
|
|
mi_value_unref(b, src);
|
|
|
|
return res;
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_ishl_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
|
|
{
|
|
if (shift == 0)
|
|
return src;
|
|
|
|
if (shift >= 64)
|
|
return mi_imm(0);
|
|
|
|
if (src.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm(mi_value_to_u64(src) << shift);
|
|
|
|
struct mi_value res = mi_value_to_gpr(b, src);
|
|
|
|
#if GFX_VERx10 >= 125
|
|
/* Annoyingly, we only have power-of-two shifts */
|
|
while (shift) {
|
|
int bit = u_bit_scan(&shift);
|
|
assert(bit <= 5);
|
|
res = mi_ishl(b, res, mi_imm(1 << bit));
|
|
}
|
|
#else
|
|
for (unsigned i = 0; i < shift; i++)
|
|
res = mi_iadd(b, res, mi_value_ref(b, res));
|
|
#endif
|
|
|
|
return res;
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_ushr32_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
|
|
{
|
|
if (shift == 0)
|
|
return src;
|
|
|
|
if (shift >= 64)
|
|
return mi_imm(0);
|
|
|
|
/* We right-shift by left-shifting by 32 - shift and taking the top 32 bits
|
|
* of the result.
|
|
*/
|
|
if (src.type == MI_VALUE_TYPE_IMM)
|
|
return mi_imm((mi_value_to_u64(src) >> shift) & UINT32_MAX);
|
|
|
|
if (shift > 32) {
|
|
struct mi_value tmp = mi_new_gpr(b);
|
|
_mi_copy_no_unref(b, mi_value_half(tmp, false),
|
|
mi_value_half(src, true));
|
|
_mi_copy_no_unref(b, mi_value_half(tmp, true), mi_imm(0));
|
|
mi_value_unref(b, src);
|
|
src = tmp;
|
|
shift -= 32;
|
|
}
|
|
assert(shift <= 32);
|
|
struct mi_value tmp = mi_ishl_imm(b, src, 32 - shift);
|
|
struct mi_value dst = mi_new_gpr(b);
|
|
_mi_copy_no_unref(b, mi_value_half(dst, false),
|
|
mi_value_half(tmp, true));
|
|
_mi_copy_no_unref(b, mi_value_half(dst, true), mi_imm(0));
|
|
mi_value_unref(b, tmp);
|
|
return dst;
|
|
}
|
|
|
|
static inline struct mi_value
|
|
mi_udiv32_imm(struct mi_builder *b, struct mi_value N, uint32_t D)
|
|
{
|
|
if (N.type == MI_VALUE_TYPE_IMM) {
|
|
assert(mi_value_to_u64(N) <= UINT32_MAX);
|
|
return mi_imm(mi_value_to_u64(N) / D);
|
|
}
|
|
|
|
/* We implicitly assume that N is only a 32-bit value */
|
|
if (D == 0) {
|
|
/* This is invalid but we should do something */
|
|
return mi_imm(0);
|
|
} else if (util_is_power_of_two_or_zero(D)) {
|
|
return mi_ushr32_imm(b, N, util_logbase2(D));
|
|
} else {
|
|
struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32);
|
|
assert(m.multiplier <= UINT32_MAX);
|
|
|
|
if (m.pre_shift)
|
|
N = mi_ushr32_imm(b, N, m.pre_shift);
|
|
|
|
/* Do the 32x32 multiply into gpr0 */
|
|
N = mi_imul_imm(b, N, m.multiplier);
|
|
|
|
if (m.increment)
|
|
N = mi_iadd(b, N, mi_imm(m.multiplier));
|
|
|
|
N = mi_ushr32_imm(b, N, 32);
|
|
|
|
if (m.post_shift)
|
|
N = mi_ushr32_imm(b, N, m.post_shift);
|
|
|
|
return N;
|
|
}
|
|
}
|
|
|
|
#endif /* MI_MATH section */
|
|
|
|
/* This assumes addresses of strictly more than 32bits (aka. Gfx8+). */
|
|
#if MI_BUILDER_CAN_WRITE_BATCH
|
|
|
|
/* Handle produced by mi_store_address() and consumed by
 * _mi_resolve_address_token().  It remembers where, inside two already
 * emitted MI_STORE_REGISTER_MEM commands, the destination address still has
 * to be filled in.
 */
struct mi_address_token {
   /* Pointers to address memory fields in the batch.  Entry i belongs to
    * the command that stores the i-th 32-bit half of the address register.
    */
   uint64_t *ptrs[2];
};
|
|
|
|
static inline struct mi_address_token
|
|
mi_store_address(struct mi_builder *b, struct mi_value addr_reg)
|
|
{
|
|
mi_builder_flush_math(b);
|
|
|
|
assert(addr_reg.type == MI_VALUE_TYPE_REG64);
|
|
|
|
struct mi_address_token token = {};
|
|
|
|
for (unsigned i = 0; i < 2; i++) {
|
|
mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
|
|
srm.RegisterAddress = addr_reg.reg + (i * 4);
|
|
|
|
const unsigned addr_dw =
|
|
GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8;
|
|
token.ptrs[i] = (void *)_dst + addr_dw;
|
|
}
|
|
}
|
|
|
|
mi_value_unref(b, addr_reg);
|
|
return token;
|
|
}
|
|
|
|
static inline void
|
|
mi_self_mod_barrier(struct mi_builder *b, unsigned cs_prefetch_size)
|
|
{
|
|
/* First make sure all the memory writes from previous modifying commands
|
|
* have landed. We want to do this before going through the CS cache,
|
|
* otherwise we could be fetching memory that hasn't been written to yet.
|
|
*/
|
|
mi_builder_emit(b, GENX(PIPE_CONTROL), pc) {
|
|
pc.CommandStreamerStallEnable = true;
|
|
}
|
|
/* Documentation says Gfx11+ should be able to invalidate the command cache
|
|
* but experiment show it doesn't work properly, so for now just get over
|
|
* the CS prefetch.
|
|
*/
|
|
for (uint32_t i = 0; i < (cs_prefetch_size / 4); i++)
|
|
mi_builder_emit(b, GENX(MI_NOOP), noop);
|
|
}
|
|
|
|
static inline void
|
|
_mi_resolve_address_token(struct mi_builder *b,
|
|
struct mi_address_token token,
|
|
void *batch_location)
|
|
{
|
|
__gen_address_type addr = __gen_get_batch_address(b->user_data,
|
|
batch_location);
|
|
uint64_t addr_addr_u64 = __gen_combine_address(b->user_data, batch_location,
|
|
addr, 0);
|
|
*(token.ptrs[0]) = addr_addr_u64;
|
|
*(token.ptrs[1]) = addr_addr_u64 + 4;
|
|
}
|
|
|
|
#endif /* MI_BUILDER_CAN_WRITE_BATCH */
|
|
|
|
#if GFX_VERx10 >= 125
|
|
|
|
/*
|
|
* Indirect load/store. Only available on XE_HP+
|
|
*/
|
|
|
|
MUST_CHECK static inline struct mi_value
|
|
mi_load_mem64_offset(struct mi_builder *b,
|
|
__gen_address_type addr, struct mi_value offset)
|
|
{
|
|
uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
|
|
struct mi_value addr_val = mi_imm(addr_u64);
|
|
|
|
struct mi_value dst = mi_new_gpr(b);
|
|
|
|
uint32_t dw[5];
|
|
dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);
|
|
dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);
|
|
dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
|
|
dw[3] = _mi_pack_alu(MI_ALU_LOADIND, _mi_value_as_gpr(dst), MI_ALU_ACCU);
|
|
dw[4] = _mi_pack_alu(MI_ALU_FENCE_RD, 0, 0);
|
|
_mi_builder_push_math(b, dw, 5);
|
|
|
|
mi_value_unref(b, addr_val);
|
|
mi_value_unref(b, offset);
|
|
|
|
return dst;
|
|
}
|
|
|
|
static inline void
|
|
mi_store_mem64_offset(struct mi_builder *b,
|
|
__gen_address_type addr, struct mi_value offset,
|
|
struct mi_value data)
|
|
{
|
|
uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
|
|
struct mi_value addr_val = mi_imm(addr_u64);
|
|
|
|
data = mi_value_to_gpr(b, mi_resolve_invert(b, data));
|
|
|
|
uint32_t dw[5];
|
|
dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);
|
|
dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);
|
|
dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
|
|
dw[3] = _mi_pack_alu(MI_ALU_STOREIND, MI_ALU_ACCU, _mi_value_as_gpr(data));
|
|
dw[4] = _mi_pack_alu(MI_ALU_FENCE_WR, 0, 0);
|
|
_mi_builder_push_math(b, dw, 5);
|
|
|
|
mi_value_unref(b, addr_val);
|
|
mi_value_unref(b, offset);
|
|
mi_value_unref(b, data);
|
|
|
|
/* This is the only math case which has side-effects outside of regular
|
|
* registers to flush math afterwards so we don't confuse anyone.
|
|
*/
|
|
mi_builder_flush_math(b);
|
|
}
|
|
|
|
/*
|
|
* Control-flow Section. Only available on XE_HP+
|
|
*/
|
|
|
|
/* A goto that was emitted before its target was placed.  mi_goto_if()
 * records one of these per forward jump and mi_goto_target() later packs the
 * real MI_BATCH_BUFFER_START into the reserved space.
 */
struct _mi_goto {
   /* Value for the PredicationEnable bit of the eventual jump command */
   bool predicated;
   /* Batch space reserved (and zeroed) for the MI_BATCH_BUFFER_START */
   void *mi_bbs;
};
|
|
|
|
/* A jump destination in the batch.  Gotos may be emitted both before and
 * after the target is placed with mi_goto_target().
 */
struct mi_goto_target {
   /* Set once mi_goto_target() has placed the target; addr is valid then */
   bool placed;
   /* Number of entries of gotos[] in use */
   unsigned num_gotos;
   /* Gotos emitted before placement, still waiting to be patched.  At most
    * ARRAY_SIZE(gotos) are supported; mi_goto_if() asserts on overflow.
    */
   struct _mi_goto gotos[8];
   /* Batch address of the target, assigned by mi_goto_target() */
   __gen_address_type addr;
};
|
|
|
|
/* Initializer for a struct mi_goto_target: not placed, no pending gotos. */
#define MI_GOTO_TARGET_INIT ((struct mi_goto_target) {})
|
|
|
|
/* Register number that mi_goto_if() treats as MI_PREDICATE_RESULT: it is
 * accessed through mi_reg32() and a condition already living in it is used
 * as-is.
 */
#define MI_BUILDER_MI_PREDICATE_RESULT_num 0x2418
|
|
|
|
static inline void
|
|
mi_goto_if(struct mi_builder *b, struct mi_value cond,
|
|
struct mi_goto_target *t)
|
|
{
|
|
/* First, set up the predicate, if any */
|
|
bool predicated;
|
|
if (cond.type == MI_VALUE_TYPE_IMM) {
|
|
/* If it's an immediate, the goto either doesn't happen or happens
|
|
* unconditionally.
|
|
*/
|
|
if (mi_value_to_u64(cond) == 0)
|
|
return;
|
|
|
|
assert(mi_value_to_u64(cond) == ~0ull);
|
|
predicated = false;
|
|
} else if (mi_value_is_reg(cond) &&
|
|
cond.reg == MI_BUILDER_MI_PREDICATE_RESULT_num) {
|
|
/* If it's MI_PREDICATE_RESULT, we use whatever predicate the client
|
|
* provided us with
|
|
*/
|
|
assert(cond.type == MI_VALUE_TYPE_REG32);
|
|
predicated = true;
|
|
} else {
|
|
mi_store(b, mi_reg32(MI_BUILDER_MI_PREDICATE_RESULT_num), cond);
|
|
predicated = true;
|
|
}
|
|
|
|
if (predicated) {
|
|
mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
|
|
sp.PredicateEnable = NOOPOnResultClear;
|
|
}
|
|
}
|
|
if (t->placed) {
|
|
mi_builder_emit(b, GENX(MI_BATCH_BUFFER_START), bbs) {
|
|
bbs.PredicationEnable = predicated;
|
|
bbs.AddressSpaceIndicator = ASI_PPGTT;
|
|
bbs.BatchBufferStartAddress = t->addr;
|
|
}
|
|
} else {
|
|
assert(t->num_gotos < ARRAY_SIZE(t->gotos));
|
|
struct _mi_goto g = {
|
|
.predicated = predicated,
|
|
.mi_bbs = __gen_get_batch_dwords(b->user_data,
|
|
GENX(MI_BATCH_BUFFER_START_length)),
|
|
};
|
|
memset(g.mi_bbs, 0, 4 * GENX(MI_BATCH_BUFFER_START_length));
|
|
t->gotos[t->num_gotos++] = g;
|
|
}
|
|
if (predicated) {
|
|
mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
|
|
sp.PredicateEnable = NOOPNever;
|
|
}
|
|
}
|
|
}
|
|
|
|
static inline void
|
|
mi_goto(struct mi_builder *b, struct mi_goto_target *t)
|
|
{
|
|
mi_goto_if(b, mi_imm(-1), t);
|
|
}
|
|
|
|
static inline void
|
|
mi_goto_target(struct mi_builder *b, struct mi_goto_target *t)
|
|
{
|
|
mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
|
|
sp.PredicateEnable = NOOPNever;
|
|
t->addr = __gen_get_batch_address(b->user_data, _dst);
|
|
}
|
|
t->placed = true;
|
|
|
|
struct GENX(MI_BATCH_BUFFER_START) bbs = { GENX(MI_BATCH_BUFFER_START_header) };
|
|
bbs.AddressSpaceIndicator = ASI_PPGTT;
|
|
bbs.BatchBufferStartAddress = t->addr;
|
|
|
|
for (unsigned i = 0; i < t->num_gotos; i++) {
|
|
bbs.PredicationEnable = t->gotos[i].predicated;
|
|
GENX(MI_BATCH_BUFFER_START_pack)(b->user_data, t->gotos[i].mi_bbs, &bbs);
|
|
}
|
|
}
|
|
|
|
static inline struct mi_goto_target
|
|
mi_goto_target_init_and_place(struct mi_builder *b)
|
|
{
|
|
struct mi_goto_target t = MI_GOTO_TARGET_INIT;
|
|
mi_goto_target(b, &t);
|
|
return t;
|
|
}
|
|
|
|
/* Opens a loop that runs on the command streamer; use as
 *
 *    mi_loop(b) { ...body, with mi_break*()/mi_continue*()... }
 *
 * The C `for` statement is only a scoping device: its body is executed
 * exactly once on the CPU.  The init clause declares two hidden targets,
 * __break (not yet placed) and __continue (placed at the top of the loop).
 * After the body, the increment clause emits a jump back to __continue and
 * then places __break, which makes the `!__break.placed` condition false
 * and ends the C loop.
 */
#define mi_loop(b) \
   for (struct mi_goto_target __break = MI_GOTO_TARGET_INIT, \
        __continue = mi_goto_target_init_and_place(b); !__break.placed; \
        mi_goto(b, &__continue), mi_goto_target(b, &__break))
|
|
|
|
/* Loop-control helpers.  They refer to the hidden __break / __continue
 * targets declared by mi_loop() and are therefore only usable inside an
 * mi_loop() body.  The *_if variants take an mi_value condition with the
 * semantics of mi_goto_if().
 */
#define mi_break(b) mi_goto(b, &__break)
#define mi_break_if(b, cond) mi_goto_if(b, cond, &__break)
#define mi_continue(b) mi_goto(b, &__continue)
#define mi_continue_if(b, cond) mi_goto_if(b, cond, &__continue)
|
|
|
|
#endif /* GFX_VERx10 >= 125 */
|
|
|
|
#endif /* MI_BUILDER_H */
|